From 2407e2b215075d490007082f2958609037d8eb72 Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Wed, 24 Sep 2025 22:06:04 -0700
Subject: [PATCH 01/10] feat: added logger in handleSteps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with Codebuff
Co-Authored-By: Codebuff <noreply@codebuff.com>
---
 .agents/types/agent-definition.ts          |  9 ++-
 backend/src/run-programmatic-step.ts       | 93 ++++++++++++++++++++--
 backend/src/util/quickjs-sandbox.ts        | 67 +++++++++++++++-
 common/src/actions.ts                      |  8 ++
 common/src/types/dynamic-agent-template.ts | 21 +++++
 npm-app/src/client.ts                      | 40 ++++++++++
 6 files changed, 230 insertions(+), 8 deletions(-)

diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index 5d0596d087..835725c251 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -179,7 +179,7 @@ export interface AgentDefinition {
    * }
    * }
    */
-  handleSteps?: (context: AgentStepContext) => Generator<
+  handleSteps?: (context: AgentStepContext, logger?: Logger) => Generator<
     ToolCall | 'STEP' | 'STEP_ALL',
     void,
     {
@@ -194,6 +194,13 @@ export interface AgentDefinition {
 // Supporting Types
 // ============================================================================
 
+export interface Logger {
+  debug: (data: any, msg?: string) => void
+  info: (data: any, msg?: string) => void
+  warn: (data: any, msg?: string) => void
+  error: (data: any, msg?: string) => void
+}
+
 export interface AgentState {
   agentId: string
   runId: string
diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index ab0ceff243..7fe5b5087f 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -74,6 +74,18 @@ export async function runProgrammaticStep(
     stepNumber: number
   },
 ): Promise<{ agentState: AgentState; endTurn: boolean; stepNumber: number }> {
+  logger.info(
+    {
+      agentType: template.id,
+      runId: agentState.runId,
+      hasHandleSteps: !!template.handleSteps,
+      handleStepsType: typeof template.handleSteps,
+      stepNumber,
+      stepsComplete,
+    },
+    'runProgrammaticStep: Starting programmatic step execution',
+  )
+
   if (!template.handleSteps) {
     throw new Error('No step handler found for agent template ' + template.id)
   }
@@ -86,9 +98,37 @@ export async function runProgrammaticStep(
   let generator = runIdToGenerator[agentState.runId]
   let sandbox = sandboxManager.getSandbox(agentState.runId)
 
-  // Check if we need to initialize a generator (either native or QuickJS-based)
+  // Check if we need to initialize a generator
   if (!generator && !sandbox) {
+    const createLogMethod =
+      (level: 'debug' | 'info' | 'warn' | 'error') =>
+      (data: any, msg?: string) => {
+        logger[level](data, msg) // Log to backend
+        sendAction(ws, {
+          type: 'handlesteps-log-chunk',
+          userInputId,
+          agentId: agentState.agentId,
+          level,
+          data,
+          message: msg,
+        })
+      }
+
+    const streamingLogger = {
+      debug: createLogMethod('debug'),
+      info: createLogMethod('info'),
+      warn: createLogMethod('warn'),
+      error: createLogMethod('error'),
+    }
+
     if (typeof template.handleSteps === 'string') {
+      logger.info(
+        {
+          agentType: template.id,
+          runId: agentState.runId,
+        },
+        'runProgrammaticStep: Initializing QuickJS sandbox for string-based generator',
+      )
       // Initialize QuickJS sandbox for string-based generator
       sandbox = await sandboxManager.getOrCreateSandbox(
         agentState.runId,
@@ -98,15 +138,35 @@ export async function runProgrammaticStep(
           prompt,
           params,
         },
+        undefined, // config
+        streamingLogger, // pass the streaming logger instance
       )
     } else {
+      logger.info(
+        {
+          agentType: template.id,
+          runId: agentState.runId,
+        },
+        'runProgrammaticStep: Initializing native JavaScript generator',
+      )
       // Initialize native generator
-      generator = template.handleSteps({
-        agentState,
-        prompt,
-        params,
-      })
+      generator = (template.handleSteps as any)(
+        {
+          agentState,
+          prompt,
+          params,
+        },
+        streamingLogger,
+      )
       runIdToGenerator[agentState.runId] = generator
+      logger.info(
+        {
+          agentType: template.id,
+          runId: agentState.runId,
+          generatorInitialized: !!generator,
+        },
+        'runProgrammaticStep: Native generator initialized successfully',
+      )
     }
   }
 
@@ -168,6 +228,17 @@ export async function runProgrammaticStep(
       creditsBefore = state.agentState.directCreditsUsed
       childrenBefore = state.agentState.childRunIds.length
 
+      logger.info(
+        {
+          agentType: template.id,
+          runId: agentState.runId,
+          usingSandbox: !!sandbox,
+          usingGenerator: !!generator,
+          stepsComplete,
+        },
+        'runProgrammaticStep: About to execute generator step',
+      )
+
       const result = sandbox
         ? await sandbox.executeStep({
             agentState: getPublicAgentState(state.agentState),
@@ -180,6 +251,16 @@ export async function runProgrammaticStep(
             stepsComplete,
           })
 
+      logger.info(
+        {
+          agentType: template.id,
+          runId: agentState.runId,
+          resultDone: result.done,
+          resultValue: result.value,
+        },
+        'runProgrammaticStep: Generator step executed, got result',
+      )
+
       if (result.done) {
         endTurn = true
         break
diff --git a/backend/src/util/quickjs-sandbox.ts b/backend/src/util/quickjs-sandbox.ts
index 8d0be3eee7..ba97177922 100644
--- a/backend/src/util/quickjs-sandbox.ts
+++ b/backend/src/util/quickjs-sandbox.ts
@@ -52,6 +52,12 @@ export class QuickJSSandbox {
     generatorCode: string,
     initialInput: any,
     config: SandboxConfig = {},
+    logger?: {
+      debug: (data: any, msg?: string) => void
+      info: (data: any, msg?: string) => void
+      warn: (data: any, msg?: string) => void
+      error: (data: any, msg?: string) => void
+    },
   ): Promise<QuickJSSandbox> {
     const {
       memoryLimit = 1024 * 1024 * 20, // 20MB
@@ -80,6 +86,45 @@ export class QuickJSSandbox {
     const context = runtime.newContext()
 
     try {
+      // Set up logger handler
+      const loggerHandler = context.newFunction(
+        '_loggerHandler',
+        (level, data, msg) => {
+          try {
+            const levelStr = context.getString(level)
+            let dataObj: any
+            let msgStr: string | undefined
+
+            try {
+              dataObj = data ? JSON.parse(context.getString(data)) : undefined
+            } catch {
+              dataObj = context.getString(data)
+            }
+
+            msgStr = msg ? context.getString(msg) : undefined
+
+            if (logger) {
+              if (levelStr === 'debug' && logger.debug) {
+                logger.debug(dataObj, msgStr)
+              } else if (levelStr === 'info' && logger.info) {
+                logger.info(dataObj, msgStr)
+              } else if (levelStr === 'warn' && logger.warn) {
+                logger.warn(dataObj, msgStr)
+              } else if (levelStr === 'error' && logger.error) {
+                logger.error(dataObj, msgStr)
+              }
+            }
+          } catch (err) {
+            // Fallback for logging errors
+            if (logger?.error) {
+              logger.error({ error: err }, 'Logger handler error')
+            }
+          }
+        },
+      )
+
+      context.setProp(context.global, '_loggerHandler', loggerHandler)
+      loggerHandler.dispose()
       // Inject safe globals and the generator function
       const setupCode = `
         // Safe console implementation
@@ -89,11 +134,24 @@ export class QuickJSSandbox {
           warn: (...args) => undefined
         };
         
+        // Logger implementation
+        const createLogMethod = (level) => (data, msg) => 
+          globalThis._loggerHandler(level, 
+            typeof data === 'object' ? JSON.stringify(data) : String(data), 
+            msg ? String(msg) : undefined);
+        
+        const logger = {
+          debug: createLogMethod('debug'),
+          info: createLogMethod('info'),
+          warn: createLogMethod('warn'),
+          error: createLogMethod('error')
+        };
+        
         // Agent function
         const handleSteps = ${generatorCode};
         
         // Create generator instance
-        let generator = handleSteps(${JSON.stringify(initialInput)});
+        let generator = handleSteps(${JSON.stringify(initialInput)}, logger);
         
         // Generator management
         globalThis._generator = generator;
@@ -213,6 +271,12 @@ export class SandboxManager {
     generatorCode: string,
     initialInput: any,
     config?: SandboxConfig,
+    logger?: {
+      debug: (data: any, msg?: string) => void
+      info: (data: any, msg?: string) => void
+      warn: (data: any, msg?: string) => void
+      error: (data: any, msg?: string) => void
+    },
   ): Promise<QuickJSSandbox> {
     const existing = this.sandboxes.get(runId)
     if (existing && existing.isInitialized()) {
@@ -229,6 +293,7 @@ export class SandboxManager {
       generatorCode,
       initialInput,
       config,
+      logger,
     )
     this.sandboxes.set(runId, sandbox)
     return sandbox
diff --git a/common/src/actions.ts b/common/src/actions.ts
index 1ccb7b373c..9bd13f1d83 100644
--- a/common/src/actions.ts
+++ b/common/src/actions.ts
@@ -130,6 +130,14 @@ export const SERVER_ACTION_SCHEMA = z.discriminatedUnion('type', [
     chunk: z.string(),
     prompt: z.string().optional(),
   }),
+  z.object({
+    type: z.literal('handlesteps-log-chunk'),
+    userInputId: z.string(),
+    agentId: z.string(),
+    level: z.enum(['debug', 'info', 'warn', 'error']),
+    data: z.any(),
+    message: z.string().optional(),
+  }),
   PromptResponseSchema,
   z.object({
     type: z.literal('read-files'),
diff --git a/common/src/types/dynamic-agent-template.ts b/common/src/types/dynamic-agent-template.ts
index 928849bdfe..dc00ad403a 100644
--- a/common/src/types/dynamic-agent-template.ts
+++ b/common/src/types/dynamic-agent-template.ts
@@ -68,6 +68,26 @@ export type PromptField = z.infer<typeof PromptFieldSchema>
 
 const functionSchema = <T extends z.core.$ZodFunction>(schema: T) =>
   z.custom<Parameters<T['implement']>[0]>((fn: any) => schema.implement(fn))
+// Schema for the Logger interface
+const LoggerSchema = z.object({
+  debug: z.function({
+    input: [z.any(), z.string().optional()],
+    output: z.void(),
+  }),
+  info: z.function({
+    input: [z.any(), z.string().optional()],
+    output: z.void(),
+  }),
+  warn: z.function({
+    input: [z.any(), z.string().optional()],
+    output: z.void(),
+  }),
+  error: z.function({
+    input: [z.any(), z.string().optional()],
+    output: z.void(),
+  }),
+})
+
 // Schema for validating handleSteps function signature
 const HandleStepsSchema = functionSchema(
   z.function({
@@ -81,6 +101,7 @@ const HandleStepsSchema = functionSchema(
         prompt: z.string().optional(),
         params: z.any().optional(),
       }),
+      LoggerSchema.optional(),
     ],
     output: z.any(),
   }),
diff --git a/npm-app/src/client.ts b/npm-app/src/client.ts
index 574471e446..3694f82f88 100644
--- a/npm-app/src/client.ts
+++ b/npm-app/src/client.ts
@@ -951,6 +951,46 @@ export class Client {
       // Refresh display if we're currently viewing this agent
       refreshSubagentDisplay(agentId)
     })
+
+    // Handle handleSteps log streaming
+    this.webSocket.subscribe('handlesteps-log-chunk', (action) => {
+      const { agentId, level, data, message } = action
+
+      // Format the log message for display
+      const formattedMessage = this.formatLogMessage(level, data, message)
+
+      // Display the log message immediately
+      if (formattedMessage) {
+        process.stdout.write(formattedMessage + '\n')
+      }
+    })
+  }
+
+  private formatLogMessage(level: string, data: any, message?: string): string {
+    const timestamp = new Date().toISOString().substring(11, 23) // HH:MM:SS.mmm
+    const levelColors = { debug: blue, info: green, warn: yellow, error: red }
+    const levelColor =
+      levelColors[level as keyof typeof levelColors] || ((s: string) => s)
+
+    const timeTag = `[${timestamp}]`
+    const levelTag = levelColor(`[${level.toUpperCase()}]`)
+    const dataStr = this.serializeLogData(data)
+
+    return [timeTag, levelTag, message, dataStr].filter(Boolean).join(' ')
+  }
+
+  private serializeLogData(data: any): string {
+    if (data === undefined || data === null) return ''
+
+    if (typeof data === 'object') {
+      try {
+        return JSON.stringify(data, null, 2)
+      } catch {
+        return String(data)
+      }
+    }
+
+    return String(data)
   }
 
   private showUsageWarning() {

From 637cce4e2020b756bbd3f39251cf31817f1830ae Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Wed, 24 Sep 2025 22:56:11 -0700
Subject: [PATCH 02/10] fix: logger in handleSteps is never undefined

---
 .agents/README.md                        | 239 ++++++++++++++++++++++-
 .agents/__tests__/context-pruner.test.ts |  16 +-
 .agents/types/agent-definition.ts        |  22 ++-
 common/src/types/agent-template.ts       |  12 +-
 4 files changed, 277 insertions(+), 12 deletions(-)

diff --git a/.agents/README.md b/.agents/README.md
index 2f323f4d76..11f4290df6 100644
--- a/.agents/README.md
+++ b/.agents/README.md
@@ -10,9 +10,26 @@ Create specialized agent workflows that coordinate multiple AI agents to tackle
 
 ## Need Help?
 
-- For detailed documentation, see [agent-guide.md](./agent-guide.md).
 - For examples, check the `examples/` directory.
 - Join our [Discord community](https://codebuff.com/discord) and ask your questions!
+- Check our [documentation](https://codebuff.com/docs) for more details.
+
+# What is Codebuff?
+
+Codebuff is an **open-source AI coding assistant** that edits your codebase through natural language instructions. Instead of using one model for everything, it coordinates specialized agents that work together to understand your project and make precise changes.
+
+Codebuff beats Claude Code 61% to 53% on [our evals](https://github.com/CodebuffAI/codebuff/tree/main/evals), which cover 175+ coding tasks drawn from multiple open-source repos to simulate real-world work.
+
+## How Codebuff Works
+
+When you ask Codebuff to "add authentication to my API," it might invoke:
+
+1. A **File Explorer Agent** to scan your codebase, understand the architecture, and find relevant files
+2. A **Planner Agent** to plan which files need changes and in what order
+3. An **Editor Agent** to make precise edits
+4. A **Reviewer Agent** to validate changes
+
+This multi-agent approach gives you better context understanding, more accurate edits, and fewer errors compared to single-model tools.
 
 ## Context Window Management
 
@@ -54,3 +71,223 @@ export default {
 ```
 
 This agent systematically analyzes changes, reads relevant files for context, then creates commits with clear, meaningful messages that explain the "why" behind changes.
+
+# Agent Development Guide
+
+This guide covers everything you need to know about building custom Codebuff agents.
+
+## Agent Structure
+
+Each agent is a TypeScript file that exports an `AgentDefinition` object:
+
+```typescript
+export default {
+  id: 'my-agent', // Unique identifier (lowercase, hyphens only)
+  displayName: 'My Agent', // Human-readable name
+  model: 'anthropic/claude-sonnet-4', // AI model to use (OpenRouter model id)
+  toolNames: ['read_files', 'write_file'], // Available tools
+  instructionsPrompt: 'You are...', // Agent behavior instructions
+  spawnerPrompt: 'Use this agent when...', // When others should spawn this
+  spawnableAgents: ['helper-agent'], // Agents this can spawn
+
+  // Optional: Programmatic control
+  *handleSteps() {
+    yield { toolName: 'read_files', input: { paths: ['src/config.ts'] } }
+    yield 'STEP' // Let AI process and respond
+  },
+}
+```
+
+## Core Properties
+
+### Required Fields
+
+- **`id`**: Unique identifier using lowercase letters and hyphens only
+- **`displayName`**: Human-readable name shown in UI
+- **`model`**: AI model from OpenRouter (see [available models](https://openrouter.ai/models))
+- **`instructionsPrompt`**: Detailed instructions defining the agent's role and behavior
+
+### Optional Fields
+
+- **`toolNames`**: Array of tools the agent can use (defaults to common tools)
+- **`spawnerPrompt`**: Instructions for when other agents should spawn this one
+- **`spawnableAgents`**: Array of agent names this agent can spawn
+- **`handleSteps`**: Generator function for programmatic control
+
+## Available Tools
+
+### File Operations
+
+- **`read_files`**: Read file contents
+- **`write_file`**: Create or modify entire files
+- **`str_replace`**: Make targeted string replacements
+- **`code_search`**: Search for patterns across the codebase
+
+### Execution
+
+- **`run_terminal_command`**: Execute shell commands
+- **`spawn_agents`**: Delegate tasks to other agents
+- **`end_turn`**: Finish the agent's response
+
+### Web & Research
+
+- **`web_search`**: Search the internet for information
+- **`read_docs`**: Read technical documentation
+- **`browser_logs`**: Navigate and inspect web pages
+
+See `types/tools.ts` for detailed parameter information.
+
+## Programmatic Control
+
+Use the `handleSteps` generator function to mix AI reasoning with programmatic logic:
+
+```typescript
+*handleSteps() {
+  // Execute a tool
+  yield { toolName: 'read_files', input: { paths: ['package.json'] } }
+
+  // Let AI process results and respond
+  yield 'STEP'
+
+  // Conditional logic
+  if (needsMoreAnalysis) {
+    yield { toolName: 'spawn_agents', input: { agents: ['deep-analyzer'] } }
+    yield 'STEP_ALL' // Wait for spawned agents to complete
+  }
+
+  // Final AI response
+  yield 'STEP'
+}
+```
+
+### Control Commands
+
+- **`'STEP'`**: Let AI process and respond once
+- **`'STEP_ALL'`**: Let AI continue until completion
+- **Tool calls**: `{ toolName: 'tool_name', input: { ...params } }`
+
+## Model Selection
+
+Choose models based on your agent's needs:
+
+- **`anthropic/claude-sonnet-4`**: Best for complex reasoning and code generation
+- **`openai/gpt-5`**: Strong general-purpose capabilities
+- **`x-ai/grok-4-fast`**: Fast and cost-effective for simple or medium-complexity tasks
+
+**Any model on OpenRouter**: Unlike Claude Code, which locks you into Anthropic's models, Codebuff supports any model available on [OpenRouter](https://openrouter.ai/models), from Claude and GPT to specialized models like Qwen, DeepSeek, and others. Switch models for different tasks or use the latest releases without waiting for platform updates.
+
+See [OpenRouter](https://openrouter.ai/models) for all available models and pricing.
+
+## Agent Coordination
+
+Agents can spawn other agents to create sophisticated workflows:
+
+```typescript
+// Parent agent spawns specialists
+*handleSteps() {
+  yield { toolName: 'spawn_agents', input: { agents: [
+    'security-scanner',
+    'performance-analyzer',
+    'code-reviewer'
+  ] } }
+  yield 'STEP_ALL' // Wait for all to complete
+
+  // Synthesize results
+  yield 'STEP'
+}
+```
+
+**Reuse any published agent**: Compose existing [published agents](https://www.codebuff.com/store) to get a leg up. Codebuff agents are the new MCP!
+
+## Best Practices
+
+### Instructions
+
+- Be specific about the agent's role and expertise
+- Include examples of good outputs
+- Specify when the agent should ask for clarification
+- Define the agent's limitations
+
+### Tool Usage
+
+- Start with file exploration tools (`read_files`, `code_search`)
+- Use `str_replace` for targeted edits, `write_file` for major changes
+- Always use `end_turn` to finish responses cleanly
+
+### Error Handling
+
+- Include error checking in programmatic flows
+- Provide fallback strategies for failed operations
+- Log important decisions for debugging
+
+### Performance
+
+- Choose appropriate models for the task complexity
+- Minimize unnecessary tool calls
+- Use spawnable agents for parallel processing
+
+## Testing Your Agent
+
+1. **Local Testing**: `codebuff --agent your-agent-name`
+2. **Debug Mode**: Add logging to your `handleSteps` function
+3. **Unit Testing**: Test individual functions in isolation
+4. **Integration Testing**: Test agent coordination workflows
+
+## Publishing & Sharing
+
+1. **Validate**: Ensure your agent works across different codebases
+2. **Document**: Include clear usage instructions
+3. **Publish**: `codebuff publish your-agent-name`
+4. **Maintain**: Update as models and tools evolve
+
+## Advanced Patterns
+
+### Conditional Workflows
+
+```typescript
+*handleSteps() {
+  const config = yield { toolName: 'read_files', input: { paths: ['config.json'] } }
+  yield 'STEP'
+
+  if (config.includes('typescript')) {
+    yield { toolName: 'spawn_agents', input: { agents: ['typescript-expert'] } }
+  } else {
+    yield { toolName: 'spawn_agents', input: { agents: ['javascript-expert'] } }
+  }
+  yield 'STEP_ALL'
+}
+```
+
+### Iterative Refinement
+
+```typescript
+*handleSteps() {
+  for (let attempt = 0; attempt < 3; attempt++) {
+    yield { toolName: 'run_terminal_command', input: { command: 'npm test' } }
+    yield 'STEP'
+
+    if (allTestsPass) break
+
+    yield { toolName: 'spawn_agents', input: { agents: ['test-fixer'] } }
+    yield 'STEP_ALL'
+  }
+}
+```
+
+## Why Choose Codebuff for Custom Agents
+
+**Deep customizability**: Create sophisticated agent workflows with TypeScript generators that mix AI generation with programmatic control. Define custom agents that spawn subagents, implement conditional logic, and orchestrate complex multi-step processes that adapt to your specific use cases.
+
+**Fully customizable SDK**: Build Codebuff's capabilities directly into your applications with a complete TypeScript SDK. Create custom tools, integrate with your CI/CD pipeline, build AI-powered development environments, or embed intelligent coding assistance into your products.
+
+Learn more about the SDK [here](https://www.npmjs.com/package/@codebuff/sdk).
+
+## Community & Support
+
+- **Discord**: [Join our community](https://codebuff.com/discord) for help and inspiration
+- **Examples**: Study the `examples/` directory for patterns
+- **Documentation**: [codebuff.com/docs](https://codebuff.com/docs) and check `types/` for detailed type information
+- **Issues**: [Report bugs and request features on GitHub](https://github.com/CodebuffAI/codebuff/issues)
+- **Support**: [support@codebuff.com](mailto:support@codebuff.com)
+
+Happy agent building! 🤖
diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
index 8d23c2879e..4fcc82560b 100644
--- a/.agents/__tests__/context-pruner.test.ts
+++ b/.agents/__tests__/context-pruner.test.ts
@@ -66,7 +66,13 @@ describe('context-pruner handleSteps', () => {
 
   const runHandleSteps = (messages: Message[]) => {
     mockAgentState.messageHistory = messages
-    const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
+    const mockLogger = {
+      debug: () => {},
+      info: () => {},
+      warn: () => {},
+      error: () => {},
+    }
+    const generator = contextPruner.handleSteps!({ agentState: mockAgentState }, mockLogger)
     const results: any[] = []
     let result = generator.next()
     while (!result.done) {
@@ -324,7 +330,13 @@ describe('context-pruner edge cases', () => {
 
   const runHandleSteps = (messages: Message[]) => {
     mockAgentState.messageHistory = messages
-    const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
+    const mockLogger = {
+      debug: () => {},
+      info: () => {},
+      warn: () => {},
+      error: () => {},
+    }
+    const generator = contextPruner.handleSteps!({ agentState: mockAgentState }, mockLogger)
     const results: ReturnType<typeof generator.next>['value'][] = []
     let result = generator.next()
     while (!result.done) {
diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index 835725c251..20c19c7189 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -179,7 +179,10 @@ export interface AgentDefinition {
    * }
    * }
    */
-  handleSteps?: (context: AgentStepContext, logger?: Logger) => Generator<
+  handleSteps?: (
+    context: AgentStepContext,
+    logger: Logger,
+  ) => Generator<
     ToolCall | 'STEP' | 'STEP_ALL',
     void,
     {
@@ -194,13 +197,6 @@ export interface AgentDefinition {
 // Supporting Types
 // ============================================================================
 
-export interface Logger {
-  debug: (data: any, msg?: string) => void
-  info: (data: any, msg?: string) => void
-  warn: (data: any, msg?: string) => void
-  error: (data: any, msg?: string) => void
-}
-
 export interface AgentState {
   agentId: string
   runId: string
@@ -222,6 +218,16 @@ export interface AgentStepContext {
   params?: Record<string, any>
 }
 
+/**
+ * Logger interface for handleSteps
+ */
+export interface Logger {
+  debug: (data: any, msg?: string) => void
+  info: (data: any, msg?: string) => void
+  warn: (data: any, msg?: string) => void
+  error: (data: any, msg?: string) => void
+}
+
 /**
  * Tool call object for handleSteps generator
  */
diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts
index 4ea162fb7b..599d76e2e5 100644
--- a/common/src/types/agent-template.ts
+++ b/common/src/types/agent-template.ts
@@ -54,6 +54,16 @@ export type StepGenerator = Generator<
 export type StepHandler<
   P = string | undefined,
   T = Record<string, any> | undefined,
-> = (params: { agentState: AgentState; prompt: P; params: T }) => StepGenerator
+> = (
+  params: { agentState: AgentState; prompt: P; params: T },
+  logger: Logger,
+) => StepGenerator
+
+export interface Logger {
+  debug: (data: any, msg?: string) => void
+  info: (data: any, msg?: string) => void
+  warn: (data: any, msg?: string) => void
+  error: (data: any, msg?: string) => void
+}
 
 export { PublicAgentState }

From 901ebb56b67419ded12199e78f929491633fbf39 Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Wed, 24 Sep 2025 23:04:34 -0700
Subject: [PATCH 03/10] Update agent-definition.ts

---
 .agents/types/agent-definition.ts             | 28 +++++++++++--------
 .../types/agent-definition.ts                 | 23 +++++++++++++--
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index 20c19c7189..0f32aebb8e 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -18,6 +18,17 @@ import type * as Tools from './tools'
 import type { Message, ToolResultOutput, JsonObjectSchema } from './util-types'
 type ToolName = Tools.ToolName
 
+// ============================================================================
+// Logger Interface
+// ============================================================================
+
+export interface Logger {
+  debug: (data: any, msg?: string) => void
+  info: (data: any, msg?: string) => void
+  warn: (data: any, msg?: string) => void
+  error: (data: any, msg?: string) => void
+}
+
 // ============================================================================
 // Agent Definition and Utility Types
 // ============================================================================
@@ -144,7 +155,8 @@ export interface AgentDefinition {
    * Or use 'return' to end the turn.
    *
    * Example 1:
-   * function* handleSteps({ agentStep, prompt, params}) {
+   * function* handleSteps({ agentState, prompt, params }, logger) {
+   *   logger.info('Starting file read process')
    *   const { toolResult } = yield {
    *     toolName: 'read_files',
    *     input: { paths: ['file1.txt', 'file2.txt'] }
@@ -152,6 +164,7 @@ export interface AgentDefinition {
    *   yield 'STEP_ALL'
    *
    *   // Optionally do a post-processing step here...
+   *   logger.info('Files read successfully, setting output')
    *   yield {
    *     toolName: 'set_output',
    *     input: {
@@ -161,8 +174,9 @@ export interface AgentDefinition {
    * }
    *
    * Example 2:
-   * handleSteps: function* ({ agentState, prompt, params }) {
+   * handleSteps: function* ({ agentState, prompt, params }, logger) {
    *   while (true) {
+   *     logger.debug('Spawning thinker agent')
    *     yield {
    *       toolName: 'spawn_agents',
    *       input: {
@@ -218,16 +232,6 @@ export interface AgentStepContext {
   params?: Record<string, any>
 }
 
-/**
- * Logger interface for handleSteps
- */
-export interface Logger {
-  debug: (data: any, msg?: string) => void
-  info: (data: any, msg?: string) => void
-  warn: (data: any, msg?: string) => void
-  error: (data: any, msg?: string) => void
-}
-
 /**
  * Tool call object for handleSteps generator
  */
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 5d0596d087..0f32aebb8e 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -18,6 +18,17 @@ import type * as Tools from './tools'
 import type { Message, ToolResultOutput, JsonObjectSchema } from './util-types'
 type ToolName = Tools.ToolName
 
+// ============================================================================
+// Logger Interface
+// ============================================================================
+
+export interface Logger {
+  debug: (data: any, msg?: string) => void
+  info: (data: any, msg?: string) => void
+  warn: (data: any, msg?: string) => void
+  error: (data: any, msg?: string) => void
+}
+
 // ============================================================================
 // Agent Definition and Utility Types
 // ============================================================================
@@ -144,7 +155,8 @@ export interface AgentDefinition {
    * Or use 'return' to end the turn.
    *
    * Example 1:
-   * function* handleSteps({ agentStep, prompt, params}) {
+   * function* handleSteps({ agentState, prompt, params }, logger) {
+   *   logger.info('Starting file read process')
    *   const { toolResult } = yield {
    *     toolName: 'read_files',
    *     input: { paths: ['file1.txt', 'file2.txt'] }
@@ -152,6 +164,7 @@ export interface AgentDefinition {
    *   yield 'STEP_ALL'
    *
    *   // Optionally do a post-processing step here...
+   *   logger.info('Files read successfully, setting output')
    *   yield {
    *     toolName: 'set_output',
    *     input: {
@@ -161,8 +174,9 @@ export interface AgentDefinition {
    * }
    *
    * Example 2:
-   * handleSteps: function* ({ agentState, prompt, params }) {
+   * handleSteps: function* ({ agentState, prompt, params }, logger) {
    *   while (true) {
+   *     logger.debug('Spawning thinker agent')
    *     yield {
    *       toolName: 'spawn_agents',
    *       input: {
@@ -179,7 +193,10 @@ export interface AgentDefinition {
    * }
    * }
    */
-  handleSteps?: (context: AgentStepContext) => Generator<
+  handleSteps?: (
+    context: AgentStepContext,
+    logger: Logger,
+  ) => Generator<
     ToolCall | 'STEP' | 'STEP_ALL',
     void,
     {

From 4985a32dd792a213f1d093f67b6427c8cb30b63f Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Wed, 24 Sep 2025 23:19:11 -0700
Subject: [PATCH 04/10] refactor: deduplicate agent-template.ts by importing
 base types from the user-facing template; update Logger parameter in
 handleSteps to ensure correct step logging.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with Codebuff
Co-Authored-By: Codebuff <noreply@codebuff.com>
---
 .../__tests__/run-programmatic-step.test.ts   | 10 ++++----
 common/src/templates/agent-validation.ts      |  1 +
 .../types/agent-definition.ts                 | 20 ++++++++--------
 common/src/types/agent-template.ts            | 23 ++++++++++++-------
 4 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index fc9acfd2af..d25d5479a6 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -84,10 +84,9 @@ describe('runProgrammaticStep', () => {
     )
 
     // Mock sendAction
-    sendActionSpy = spyOn(
-      websocketAction,
-      'sendAction',
-    ).mockImplementation(() => {})
+    sendActionSpy = spyOn(websocketAction, 'sendAction').mockImplementation(
+      () => {},
+    )
 
     // Mock crypto.randomUUID
     spyOn(crypto, 'randomUUID').mockImplementation(
@@ -118,7 +117,8 @@ describe('runProgrammaticStep', () => {
     mockAgentState = {
       ...sessionState.mainAgentState,
       agentId: 'test-agent-id',
-      runId: 'test-run-id' as `${string}-${string}-${string}-${string}-${string}`,
+      runId:
+        'test-run-id' as `${string}-${string}-${string}-${string}-${string}`,
       messageHistory: [
         { role: 'user', content: 'Initial message' },
         { role: 'assistant', content: 'Initial response' },
diff --git a/common/src/templates/agent-validation.ts b/common/src/templates/agent-validation.ts
index 82ab3dc2e2..2c97204760 100644
--- a/common/src/templates/agent-validation.ts
+++ b/common/src/templates/agent-validation.ts
@@ -284,6 +284,7 @@ export function validateSingleAgent(
       stepPrompt: validatedConfig.stepPrompt ?? '',
       outputSchema,
       inputSchema,
+
     }
 
     return {
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 0f32aebb8e..1f65d40fc3 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -196,15 +196,17 @@ export interface AgentDefinition {
   handleSteps?: (
     context: AgentStepContext,
     logger: Logger,
-  ) => Generator<
-    ToolCall | 'STEP' | 'STEP_ALL',
-    void,
-    {
-      agentState: AgentState
-      toolResult: ToolResultOutput[] | undefined
-      stepsComplete: boolean
-    }
-  >
+  ) =>
+    | Generator<
+        ToolCall | 'STEP' | 'STEP_ALL',
+        void,
+        {
+          agentState: AgentState
+          toolResult: ToolResultOutput[] | undefined
+          stepsComplete: boolean
+        }
+      >
+    | string
 }
 
 // ============================================================================
diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts
index 599d76e2e5..45cada0072 100644
--- a/common/src/types/agent-template.ts
+++ b/common/src/types/agent-template.ts
@@ -1,9 +1,19 @@
+/**
+ * Backend Agent Template Types
+ * 
+ * This file provides backend-compatible agent template types with strict validation.
+ * It imports base types from the user-facing template to eliminate duplication.
+ */
+
 import type { Model } from '../old-constants'
 import type { ToolResultOutput } from './messages/content-part'
 import type { AgentState, AgentTemplateType } from './session-state'
 import type {
+  AgentDefinition,
   ToolCall,
   AgentState as PublicAgentState,
+  Logger,
+  AgentStepContext,
 } from '../templates/initial-agents-dir/types/agent-definition'
 import type { ToolName } from '../tools/constants'
 import type { OpenRouterProviderOptions } from '@codebuff/internal/openrouter-ai-sdk'
@@ -11,6 +21,10 @@ import type { z } from 'zod/v4'
 
 export type AgentId = `${string}/${string}@${number}.${number}.${number}`
 
+/**
+ * Backend agent template with strict validation and Zod schemas
+ * Extends the user-facing AgentDefinition but with backend-specific requirements
+ */
 export type AgentTemplate<
   P = string | undefined,
   T = Record<string, any> | undefined,
@@ -59,11 +73,4 @@ export type StepHandler<
   logger: Logger,
 ) => StepGenerator
 
-export interface Logger {
-  debug: (data: any, msg?: string) => void
-  info: (data: any, msg?: string) => void
-  warn: (data: any, msg?: string) => void
-  error: (data: any, msg?: string) => void
-}
-
-export { PublicAgentState }
+export { Logger, PublicAgentState }

From b534320ef61af239b6ed58c9e1e8265f2c5c645b Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Thu, 25 Sep 2025 00:44:31 -0700
Subject: [PATCH 05/10] refactor(logging): simplify log handling in QuickJS
 sandbox and remove verbose logging from runProgrammaticStep to reduce noise
 while preserving essential information.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with Codebuff
Co-Authored-By: Codebuff <noreply@codebuff.com>
---
 backend/src/run-programmatic-step.ts | 55 ----------------------------
 backend/src/util/quickjs-sandbox.ts  | 13 ++-----
 2 files changed, 3 insertions(+), 65 deletions(-)

diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index 7fe5b5087f..927eb41b0e 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -74,18 +74,6 @@ export async function runProgrammaticStep(
     stepNumber: number
   },
 ): Promise<{ agentState: AgentState; endTurn: boolean; stepNumber: number }> {
-  logger.info(
-    {
-      agentType: template.id,
-      runId: agentState.runId,
-      hasHandleSteps: !!template.handleSteps,
-      handleStepsType: typeof template.handleSteps,
-      stepNumber,
-      stepsComplete,
-    },
-    'runProgrammaticStep: Starting programmatic step execution',
-  )
-
   if (!template.handleSteps) {
     throw new Error('No step handler found for agent template ' + template.id)
   }
@@ -122,13 +110,6 @@ export async function runProgrammaticStep(
     }
 
     if (typeof template.handleSteps === 'string') {
-      logger.info(
-        {
-          agentType: template.id,
-          runId: agentState.runId,
-        },
-        'runProgrammaticStep: Initializing QuickJS sandbox for string-based generator',
-      )
       // Initialize QuickJS sandbox for string-based generator
       sandbox = await sandboxManager.getOrCreateSandbox(
         agentState.runId,
@@ -142,13 +123,6 @@ export async function runProgrammaticStep(
         streamingLogger, // pass the streaming logger instance
       )
     } else {
-      logger.info(
-        {
-          agentType: template.id,
-          runId: agentState.runId,
-        },
-        'runProgrammaticStep: Initializing native JavaScript generator',
-      )
       // Initialize native generator
       generator = (template.handleSteps as any)(
         {
@@ -159,14 +133,6 @@ export async function runProgrammaticStep(
         streamingLogger,
       )
       runIdToGenerator[agentState.runId] = generator
-      logger.info(
-        {
-          agentType: template.id,
-          runId: agentState.runId,
-          generatorInitialized: !!generator,
-        },
-        'runProgrammaticStep: Native generator initialized successfully',
-      )
     }
   }
 
@@ -228,17 +194,6 @@ export async function runProgrammaticStep(
       creditsBefore = state.agentState.directCreditsUsed
       childrenBefore = state.agentState.childRunIds.length
 
-      logger.info(
-        {
-          agentType: template.id,
-          runId: agentState.runId,
-          usingSandbox: !!sandbox,
-          usingGenerator: !!generator,
-          stepsComplete,
-        },
-        'runProgrammaticStep: About to execute generator step',
-      )
-
       const result = sandbox
         ? await sandbox.executeStep({
             agentState: getPublicAgentState(state.agentState),
@@ -251,16 +206,6 @@ export async function runProgrammaticStep(
             stepsComplete,
           })
 
-      logger.info(
-        {
-          agentType: template.id,
-          runId: agentState.runId,
-          resultDone: result.done,
-          resultValue: result.value,
-        },
-        'runProgrammaticStep: Generator step executed, got result',
-      )
-
       if (result.done) {
         endTurn = true
         break
diff --git a/backend/src/util/quickjs-sandbox.ts b/backend/src/util/quickjs-sandbox.ts
index ba97177922..0a13e9a640 100644
--- a/backend/src/util/quickjs-sandbox.ts
+++ b/backend/src/util/quickjs-sandbox.ts
@@ -103,16 +103,9 @@ export class QuickJSSandbox {
 
             msgStr = msg ? context.getString(msg) : undefined
 
-            if (logger) {
-              if (levelStr === 'debug' && logger.debug) {
-                logger.debug(dataObj, msgStr)
-              } else if (levelStr === 'info' && logger.info) {
-                logger.info(dataObj, msgStr)
-              } else if (levelStr === 'warn' && logger.warn) {
-                logger.warn(dataObj, msgStr)
-              } else if (levelStr === 'error' && logger.error) {
-                logger.error(dataObj, msgStr)
-              }
+            // Call the appropriate logger method if available
+            if (logger?.[levelStr as keyof typeof logger]) {
+              logger[levelStr as keyof typeof logger](dataObj, msgStr)
             }
           } catch (err) {
             // Fallback for logging errors

From 5c62bbc920c2858b470df061c75bebbe304d3ddc Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Thu, 25 Sep 2025 01:15:19 -0700
Subject: [PATCH 06/10] refactor(agent-definition): remove | string from
 handleSteps return type to ensure consistent typing across user-facing agent
 definition files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with Codebuff
Co-Authored-By: Codebuff <noreply@codebuff.com>
---
 .../types/agent-definition.ts                 | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 1f65d40fc3..0f32aebb8e 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -196,17 +196,15 @@ export interface AgentDefinition {
   handleSteps?: (
     context: AgentStepContext,
     logger: Logger,
-  ) =>
-    | Generator<
-        ToolCall | 'STEP' | 'STEP_ALL',
-        void,
-        {
-          agentState: AgentState
-          toolResult: ToolResultOutput[] | undefined
-          stepsComplete: boolean
-        }
-      >
-    | string
+  ) => Generator<
+    ToolCall | 'STEP' | 'STEP_ALL',
+    void,
+    {
+      agentState: AgentState
+      toolResult: ToolResultOutput[] | undefined
+      stepsComplete: boolean
+    }
+  >
 }
 
 // ============================================================================

From 2cf6321e4721682da99bd426b68cde4e70dce200 Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Thu, 25 Sep 2025 14:27:56 -0700
Subject: [PATCH 07/10] fix: handle spinners in npm-app with onChunk

---
 npm-app/src/client.ts | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/npm-app/src/client.ts b/npm-app/src/client.ts
index 3694f82f88..31ee6df92a 100644
--- a/npm-app/src/client.ts
+++ b/npm-app/src/client.ts
@@ -205,6 +205,7 @@ export class Client {
   private responseComplete: boolean = false
   private userInputId: string | undefined
   private nonCancelledUserInputIds: string[] = []
+  private currentOnChunk: ((chunk: string | PrintModeEvent) => void) | undefined
 
   public usageData: UsageData = {
     usage: 0,
@@ -959,8 +960,12 @@ export class Client {
       // Format the log message for display
       const formattedMessage = this.formatLogMessage(level, data, message)
 
-      // Display the log message immediately
-      if (formattedMessage) {
+      // Display the log message using onChunk if we're in an active user input session
+      if (formattedMessage && this.userInputId) {
+        // Use the onChunk callback to properly handle spinner state
+        this.handleLogChunk(formattedMessage + '\n')
+      } else if (formattedMessage) {
+        // Fallback to direct stdout for non-user-input scenarios
         process.stdout.write(formattedMessage + '\n')
       }
     })
@@ -993,6 +998,18 @@ export class Client {
     return String(data)
   }
 
+  /**
+   * Handle log chunks by using the current onChunk callback if available
+   */
+  private handleLogChunk(formattedMessage: string): void {
+    if (this.currentOnChunk) {
+      this.currentOnChunk(formattedMessage)
+    } else {
+      // Fallback to direct stdout if no onChunk callback is available
+      process.stdout.write(formattedMessage)
+    }
+  }
+
   private showUsageWarning() {
     // Determine user state based on login status and credit balance
     const state = match({
@@ -1304,12 +1321,14 @@ export class Client {
     })
 
     this.userInputId = userInputId
+    this.currentOnChunk = onChunk
 
     const stopResponse = () => {
       responseStopped = true
       unsubscribeChunks()
       unsubscribeComplete()
       this.cancelCurrentInput()
+      this.currentOnChunk = undefined
 
       const additionalMessages = prompt
         ? [
@@ -1531,6 +1550,10 @@ Go to https://www.codebuff.com/config for more information.`) +
           unsubscribeChunks()
           unsubscribeComplete()
         }
+
+        // Clear the onChunk callback when response is complete
+        this.currentOnChunk = undefined
+
         resolveResponse({ ...a, wasStoppedByUser: false })
       },
     )

From 99b5e4047f3f6d4f16444342ce9829bcf6757b63 Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Thu, 25 Sep 2025 14:35:02 -0700
Subject: [PATCH 08/10] =?UTF-8?q?refactor(logger):=20move=20logger=20into?=
 =?UTF-8?q?=20AgentStepContext\n\nCentralizes=20logging=20in=20AgentStepCo?=
 =?UTF-8?q?ntext=20to=20simplify=20step=20signatures=20and=20improve=20con?=
 =?UTF-8?q?text-aware=20tracing.=20This=20reduces=20parameter=20surface=20?=
 =?UTF-8?q?area=20and=20ensures=20logging=20is=20consistently=20tied=20to?=
 =?UTF-8?q?=20agent=20context.\n\n=F0=9F=A4=96=20Generated=20with=20Codebu?=
 =?UTF-8?q?ff\nCo-Authored-By:=20Codebuff=20<noreply@codebuff.com>?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .agents/__tests__/context-pruner.test.ts        | 10 ++++++++--
 .agents/types/agent-definition.ts               | 10 ++++------
 backend/src/run-programmatic-step.ts            | 17 ++++++++---------
 backend/src/util/quickjs-sandbox.ts             |  6 ++++--
 .../types/agent-definition.ts                   | 10 ++++------
 5 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
index 4fcc82560b..97a5ce8508 100644
--- a/.agents/__tests__/context-pruner.test.ts
+++ b/.agents/__tests__/context-pruner.test.ts
@@ -72,7 +72,10 @@ describe('context-pruner handleSteps', () => {
       warn: () => {},
       error: () => {},
     }
-    const generator = contextPruner.handleSteps!({ agentState: mockAgentState }, mockLogger)
+    const generator = contextPruner.handleSteps!({
+      agentState: mockAgentState,
+      logger: mockLogger,
+    })
     const results: any[] = []
     let result = generator.next()
     while (!result.done) {
@@ -336,7 +339,10 @@ describe('context-pruner edge cases', () => {
       warn: () => {},
       error: () => {},
     }
-    const generator = contextPruner.handleSteps!({ agentState: mockAgentState }, mockLogger)
+    const generator = contextPruner.handleSteps!({
+      agentState: mockAgentState,
+      logger: mockLogger,
+    })
     const results: ReturnType<typeof generator.next>['value'][] = []
     let result = generator.next()
     while (!result.done) {
diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index 0f32aebb8e..34f969552f 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -155,7 +155,7 @@ export interface AgentDefinition {
    * Or use 'return' to end the turn.
    *
    * Example 1:
-   * function* handleSteps({ agentStep, prompt, params}, logger) {
+   * function* handleSteps({ agentState, prompt, params, logger }) {
    *   logger.info('Starting file read process')
    *   const { toolResult } = yield {
    *     toolName: 'read_files',
@@ -174,7 +174,7 @@ export interface AgentDefinition {
    * }
    *
    * Example 2:
-   * handleSteps: function* ({ agentState, prompt, params }, logger) {
+   * handleSteps: function* ({ agentState, prompt, params, logger }) {
    *   while (true) {
    *     logger.debug('Spawning thinker agent')
    *     yield {
@@ -193,10 +193,7 @@ export interface AgentDefinition {
    * }
    * }
    */
-  handleSteps?: (
-    context: AgentStepContext,
-    logger: Logger,
-  ) => Generator<
+  handleSteps?: (context: AgentStepContext) => Generator<
     ToolCall | 'STEP' | 'STEP_ALL',
     void,
     {
@@ -230,6 +227,7 @@ export interface AgentStepContext {
   agentState: AgentState
   prompt?: string
   params?: Record<string, any>
+  logger: Logger
 }
 
 /**
diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index 927eb41b0e..15f837fea4 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -118,20 +118,19 @@ export async function runProgrammaticStep(
           agentState,
           prompt,
           params,
+          logger: streamingLogger,
         },
         undefined, // config
-        streamingLogger, // pass the streaming logger instance
+        streamingLogger, // pass the streaming logger instance for internal use
       )
     } else {
       // Initialize native generator
-      generator = (template.handleSteps as any)(
-        {
-          agentState,
-          prompt,
-          params,
-        },
-        streamingLogger,
-      )
+      generator = (template.handleSteps as any)({
+        agentState,
+        prompt,
+        params,
+        logger: streamingLogger,
+      })
       runIdToGenerator[agentState.runId] = generator
     }
   }
diff --git a/backend/src/util/quickjs-sandbox.ts b/backend/src/util/quickjs-sandbox.ts
index 0a13e9a640..1f5cd7a87d 100644
--- a/backend/src/util/quickjs-sandbox.ts
+++ b/backend/src/util/quickjs-sandbox.ts
@@ -143,8 +143,10 @@ export class QuickJSSandbox {
         // Agent function
         const handleSteps = ${generatorCode};
         
-        // Create generator instance
-        let generator = handleSteps(${JSON.stringify(initialInput)}, logger);
+        // Create generator instance with logger injected into context
+        const context = ${JSON.stringify(initialInput)};
+        context.logger = logger;
+        let generator = handleSteps(context);
         
         // Generator management
         globalThis._generator = generator;
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 0f32aebb8e..34f969552f 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -155,7 +155,7 @@ export interface AgentDefinition {
    * Or use 'return' to end the turn.
    *
    * Example 1:
-   * function* handleSteps({ agentStep, prompt, params}, logger) {
+   * function* handleSteps({ agentState, prompt, params, logger }) {
    *   logger.info('Starting file read process')
    *   const { toolResult } = yield {
    *     toolName: 'read_files',
@@ -174,7 +174,7 @@ export interface AgentDefinition {
    * }
    *
    * Example 2:
-   * handleSteps: function* ({ agentState, prompt, params }, logger) {
+   * handleSteps: function* ({ agentState, prompt, params, logger }) {
    *   while (true) {
    *     logger.debug('Spawning thinker agent')
    *     yield {
@@ -193,10 +193,7 @@ export interface AgentDefinition {
    * }
    * }
    */
-  handleSteps?: (
-    context: AgentStepContext,
-    logger: Logger,
-  ) => Generator<
+  handleSteps?: (context: AgentStepContext) => Generator<
     ToolCall | 'STEP' | 'STEP_ALL',
     void,
     {
@@ -230,6 +227,7 @@ export interface AgentStepContext {
   agentState: AgentState
   prompt?: string
   params?: Record<string, any>
+  logger: Logger
 }
 
 /**

From aa389dd96c9383bbfd345bacb490a8173a77075f Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Thu, 25 Sep 2025 14:42:02 -0700
Subject: [PATCH 09/10] fix: types

---
 backend/src/run-programmatic-step.ts |  9 +++++----
 common/src/types/agent-template.ts   | 12 +++++++-----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index 15f837fea4..2d5f9d14fa 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -125,7 +125,7 @@ export async function runProgrammaticStep(
       )
     } else {
       // Initialize native generator
-      generator = (template.handleSteps as any)({
+      generator = template.handleSteps({
         agentState,
         prompt,
         params,
@@ -172,9 +172,10 @@ export async function runProgrammaticStep(
         ...data,
       })
     },
-    agentState: cloneDeep(
-      agentState as AgentState & Required<Pick<AgentState, 'runId'>>,
-    ),
+    agentState: cloneDeep({
+      ...agentState,
+      runId: agentState.runId!, // We've already verified runId exists above
+    }),
     agentContext: cloneDeep(agentState.agentContext),
     messages: cloneDeep(agentState.messageHistory),
   }
diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts
index 45cada0072..6b4cbf31de 100644
--- a/common/src/types/agent-template.ts
+++ b/common/src/types/agent-template.ts
@@ -1,6 +1,6 @@
 /**
  * Backend Agent Template Types
- * 
+ *
  * This file provides backend-compatible agent template types with strict validation.
  * It imports base types from the user-facing template to eliminate duplication.
  */
@@ -68,9 +68,11 @@ export type StepGenerator = Generator<
 export type StepHandler<
   P = string | undefined,
   T = Record<string, any> | undefined,
-> = (
-  params: { agentState: AgentState; prompt: P; params: T },
-  logger: Logger,
-) => StepGenerator
+> = (context: {
+  agentState: AgentState
+  prompt: P
+  params: T
+  logger: Logger
+}) => StepGenerator
 
 export { Logger, PublicAgentState }

From ee8f10cb9ecf08eceb5e86351b223a94989ec7c1 Mon Sep 17 00:00:00 2001
From: brandonkachen <brandonchenjiacheng@gmail.com>
Date: Thu, 25 Sep 2025 15:07:11 -0700
Subject: [PATCH 10/10] fix: cleaner logic for logging client-side

---
 npm-app/src/client.ts | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/npm-app/src/client.ts b/npm-app/src/client.ts
index 31ee6df92a..bf9e7f6db1 100644
--- a/npm-app/src/client.ts
+++ b/npm-app/src/client.ts
@@ -956,22 +956,27 @@ export class Client {
     // Handle handleSteps log streaming
     this.webSocket.subscribe('handlesteps-log-chunk', (action) => {
       const { agentId, level, data, message } = action
+      const formattedMessage = this.formatLogMessage(
+        level,
+        data,
+        message,
+        agentId,
+      )
 
-      // Format the log message for display
-      const formattedMessage = this.formatLogMessage(level, data, message)
-
-      // Display the log message using onChunk if we're in an active user input session
-      if (formattedMessage && this.userInputId) {
-        // Use the onChunk callback to properly handle spinner state
-        this.handleLogChunk(formattedMessage + '\n')
-      } else if (formattedMessage) {
-        // Fallback to direct stdout for non-user-input scenarios
+      if (this.currentOnChunk && this.userInputId) {
+        this.currentOnChunk(formattedMessage + '\n')
+      } else {
         process.stdout.write(formattedMessage + '\n')
       }
     })
   }
 
-  private formatLogMessage(level: string, data: any, message?: string): string {
+  private formatLogMessage(
+    level: string,
+    data: any,
+    message?: string,
+    agentId?: string,
+  ): string {
     const timestamp = new Date().toISOString().substring(11, 23) // HH:MM:SS.mmm
     const levelColors = { debug: blue, info: green, warn: yellow, error: red }
     const levelColor =
@@ -979,9 +984,12 @@ export class Client {
 
     const timeTag = `[${timestamp}]`
     const levelTag = levelColor(`[${level.toUpperCase()}]`)
+    const agentTag = agentId ? `[Agent ${agentId}]` : ''
     const dataStr = this.serializeLogData(data)
 
-    return [timeTag, levelTag, message, dataStr].filter(Boolean).join(' ')
+    return [timeTag, levelTag, agentTag, message, dataStr]
+      .filter(Boolean)
+      .join(' ')
   }
 
   private serializeLogData(data: any): string {
@@ -998,18 +1006,6 @@ export class Client {
     return String(data)
   }
 
-  /**
-   * Handle log chunks by using the current onChunk callback if available
-   */
-  private handleLogChunk(formattedMessage: string): void {
-    if (this.currentOnChunk) {
-      this.currentOnChunk(formattedMessage)
-    } else {
-      // Fallback to direct stdout if no onChunk callback is available
-      process.stdout.write(formattedMessage)
-    }
-  }
-
   private showUsageWarning() {
     // Determine user state based on login status and credit balance
     const state = match({