From 706ddd7f720b7289eb2c824e3ab0905fa73f17b7 Mon Sep 17 00:00:00 2001 From: Justin Beckwith Date: Mon, 13 Apr 2026 13:30:37 -0700 Subject: [PATCH 1/3] fix(promptfoo): verify generated configs by relative path --- plugins/promptfoo/src/agent/loop.ts | 4 ++- .../src/generator/config-outputdir.test.ts | 34 +++++++++++++++++++ plugins/promptfoo/src/generator/config.ts | 2 ++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 plugins/promptfoo/src/generator/config-outputdir.test.ts diff --git a/plugins/promptfoo/src/agent/loop.ts b/plugins/promptfoo/src/agent/loop.ts index 63a9c1e..0cfa7cb 100644 --- a/plugins/promptfoo/src/agent/loop.ts +++ b/plugins/promptfoo/src/agent/loop.ts @@ -41,6 +41,7 @@ export interface ToolResult { interface AgentState { configFile?: string; + verifyConfigFile?: string; providerFile?: string; envVars: Record; verified: boolean; @@ -248,6 +249,7 @@ async function executeTool( }); state.configFile = generated.filePath; + state.verifyConfigFile = generated.verifyPath; state.envVars = { ...state.envVars, ...generated.envVars }; result = { @@ -277,7 +279,7 @@ async function executeTool( configFile?: string; }; - const configPath = configFile || state.configFile || 'promptfooconfig.yaml'; + const configPath = configFile || state.verifyConfigFile || 'promptfooconfig.yaml'; const steps: string[] = []; // Step 1: Direct provider smoke + session test diff --git a/plugins/promptfoo/src/generator/config-outputdir.test.ts b/plugins/promptfoo/src/generator/config-outputdir.test.ts new file mode 100644 index 0000000..0fa55c2 --- /dev/null +++ b/plugins/promptfoo/src/generator/config-outputdir.test.ts @@ -0,0 +1,34 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; + +import { afterEach, describe, expect, it } from 'vitest'; + +import { generateConfig } from './config.js'; + +const tempDirs: string[] = []; + +afterEach(() => { + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } +}); + +describe('generateConfig output paths', () => { + it('returns a verify path relative to the output directory', () => { + const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crabcode-config-')); + tempDirs.push(outputDir); + + const generated = generateConfig({ + description: 'Test config', + providerType: 'http', + providerConfig: { url: 'https://example.com', method: 'GET' }, + outputDir, + filename: 'nested-config.yaml', + }); + + expect(generated.filePath).toBe(path.join(outputDir, 'nested-config.yaml')); + expect(generated.verifyPath).toBe('nested-config.yaml'); + expect(fs.existsSync(generated.filePath)).toBe(true); + }); +}); diff --git a/plugins/promptfoo/src/generator/config.ts b/plugins/promptfoo/src/generator/config.ts index bc749e8..34b4e33 100644 --- a/plugins/promptfoo/src/generator/config.ts +++ b/plugins/promptfoo/src/generator/config.ts @@ -21,6 +21,7 @@ export interface GenerateConfigOptions { export interface GeneratedConfig { yaml: string; filePath: string; + verifyPath: string; envVars: Record; } @@ -104,6 +105,7 @@ ${Object.entries(envVars).map(([k, v]) => `# ${k}: ${v}`).join('\n') || '# ( return { yaml: fullYaml, filePath, + verifyPath: filename, envVars, }; } From 4d692494a0e5b924027eb4a90b6b03c1404fcfb1 Mon Sep 17 00:00:00 2001 From: Justin Beckwith Date: Mon, 13 Apr 2026 13:31:27 -0700 Subject: [PATCH 2/3] fix(promptfoo): verify generated configs via stable filename --- .../src/generator/config-filename.test.ts | 36 +++++++++++++++++++ .../src/generator/config-outputdir.test.ts | 2 +- plugins/promptfoo/src/generator/config.ts | 11 ++++-- 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 plugins/promptfoo/src/generator/config-filename.test.ts diff --git a/plugins/promptfoo/src/generator/config-filename.test.ts b/plugins/promptfoo/src/generator/config-filename.test.ts new file mode 100644 index 0000000..436322b --- /dev/null +++ b/plugins/promptfoo/src/generator/config-filename.test.ts @@ -0,0 +1,36 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; + +import { afterEach, describe, expect, it } from 'vitest'; + +import { generateConfig } from './config.js'; + +const tempDirs: string[] = []; + +afterEach(() => { + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } +}); + +describe('generateConfig filename handling', () => { + it('keeps the requested filename while writing a stable verify config alias', () => { + const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crabcode-config-')); + tempDirs.push(outputDir); + + const generated = generateConfig({ + description: 'Custom filename config', + providerType: 'http', + providerConfig: { url: 'https://example.com', method: 'GET' }, + outputDir, + filename: 'custom-config.yaml', + }); + + expect(generated.filePath).toBe(path.join(outputDir, 'custom-config.yaml')); + expect(generated.verifyPath).toBe('promptfooconfig.yaml'); + expect(fs.readFileSync(generated.filePath, 'utf-8')).toBe( + fs.readFileSync(path.join(outputDir, generated.verifyPath), 'utf-8') + ); + }); +}); diff --git a/plugins/promptfoo/src/generator/config-outputdir.test.ts b/plugins/promptfoo/src/generator/config-outputdir.test.ts index 0fa55c2..fa5fe05 100644 --- a/plugins/promptfoo/src/generator/config-outputdir.test.ts +++ b/plugins/promptfoo/src/generator/config-outputdir.test.ts @@ -28,7 +28,7 @@ describe('generateConfig output paths', () => { }); expect(generated.filePath).toBe(path.join(outputDir, 'nested-config.yaml')); - expect(generated.verifyPath).toBe('nested-config.yaml'); + expect(generated.verifyPath).toBe('promptfooconfig.yaml'); expect(fs.existsSync(generated.filePath)).toBe(true); }); }); diff --git a/plugins/promptfoo/src/generator/config.ts b/plugins/promptfoo/src/generator/config.ts index 34b4e33..eb90677 100644 --- a/plugins/promptfoo/src/generator/config.ts +++ b/plugins/promptfoo/src/generator/config.ts @@ -25,6 +25,8 @@ export interface GeneratedConfig { envVars: Record; } +const DEFAULT_CONFIG_FILENAME = 'promptfooconfig.yaml'; + /** * Generate a promptfoo YAML config */ @@ -35,7 +37,7 @@ export function generateConfig(options: GenerateConfigOptions): GeneratedConfig providerConfig, envVars = {}, outputDir = '.', - filename = 'promptfooconfig.yaml', + filename = DEFAULT_CONFIG_FILENAME, } = options; // Validate providerConfig has required fields for http provider @@ -101,11 +103,16 @@ ${Object.entries(envVars).map(([k, v]) => `# ${k}: ${v}`).join('\n') || '# ( // Write the file const filePath = path.join(outputDir, filename); fs.writeFileSync(filePath, fullYaml, 'utf-8'); + const verifyPath = DEFAULT_CONFIG_FILENAME; + + if (verifyPath !== filename) { + fs.writeFileSync(path.join(outputDir, verifyPath), fullYaml, 'utf-8'); + } return { yaml: fullYaml, filePath, - verifyPath: filename, + verifyPath, envVars, }; } From fe6ce86207e3d34fcecba4726b9cb1a21cb9861e Mon Sep 17 00:00:00 2001 From: Justin Beckwith Date: Mon, 13 Apr 2026 13:32:15 -0700 Subject: [PATCH 3/3] fix(promptfoo): run promptfoo eval without shell --- plugins/promptfoo/src/agent/loop-eval.test.ts | 32 +++++++++++++++++++ plugins/promptfoo/src/agent/loop.ts | 32 +++++++++++++------ 2 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 plugins/promptfoo/src/agent/loop-eval.test.ts diff --git a/plugins/promptfoo/src/agent/loop-eval.test.ts b/plugins/promptfoo/src/agent/loop-eval.test.ts new file mode 100644 index 0000000..930a8f2 --- /dev/null +++ b/plugins/promptfoo/src/agent/loop-eval.test.ts @@ -0,0 +1,32 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const execFileSyncMock = vi.fn(); + +vi.mock('node:child_process', () => ({ + execFileSync: execFileSyncMock, + execSync: vi.fn(), +})); + +describe('runPromptfooEval', () => { + beforeEach(() => { + execFileSyncMock.mockReset(); + }); + + it('invokes promptfoo eval with explicit argv', async () => { + execFileSyncMock.mockReturnValue('1 passed'); + + const { runPromptfooEval } = await import('./loop.js'); + + expect(runPromptfooEval('/tmp/job dir', 'promptfooconfig.yaml')).toBe('1 passed'); + expect(execFileSyncMock).toHaveBeenCalledWith( + 'npx', + ['promptfoo', 'eval', '-c', 'promptfooconfig.yaml', '--no-progress-bar'], + expect.objectContaining({ + cwd: '/tmp/job dir', + timeout: 120000, + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'pipe'], + }) + ); + }); +}); diff --git a/plugins/promptfoo/src/agent/loop.ts b/plugins/promptfoo/src/agent/loop.ts index 0cfa7cb..60a43a7 100644 --- a/plugins/promptfoo/src/agent/loop.ts +++ b/plugins/promptfoo/src/agent/loop.ts @@ -14,7 +14,7 @@ import type { LLMProvider, Message, ToolCall, ChatResponse } from './providers.j import type { DiscoveryResult } from '../types.js'; import * as fs from 'node:fs'; import * as path from 'node:path'; -import { execSync } from 'node:child_process'; +import { execFileSync, execSync } from 'node:child_process'; import { pathToFileURL } from 'node:url'; export interface AgentOptions { @@ -194,6 +194,19 @@ Steps: }; } +export function runPromptfooEval(outputDir: string, configPath: string): string { + return execFileSync( + 'npx', + ['promptfoo', 'eval', '-c', configPath, '--no-progress-bar'], + { + cwd: outputDir, + timeout: 120000, + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'pipe'], + } + ); +} + /** * Execute a single tool call */ @@ -331,10 +344,7 @@ async function executeTool( // Step 2: Run promptfoo eval try { - const output = execSync( - `cd "${outputDir}" && npx promptfoo eval -c "${configPath}" --no-progress-bar 2>&1`, - { timeout: 120000, encoding: 'utf-8' } - ); + const output = runPromptfooEval(outputDir, configPath); const passMatch = output.match(/(\d+) passed/); const failMatch = output.match(/(\d+) failed/); @@ -362,13 +372,15 @@ async function executeTool( steps, }; } catch (error) { - const err = error as { message: string; stdout?: string; stderr?: string }; - const stdout = err.stdout || ''; + const err = error as { message: string; stdout?: string | Buffer; stderr?: string | Buffer }; + const stdout = typeof err.stdout === 'string' ? err.stdout : err.stdout?.toString('utf-8') || ''; + const stderr = typeof err.stderr === 'string' ? err.stderr : err.stderr?.toString('utf-8') || ''; + const combinedOutput = stdout + stderr; - const passMatch = stdout.match(/(\d+) passed/); + const passMatch = combinedOutput.match(/(\d+) passed/); const passed = passMatch ? parseInt(passMatch[1]) : 0; - if (passed > 0 && !stdout.includes('failed')) { + if (passed > 0 && !combinedOutput.includes('failed')) { steps.push(`Eval PASSED (non-zero exit): ${passed} passed`); state.verified = true; } else { @@ -379,7 +391,7 @@ async function executeTool( result = { success: state.verified, error: state.verified ? undefined : err.message, - stdout: stdout.slice(0, 1000), + stdout: combinedOutput.slice(0, 1000), steps, }; }