From 34dc9fbd0667a4639b187eb0133e3033f79ce576 Mon Sep 17 00:00:00 2001 From: jariy17 Date: Wed, 24 Jun 2026 20:02:58 +0000 Subject: [PATCH 1/2] fix(docs): remove non-existent Builtin.Completeness evaluator Builtin.Completeness is listed in the batch-evaluation docs and used in a CLI example, but the API rejects it ('does not exist'). The valid builtin list lives in run-eval.ts. Drop it from the evaluator table and switch the dataset example to Builtin.Correctness. --- docs/batch-evaluation.md | 1 - docs/commands.md | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/batch-evaluation.md b/docs/batch-evaluation.md index afd4744d4..679a79114 100644 --- a/docs/batch-evaluation.md +++ b/docs/batch-evaluation.md @@ -31,7 +31,6 @@ Built-in evaluators provided by AgentCore: | `Builtin.Faithfulness` | Grounding in tool results / provided context | | `Builtin.GoalSuccessRate` | Whether the agent achieved the user's goal | | `Builtin.ToolSelectionAccuracy` | Correct tool chosen for the task | -| `Builtin.Completeness` | Whether all parts of the request were handled | | `Builtin.TrajectoryExactOrderMatch` | Tool call sequence matches expected trajectory | Custom evaluators defined in your project (via `agentcore add evaluator`) can also be used. 
diff --git a/docs/commands.md b/docs/commands.md index 23ed07b91..1d70fe1e1 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -1001,7 +1001,7 @@ agentcore run batch-evaluation \ # Drive batch evaluation with a dataset agentcore run batch-evaluation \ -r MyAgent \ - -e Builtin.Completeness \ + -e Builtin.Correctness \ --dataset MyDataset --dataset-version DRAFT ``` From 2deb3a6a095dade9e272a6b939e905242e5217a4 Mon Sep 17 00:00:00 2001 From: jariy17 Date: Wed, 24 Jun 2026 20:04:09 +0000 Subject: [PATCH 2/2] fix(ab-test): preserve portable component placeholders on promote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit config-bundle promote fetched the winning version's components from the service, which keys them by resolved (account/region-specific) runtime ARN, and wrote them straight into agentcore.json — replacing the committed {{runtime:...}} placeholders with hardcoded ARNs and breaking cross-account/region portability of the config. Remap the service-returned ARN keys back to the bundle's existing portable placeholders (inverting the same resolver deploy uses) before adopting them. 
--- .../jobs/ab-test/__tests__/promote.test.ts | 41 +++++++++++++++++ src/cli/operations/jobs/ab-test/promote.ts | 44 +++++++++++++++++-- 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts b/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts index 83b88c193..864210398 100644 --- a/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts +++ b/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts @@ -286,6 +286,47 @@ describe('promoteABTestConfig (record-driven)', () => { expect(bundle.components['{{runtime:r}}'].configuration.systemPrompt).toBe('NEW'); }); + it('restores portable {{runtime:...}} keys when the service returns ARN-keyed components', async () => { + const RUNTIME_ARN = 'arn:aws:bedrock-agentcore:us-east-1:1:runtime/cbbugbash_cbagent-N5owhv3MRl'; + const project = makeConfigBundleProject(); + // Local bundle uses the portable placeholder for the runtime named "r". + project.configBundles[0]!.components = { '{{runtime:r}}': { configuration: { systemPrompt: 'OLD' } } }; + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockResolvedValue({ + targets: { + default: { + resources: { + configBundles: { + promptBundle: { bundleId: 'promptBundle-abc123', bundleArn: BUNDLE_ARN, versionId: 'v1' }, + }, + runtimes: { r: { runtimeArn: RUNTIME_ARN } }, + }, + }, + }, + }); + // Service keys the winning version's components by the resolved (hardcoded) runtime ARN. 
+ mockGetConfigurationBundleVersion.mockResolvedValue({ + components: { [RUNTIME_ARN]: { configuration: { systemPrompt: 'NEW' } } }, + }); + + const record = baseRecord({ + mode: 'config-bundle', + variants: [ + { name: 'C', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v1' }, + { name: 'T1', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v2' }, + ], + }); + + const result = await promoteABTestConfig(record); + + expect(result.promoted).toBe(true); + const written = mockWriteProjectSpec.mock.calls[0]![0]; + const bundle = written.configBundles.find((b: { name: string }) => b.name === 'promptBundle'); + // Placeholder preserved, ARN NOT written into the committed config. + expect(bundle.components['{{runtime:r}}'].configuration.systemPrompt).toBe('NEW'); + expect(bundle.components[RUNTIME_ARN]).toBeUndefined(); + }); + it('returns promoted=false (error) when control and treatment are DIFFERENT bundles', async () => { mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); mockReadDeployedState.mockResolvedValue(makeBundleDeployedState()); diff --git a/src/cli/operations/jobs/ab-test/promote.ts b/src/cli/operations/jobs/ab-test/promote.ts index a70aea72b..d705f53fc 100644 --- a/src/cli/operations/jobs/ab-test/promote.ts +++ b/src/cli/operations/jobs/ab-test/promote.ts @@ -1,5 +1,7 @@ import { ConfigIO } from '../../../../lib'; +import type { DeployedState } from '../../../../schema'; import { getConfigurationBundleVersion } from '../../../aws/agentcore-config-bundles'; +import { resolveComponentKeyForJsonPath } from '../recommendation/build-config'; import { regionFromArn } from '../shared/region'; import type { ABTestJobRecord, ABTestVariantSummary } from '../shared/types'; @@ -9,6 +11,35 @@ function bundleIdFromArn(arn: string): string | undefined { return id && id.length > 0 ? id : undefined; } +/** + * Restore portable component keys when adopting service-returned components. 
+ * + * The service keys a bundle version's components by resolved runtime/gateway ARN (account- and + * region-specific). Writing those straight into agentcore.json would replace the committed, + * portable `{{runtime:}}` / `{{gateway:}}` placeholders with hardcoded ARNs, breaking + * cross-account/region reuse of the config. We rebuild the placeholder→ARN map from the LOCAL + * bundle's existing keys (via the same resolver deploy uses) and invert it, so each incoming ARN + * key is rewritten back to the placeholder the project already uses. ARNs with no matching local + * placeholder are passed through unchanged. + */ +function restorePlaceholderKeys( + serviceComponents: Record, + localComponents: Record | undefined, + deployedState: DeployedState +): Record { + const arnToPlaceholder = new Map(); + for (const key of Object.keys(localComponents ?? {})) { + if (key.startsWith('arn:')) continue; + const arn = resolveComponentKeyForJsonPath(key, deployedState); + if (arn !== key) arnToPlaceholder.set(arn, key); + } + const remapped: Record = {}; + for (const [key, value] of Object.entries(serviceComponents)) { + remapped[arnToPlaceholder.get(key) ?? 
key] = value; + } + return remapped; +} + export interface PromoteABTestResult { promoted: boolean; mode?: string; @@ -156,13 +187,14 @@ export async function promoteABTestConfig(record: ABTestJobRecord, dryRun = fals } let controlName: string | undefined; + let deployedState: DeployedState | undefined; try { - const deployedState = await configIO.readDeployedState(); + deployedState = await configIO.readDeployedState(); controlName = bundleNameFromArn(deployedState, control.bundleArn); } catch { // deployed state unavailable } - if (!controlName) { + if (!controlName || !deployedState) { return { promoted: false, mode, @@ -196,7 +228,13 @@ export async function promoteABTestConfig(record: ABTestJobRecord, dryRun = fals bundleId, versionId: treatment.bundleVersion, }); - controlBundle.components = winning.components as typeof controlBundle.components; + // Service keys components by resolved ARN; restore the bundle's portable {{runtime:...}} + // placeholders so the committed config stays cross-account/region portable. + controlBundle.components = restorePlaceholderKeys( + winning.components as Record, + controlBundle.components as Record, + deployedState + ) as typeof controlBundle.components; await configIO.writeProjectSpec(project); } return {