Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/batch-evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ Built-in evaluators provided by AgentCore:
| `Builtin.Faithfulness` | Grounding in tool results / provided context |
| `Builtin.GoalSuccessRate` | Whether the agent achieved the user's goal |
| `Builtin.ToolSelectionAccuracy` | Correct tool chosen for the task |
| `Builtin.Completeness` | Whether all parts of the request were handled |
| `Builtin.TrajectoryExactOrderMatch` | Tool call sequence matches expected trajectory |

Custom evaluators defined in your project (via `agentcore add evaluator`) can also be used.
Expand Down
2 changes: 1 addition & 1 deletion docs/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ agentcore run batch-evaluation \
# Drive batch evaluation with a dataset
agentcore run batch-evaluation \
-r MyAgent \
-e Builtin.Completeness \
-e Builtin.Correctness \
--dataset MyDataset --dataset-version DRAFT
```

Expand Down
41 changes: 41 additions & 0 deletions src/cli/operations/jobs/ab-test/__tests__/promote.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,47 @@ describe('promoteABTestConfig (record-driven)', () => {
expect(bundle.components['{{runtime:r}}'].configuration.systemPrompt).toBe('NEW');
});

// Regression test: promoting a config-bundle A/B test must keep the project's portable
// `{{runtime:<name>}}` component key even though the service response is keyed by ARN.
it('restores portable {{runtime:...}} keys when the service returns ARN-keyed components', async () => {
const RUNTIME_ARN = 'arn:aws:bedrock-agentcore:us-east-1:1:runtime/cbbugbash_cbagent-N5owhv3MRl';
const project = makeConfigBundleProject();
// Local bundle uses the portable placeholder for the runtime named "r".
project.configBundles[0]!.components = { '{{runtime:r}}': { configuration: { systemPrompt: 'OLD' } } };
mockReadProjectSpec.mockResolvedValue(project);
// Deployed state records the bundle `promptBundle` under BUNDLE_ARN and the runtime `r`
// under RUNTIME_ARN.
mockReadDeployedState.mockResolvedValue({
targets: {
default: {
resources: {
configBundles: {
promptBundle: { bundleId: 'promptBundle-abc123', bundleArn: BUNDLE_ARN, versionId: 'v1' },
},
runtimes: { r: { runtimeArn: RUNTIME_ARN } },
},
},
},
});
// Service keys the winning version's components by the resolved (hardcoded) runtime ARN.
mockGetConfigurationBundleVersion.mockResolvedValue({
components: { [RUNTIME_ARN]: { configuration: { systemPrompt: 'NEW' } } },
});

// Both variants reference the same bundle ARN and differ only in bundleVersion.
const record = baseRecord({
mode: 'config-bundle',
variants: [
{ name: 'C', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v1' },
{ name: 'T1', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v2' },
],
});

const result = await promoteABTestConfig(record);

expect(result.promoted).toBe(true);
// Inspect the project spec passed to the first writeProjectSpec call.
const written = mockWriteProjectSpec.mock.calls[0]![0];
const bundle = written.configBundles.find((b: { name: string }) => b.name === 'promptBundle');
// Placeholder preserved, ARN NOT written into the committed config.
expect(bundle.components['{{runtime:r}}'].configuration.systemPrompt).toBe('NEW');
expect(bundle.components[RUNTIME_ARN]).toBeUndefined();
});

it('returns promoted=false (error) when control and treatment are DIFFERENT bundles', async () => {
mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject());
mockReadDeployedState.mockResolvedValue(makeBundleDeployedState());
Expand Down
44 changes: 41 additions & 3 deletions src/cli/operations/jobs/ab-test/promote.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { ConfigIO } from '../../../../lib';
import type { DeployedState } from '../../../../schema';
import { getConfigurationBundleVersion } from '../../../aws/agentcore-config-bundles';
import { resolveComponentKeyForJsonPath } from '../recommendation/build-config';
import { regionFromArn } from '../shared/region';
import type { ABTestJobRecord, ABTestVariantSummary } from '../shared/types';

Expand All @@ -9,6 +11,35 @@ function bundleIdFromArn(arn: string): string | undefined {
return id && id.length > 0 ? id : undefined;
}

/**
 * Re-key service-returned bundle components with the project's portable placeholders.
 *
 * A bundle version fetched from the service has its components keyed by the resolved
 * runtime/gateway ARN, which is specific to one account and region. Persisting those keys to
 * agentcore.json would overwrite the committed `{{runtime:<name>}}` / `{{gateway:<name>}}`
 * placeholders with hardcoded ARNs and make the config non-portable.
 *
 * To undo that, every non-ARN key of the LOCAL bundle is run through the same resolver that
 * deploy uses, producing an ARN→placeholder lookup. Each incoming key found in that lookup is
 * swapped for its placeholder; keys without a match are kept exactly as received.
 *
 * @param serviceComponents Components as returned by the service.
 * @param localComponents Components currently stored for the bundle in the project, if any.
 * @param deployedState Deployed state handed to the resolver.
 * @returns A new record holding the same values under the restored keys.
 */
function restorePlaceholderKeys<T>(
  serviceComponents: Record<string, T>,
  localComponents: Record<string, T> | undefined,
  deployedState: DeployedState
): Record<string, T> {
  const placeholderByArn = new Map<string, string>();

  if (localComponents) {
    Object.keys(localComponents).forEach(localKey => {
      // Keys that are already ARNs have no placeholder to recover.
      if (localKey.startsWith('arn:')) return;
      const resolved = resolveComponentKeyForJsonPath(localKey, deployedState);
      // The resolver handing back the key unchanged means nothing was resolved.
      if (resolved !== localKey) placeholderByArn.set(resolved, localKey);
    });
  }

  return Object.entries(serviceComponents).reduce<Record<string, T>>((restored, [serviceKey, component]) => {
    const targetKey = placeholderByArn.get(serviceKey) ?? serviceKey;
    restored[targetKey] = component;
    return restored;
  }, {});
}

export interface PromoteABTestResult {
promoted: boolean;
mode?: string;
Expand Down Expand Up @@ -156,13 +187,14 @@ export async function promoteABTestConfig(record: ABTestJobRecord, dryRun = fals
}

let controlName: string | undefined;
let deployedState: DeployedState | undefined;
try {
const deployedState = await configIO.readDeployedState();
deployedState = await configIO.readDeployedState();
controlName = bundleNameFromArn(deployedState, control.bundleArn);
} catch {
// deployed state unavailable
}
if (!controlName) {
if (!controlName || !deployedState) {
return {
promoted: false,
mode,
Expand Down Expand Up @@ -196,7 +228,13 @@ export async function promoteABTestConfig(record: ABTestJobRecord, dryRun = fals
bundleId,
versionId: treatment.bundleVersion,
});
controlBundle.components = winning.components as typeof controlBundle.components;
// Service keys components by resolved ARN; restore the bundle's portable {{runtime:...}}
// placeholders so the committed config stays cross-account/region portable.
controlBundle.components = restorePlaceholderKeys(
winning.components as Record<string, unknown>,
controlBundle.components as Record<string, unknown>,
deployedState
) as typeof controlBundle.components;
await configIO.writeProjectSpec(project);
}
return {
Expand Down
Loading