diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
index 0e2f5950..52c7d853 100644
--- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
+++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
@@ -372,6 +372,8 @@ test('AgentCoreStack synthesizes with empty spec', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     },
   });
   const template = Template.fromStack(stack);
diff --git a/src/assets/cdk/test/cdk.test.ts b/src/assets/cdk/test/cdk.test.ts
index 5ff491d1..40021c58 100644
--- a/src/assets/cdk/test/cdk.test.ts
+++ b/src/assets/cdk/test/cdk.test.ts
@@ -11,6 +11,8 @@ test('AgentCoreStack synthesizes with empty spec', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     },
   });
   const template = Template.fromStack(stack);
diff --git a/src/cli/aws/__tests__/agentcore-control.test.ts b/src/cli/aws/__tests__/agentcore-control.test.ts
index 9ec6bae3..3683eb08 100644
--- a/src/cli/aws/__tests__/agentcore-control.test.ts
+++ b/src/cli/aws/__tests__/agentcore-control.test.ts
@@ -1,4 +1,10 @@
-import { getAgentRuntimeStatus } from '../agentcore-control.js';
+import {
+  getAgentRuntimeStatus,
+  getEvaluator,
+  getOnlineEvaluationConfig,
+  listEvaluators,
+  updateOnlineEvalExecutionStatus,
+} from '../agentcore-control.js';
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 const { mockSend } = vi.hoisted(() => ({
@@ -12,6 +18,18 @@ vi.mock('@aws-sdk/client-bedrock-agentcore-control', () => ({
   GetAgentRuntimeCommand: class {
     constructor(public input: unknown) {}
   },
+  GetEvaluatorCommand: class {
+    constructor(public input: unknown) {}
+  },
+  GetOnlineEvaluationConfigCommand: class {
+    constructor(public input: unknown) {}
+  },
+  ListEvaluatorsCommand: class {
+    constructor(public input: unknown) {}
+  },
+  UpdateOnlineEvaluationConfigCommand: class {
+    constructor(public input: unknown) {}
+  },
 }));
 
 vi.mock('../account', () => ({
@@ -56,3 +74,292 @@ describe('getAgentRuntimeStatus', () => {
     );
   });
 });
+
+describe('getEvaluator', () => { // exercises getEvaluator against the hoisted mockSend stub
+  beforeEach(() => {
+    vi.clearAllMocks(); // start every test with an empty mockSend call history
+  });
+
+  it('returns evaluator details', async () => {
+    mockSend.mockResolvedValue({
+      evaluatorId: 'eval-123',
+      evaluatorArn: 'arn:aws:bedrock-agentcore:us-east-1:123456:evaluator/eval-123',
+      evaluatorName: 'my-evaluator',
+      level: 'SESSION',
+      status: 'ACTIVE',
+      description: 'A test evaluator',
+    });
+
+    const result = await getEvaluator({ region: 'us-east-1', evaluatorId: 'eval-123' });
+    expect(result.evaluatorId).toBe('eval-123');
+    expect(result.evaluatorName).toBe('my-evaluator');
+    expect(result.level).toBe('SESSION');
+    expect(result.status).toBe('ACTIVE');
+    expect(result.description).toBe('A test evaluator');
+  });
+
+  it('throws when no evaluatorId in response', async () => {
+    mockSend.mockResolvedValue({ evaluatorId: undefined }); // call resolves, but without an ID
+
+    await expect(getEvaluator({ region: 'us-east-1', evaluatorId: 'eval-missing' })).rejects.toThrow(
+      'No evaluator found for ID eval-missing'
+    );
+  });
+
+  it('passes correct evaluatorId in command', async () => {
+    mockSend.mockResolvedValue({
+      evaluatorId: 'eval-abc',
+      evaluatorName: 'test',
+      level: 'TRACE',
+      status: 'ACTIVE',
+    });
+
+    await getEvaluator({ region: 'us-west-2', evaluatorId: 'eval-abc' });
+
+    const command = mockSend.mock.calls[0]![0]; // first argument of the first mockSend call
+    expect(command.input.evaluatorId).toBe('eval-abc'); // .input is what the mocked command class stored
+  });
+
+  it('defaults level to SESSION when undefined', async () => {
+    mockSend.mockResolvedValue({
+      evaluatorId: 'eval-no-level',
+      level: undefined,
+      status: 'ACTIVE',
+    });
+
+    const result = await getEvaluator({ region: 'us-east-1', evaluatorId: 'eval-no-level' });
+    expect(result.level).toBe('SESSION');
+  });
+
+  it('propagates SDK errors', async () => {
+    mockSend.mockRejectedValue(new Error('AccessDenied')); // the stubbed call rejects
+
+    await expect(getEvaluator({ region: 'us-east-1', evaluatorId: 'eval-err' })).rejects.toThrow('AccessDenied');
+  });
+});
+
+describe('updateOnlineEvalExecutionStatus', () => { // pause/resume wrapper, driven through mockSend
+  beforeEach(() => {
+    vi.clearAllMocks(); // start every test with an empty mockSend call history
+  });
+
+  it('sends DISABLED to pause and returns result', async () => {
+    mockSend.mockResolvedValue({
+      onlineEvaluationConfigId: 'cfg-123',
+      executionStatus: 'DISABLED',
+      status: 'ACTIVE',
+    });
+
+    const result = await updateOnlineEvalExecutionStatus({
+      region: 'us-east-1',
+      onlineEvaluationConfigId: 'cfg-123',
+      executionStatus: 'DISABLED',
+    });
+
+    expect(result.configId).toBe('cfg-123');
+    expect(result.executionStatus).toBe('DISABLED');
+    expect(result.status).toBe('ACTIVE');
+  });
+
+  it('sends ENABLED to resume', async () => {
+    mockSend.mockResolvedValue({
+      onlineEvaluationConfigId: 'cfg-456',
+      executionStatus: 'ENABLED',
+      status: 'ACTIVE',
+    });
+
+    const result = await updateOnlineEvalExecutionStatus({
+      region: 'us-west-2',
+      onlineEvaluationConfigId: 'cfg-456',
+      executionStatus: 'ENABLED',
+    });
+
+    expect(result.configId).toBe('cfg-456');
+    expect(result.executionStatus).toBe('ENABLED');
+  });
+
+  it('passes correct params in command', async () => {
+    mockSend.mockResolvedValue({
+      onlineEvaluationConfigId: 'cfg-789',
+      executionStatus: 'DISABLED',
+      status: 'ACTIVE',
+    });
+
+    await updateOnlineEvalExecutionStatus({
+      region: 'us-east-1',
+      onlineEvaluationConfigId: 'cfg-789',
+      executionStatus: 'DISABLED',
+    });
+
+    const command = mockSend.mock.calls[0]![0]; // first argument of the first mockSend call
+    expect(command.input.onlineEvaluationConfigId).toBe('cfg-789');
+    expect(command.input.executionStatus).toBe('DISABLED');
+  });
+
+  it('falls back to input values when response fields are undefined', async () => {
+    mockSend.mockResolvedValue({}); // empty response: every result field must come from a fallback
+
+    const result = await updateOnlineEvalExecutionStatus({
+      region: 'us-east-1',
+      onlineEvaluationConfigId: 'cfg-fallback',
+      executionStatus: 'ENABLED',
+    });
+
+    expect(result.configId).toBe('cfg-fallback');
+    expect(result.executionStatus).toBe('ENABLED');
+    expect(result.status).toBe('UNKNOWN'); // no request value exists for status, so the literal default applies
+  });
+
+  it('propagates SDK errors', async () => {
+    mockSend.mockRejectedValue(new Error('Throttling')); // the stubbed call rejects
+
+    await expect(
+      updateOnlineEvalExecutionStatus({
+        region: 'us-east-1',
+        onlineEvaluationConfigId: 'cfg-err',
+        executionStatus: 'DISABLED',
+      })
+    ).rejects.toThrow('Throttling');
+  });
+});
+
+describe('getOnlineEvaluationConfig', () => { // exercises getOnlineEvaluationConfig through mockSend
+  beforeEach(() => {
+    vi.clearAllMocks(); // start every test with an empty mockSend call history
+  });
+
+  it('returns config details with output log group', async () => {
+    mockSend.mockResolvedValue({
+      onlineEvaluationConfigId: 'oec-123',
+      onlineEvaluationConfigArn: 'arn:aws:bedrock-agentcore:us-east-1:123456:online-eval/oec-123',
+      onlineEvaluationConfigName: 'my-online-eval',
+      status: 'ACTIVE',
+      executionStatus: 'ENABLED',
+      description: 'Production eval',
+      outputConfig: {
+        cloudWatchConfig: { logGroupName: '/aws/bedrock-agentcore/evaluations/oec-123' },
+      },
+    });
+
+    const result = await getOnlineEvaluationConfig({ region: 'us-east-1', configId: 'oec-123' });
+    expect(result.configId).toBe('oec-123');
+    expect(result.configName).toBe('my-online-eval');
+    expect(result.status).toBe('ACTIVE');
+    expect(result.executionStatus).toBe('ENABLED');
+    expect(result.description).toBe('Production eval');
+    expect(result.outputLogGroupName).toBe('/aws/bedrock-agentcore/evaluations/oec-123');
+  });
+
+  it('throws when no configId in response', async () => {
+    mockSend.mockResolvedValue({ onlineEvaluationConfigId: undefined }); // resolves, but without an ID
+
+    await expect(getOnlineEvaluationConfig({ region: 'us-east-1', configId: 'oec-missing' })).rejects.toThrow(
+      'No online evaluation config found for ID oec-missing'
+    );
+  });
+
+  it('returns failureReason when present', async () => {
+    mockSend.mockResolvedValue({
+      onlineEvaluationConfigId: 'oec-fail',
+      onlineEvaluationConfigName: 'broken-eval',
+      status: 'CREATE_FAILED',
+      executionStatus: 'DISABLED',
+      failureReason: 'IAM role not found',
+    });
+
+    const result = await getOnlineEvaluationConfig({ region: 'us-east-1', configId: 'oec-fail' });
+    expect(result.status).toBe('CREATE_FAILED');
+    expect(result.failureReason).toBe('IAM role not found');
+  });
+
+  it('handles missing outputConfig', async () => {
+    mockSend.mockResolvedValue({ // note: no outputConfig key at all
+      onlineEvaluationConfigId: 'oec-no-output',
+      status: 'CREATING',
+      executionStatus: 'DISABLED',
+    });
+
+    const result = await getOnlineEvaluationConfig({ region: 'us-east-1', configId: 'oec-no-output' });
+    expect(result.outputLogGroupName).toBeUndefined();
+  });
+
+  it('passes correct configId in command', async () => {
+    mockSend.mockResolvedValue({
+      onlineEvaluationConfigId: 'oec-abc',
+      status: 'ACTIVE',
+      executionStatus: 'ENABLED',
+    });
+
+    await getOnlineEvaluationConfig({ region: 'us-west-2', configId: 'oec-abc' });
+
+    const command = mockSend.mock.calls[0]![0]; // first argument of the first mockSend call
+    expect(command.input.onlineEvaluationConfigId).toBe('oec-abc'); // configId is sent under the SDK field name
+  });
+
+  it('propagates SDK errors', async () => {
+    mockSend.mockRejectedValue(new Error('ResourceNotFoundException')); // the stubbed call rejects
+
+    await expect(getOnlineEvaluationConfig({ region: 'us-east-1', configId: 'oec-err' })).rejects.toThrow(
+      'ResourceNotFoundException'
+    );
+  });
+});
+
+describe('listEvaluators', () => { // exercises listEvaluators through mockSend
+  beforeEach(() => {
+    vi.clearAllMocks(); // start every test with an empty mockSend call history
+  });
+
+  it('returns evaluator summaries', async () => {
+    mockSend.mockResolvedValue({
+      evaluators: [
+        {
+          evaluatorId: 'eval-1',
+          evaluatorArn: 'arn:aws:bedrock-agentcore:us-east-1:123456:evaluator/eval-1',
+          evaluatorName: 'Faithfulness',
+          evaluatorType: 'Builtin',
+          status: 'ACTIVE',
+        },
+        {
+          evaluatorId: 'eval-2',
+          evaluatorArn: 'arn:aws:bedrock-agentcore:us-east-1:123456:evaluator/eval-2',
+          evaluatorName: 'my-custom',
+          evaluatorType: 'Custom',
+          status: 'ACTIVE',
+          description: 'A custom evaluator',
+        },
+      ],
+    });
+
+    const result = await listEvaluators({ region: 'us-east-1' });
+    expect(result.evaluators).toHaveLength(2);
+    expect(result.evaluators[0]!.evaluatorName).toBe('Faithfulness');
+    expect(result.evaluators[0]!.evaluatorType).toBe('Builtin');
+    expect(result.evaluators[1]!.evaluatorName).toBe('my-custom');
+    expect(result.evaluators[1]!.description).toBe('A custom evaluator');
+  });
+
+  it('returns empty array when no evaluators', async () => {
+    mockSend.mockResolvedValue({ evaluators: undefined }); // list missing from the response
+
+    const result = await listEvaluators({ region: 'us-east-1' });
+    expect(result.evaluators).toEqual([]);
+  });
+
+  it('passes maxResults and nextToken', async () => {
+    mockSend.mockResolvedValue({ evaluators: [], nextToken: 'token-2' });
+
+    const result = await listEvaluators({ region: 'us-east-1', maxResults: 5, nextToken: 'token-1' });
+
+    const command = mockSend.mock.calls[0]![0]; // first argument of the first mockSend call
+    expect(command.input.maxResults).toBe(5);
+    expect(command.input.nextToken).toBe('token-1'); // request token comes from the caller
+    expect(result.nextToken).toBe('token-2'); // result token comes from the response
+  });
+
+  it('propagates SDK errors', async () => {
+    mockSend.mockRejectedValue(new Error('AccessDeniedException')); // the stubbed call rejects
+
+    await expect(listEvaluators({ region: 'us-east-1' })).rejects.toThrow('AccessDeniedException');
+  });
+});
diff --git a/src/cli/aws/__tests__/agentcore-evaluate.test.ts b/src/cli/aws/__tests__/agentcore-evaluate.test.ts
new file mode 100644
index 00000000..30eafffd
--- /dev/null
+++ b/src/cli/aws/__tests__/agentcore-evaluate.test.ts
@@ -0,0 +1,235 @@
+import { evaluate } from '../agentcore.js';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+const { mockSend } = vi.hoisted(() => ({ // hoisted so the vi.mock factories below can reference it
+  mockSend: vi.fn(),
+}));
+
+vi.mock('@aws-sdk/client-bedrock-agentcore', () => ({
+  BedrockAgentCoreClient: class {
+    send = mockSend; // every client instance shares the same stubbed send()
+  },
+  EvaluateCommand: class {
+    constructor(public input: unknown) {} // keep the constructor argument so tests can inspect it
+  },
+}));
+
+vi.mock('../account', () => ({
+  getCredentialProvider: vi.fn().mockReturnValue({}), // credentials stub: returns an empty object
+}));
+
+describe('evaluate', () => { // exercises evaluate() with the SDK client's send() replaced by mockSend
+  beforeEach(() => {
+    vi.clearAllMocks(); // start every test with an empty mockSend call history
+  });
+
+  it('sends evaluatorId and sessionSpans in the command', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [{ value: 4.0 }],
+    });
+
+    await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [{ traceId: 't1', spanId: 's1' }],
+    });
+
+    const command = mockSend.mock.calls[0]![0]; // the EvaluateCommand stub passed to send()
+    expect(command.input.evaluatorId).toBe('eval-123');
+    expect(command.input.evaluationInput.sessionSpans).toEqual([{ traceId: 't1', spanId: 's1' }]);
+  });
+
+  it('includes spanIds target when targetSpanIds is provided', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [{ value: 3.0 }],
+    });
+
+    await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+      targetSpanIds: ['span-1', 'span-2'],
+    });
+
+    const command = mockSend.mock.calls[0]![0];
+    expect(command.input.evaluationTarget).toEqual({ spanIds: ['span-1', 'span-2'] });
+  });
+
+  it('includes traceIds target when targetTraceIds is provided', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [{ value: 3.0 }],
+    });
+
+    await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+      targetTraceIds: ['trace-1'],
+    });
+
+    const command = mockSend.mock.calls[0]![0];
+    expect(command.input.evaluationTarget).toEqual({ traceIds: ['trace-1'] });
+  });
+
+  it('prefers spanIds over traceIds when both are provided', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [{ value: 3.0 }],
+    });
+
+    await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+      targetSpanIds: ['span-1'],
+      targetTraceIds: ['trace-1'],
+    });
+
+    const command = mockSend.mock.calls[0]![0];
+    expect(command.input.evaluationTarget).toEqual({ spanIds: ['span-1'] }); // traceIds must not be sent
+  });
+
+  it('omits evaluationTarget when neither targetSpanIds nor targetTraceIds provided', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [{ value: 3.0 }],
+    });
+
+    await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+    });
+
+    const command = mockSend.mock.calls[0]![0];
+    expect(command.input.evaluationTarget).toBeUndefined();
+  });
+
+  it('throws when evaluationResults is undefined', async () => {
+    mockSend.mockResolvedValue({ evaluationResults: undefined }); // call resolves, but without results
+
+    await expect(evaluate({ region: 'us-east-1', evaluatorId: 'eval-123', sessionSpans: [] })).rejects.toThrow(
+      'No evaluation results returned'
+    );
+  });
+
+  it('maps response with spanContext correctly', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [
+        {
+          evaluatorArn: 'arn:aws:evaluator/eval-123',
+          evaluatorId: 'eval-123',
+          evaluatorName: 'MyEval',
+          explanation: 'Good quality',
+          value: 4.5,
+          label: 'Excellent',
+          errorMessage: undefined,
+          errorCode: undefined,
+          context: {
+            spanContext: {
+              sessionId: 'sess-1',
+              traceId: 'trace-1',
+              spanId: 'span-1',
+            },
+          },
+          tokenUsage: {
+            inputTokens: 100,
+            outputTokens: 50,
+            totalTokens: 150,
+          },
+        },
+      ],
+    });
+
+    const result = await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+    });
+
+    expect(result.evaluationResults).toHaveLength(1);
+    const r = result.evaluationResults[0]!;
+    expect(r.evaluatorArn).toBe('arn:aws:evaluator/eval-123');
+    expect(r.value).toBe(4.5);
+    expect(r.explanation).toBe('Good quality');
+    expect(r.context).toEqual({ sessionId: 'sess-1', traceId: 'trace-1', spanId: 'span-1' }); // flattened
+    expect(r.tokenUsage).toEqual({ inputTokens: 100, outputTokens: 50, totalTokens: 150 });
+  });
+
+  it('handles response without spanContext', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [
+        {
+          value: 3.0,
+          context: undefined,
+          tokenUsage: undefined,
+        },
+      ],
+    });
+
+    const result = await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+    });
+
+    const r = result.evaluationResults[0]!;
+    expect(r.context).toBeUndefined();
+    expect(r.tokenUsage).toBeUndefined();
+  });
+
+  it('defaults token usage values to 0 when partially undefined', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [
+        {
+          value: 3.0,
+          tokenUsage: {
+            inputTokens: undefined,
+            outputTokens: 25,
+            totalTokens: undefined,
+          },
+        },
+      ],
+    });
+
+    const result = await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+    });
+
+    expect(result.evaluationResults[0]!.tokenUsage).toEqual({
+      inputTokens: 0, // undefined in the response
+      outputTokens: 25,
+      totalTokens: 0, // undefined in the response; not recomputed from the other counts
+    });
+  });
+
+  it('maps error results correctly', async () => {
+    mockSend.mockResolvedValue({
+      evaluationResults: [
+        {
+          value: 0,
+          errorMessage: 'Prompt template missing required field',
+          errorCode: 'TEMPLATE_ERROR',
+        },
+      ],
+    });
+
+    const result = await evaluate({
+      region: 'us-east-1',
+      evaluatorId: 'eval-123',
+      sessionSpans: [],
+    });
+
+    const r = result.evaluationResults[0]!;
+    expect(r.errorMessage).toBe('Prompt template missing required field');
+    expect(r.errorCode).toBe('TEMPLATE_ERROR');
+  });
+
+  it('propagates SDK errors', async () => {
+    mockSend.mockRejectedValue(new Error('AccessDeniedException')); // the stubbed send() rejects
+
+    await expect(evaluate({ region: 'us-east-1', evaluatorId: 'eval-123', sessionSpans: [] })).rejects.toThrow(
+      'AccessDeniedException'
+    );
+  });
+});
diff --git a/src/cli/aws/agentcore-control.ts b/src/cli/aws/agentcore-control.ts
index 84ba4766..40db8e48 100644
--- a/src/cli/aws/agentcore-control.ts
+++ b/src/cli/aws/agentcore-control.ts
@@ -1,5 +1,13 @@
 import { getCredentialProvider } from './account';
-import { BedrockAgentCoreControlClient, GetAgentRuntimeCommand } from '@aws-sdk/client-bedrock-agentcore-control';
+import {
+  BedrockAgentCoreControlClient,
+  DeleteOnlineEvaluationConfigCommand,
+  GetAgentRuntimeCommand,
+  GetEvaluatorCommand,
+  GetOnlineEvaluationConfigCommand,
+  ListEvaluatorsCommand,
+  UpdateOnlineEvaluationConfigCommand,
+} from '@aws-sdk/client-bedrock-agentcore-control';
 
 export interface GetAgentRuntimeStatusOptions {
   region: string;
@@ -35,3 +43,237 @@ export async function getAgentRuntimeStatus(options: GetAgentRuntimeStatusOption
     status: response.status,
   };
 }
+
+// ============================================================================
+// Evaluator
+// ============================================================================
+
+export interface GetEvaluatorOptions { // input for getEvaluator
+  region: string; // region the control client is created with
+  evaluatorId: string; // ID sent in the GetEvaluatorCommand
+}
+
+export interface GetEvaluatorResult { // what getEvaluator returns
+  evaluatorId: string; // always taken from the response (getEvaluator throws without it)
+  evaluatorArn: string; // '' when absent from the response
+  evaluatorName: string; // '' when absent from the response
+  level: string; // 'SESSION' when absent from the response
+  status: string; // 'UNKNOWN' when absent from the response
+  description?: string;
+}
+
+/**
+ * Look up one evaluator by ID via BedrockAgentCoreControlClient.
+ * @throws Error when the response carries no evaluatorId; errors from `send` propagate unchanged.
+ */
+export async function getEvaluator(options: GetEvaluatorOptions): Promise<GetEvaluatorResult> {
+  const { region, evaluatorId } = options;
+  const controlClient = new BedrockAgentCoreControlClient({ region, credentials: getCredentialProvider() });
+  const found = await controlClient.send(new GetEvaluatorCommand({ evaluatorId }));
+
+  // A response without an ID is treated as "not found".
+  if (!found.evaluatorId) {
+    throw new Error(`No evaluator found for ID ${evaluatorId}`);
+  }
+
+  // Optional response fields fall back to fixed placeholder values.
+  const evaluator: GetEvaluatorResult = {
+    evaluatorId: found.evaluatorId,
+    evaluatorArn: found.evaluatorArn ?? '',
+    evaluatorName: found.evaluatorName ?? '',
+    level: found.level ?? 'SESSION',
+    status: found.status ?? 'UNKNOWN',
+    description: found.description,
+  };
+  return evaluator;
+}
+
+export interface ListEvaluatorsOptions { // input for listEvaluators
+  region: string; // region the control client is created with
+  maxResults?: number; // forwarded unchanged to ListEvaluatorsCommand
+  nextToken?: string; // forwarded unchanged to ListEvaluatorsCommand
+}
+
+export interface EvaluatorSummary { // one entry of ListEvaluatorsResult.evaluators
+  evaluatorId: string; // '' when absent from the response entry
+  evaluatorArn: string; // '' when absent from the response entry
+  evaluatorName: string; // '' when absent from the response entry
+  evaluatorType: string; // 'Custom' when absent from the response entry
+  level?: string; // passed through without a default
+  status: string; // 'UNKNOWN' when absent from the response entry
+  description?: string;
+}
+
+export interface ListEvaluatorsResult { // what listEvaluators returns
+  evaluators: EvaluatorSummary[]; // empty when the response has no evaluators list
+  nextToken?: string; // copied from the response
+}
+
+/**
+ * List one page of evaluators.
+ *
+ * `maxResults` and `nextToken` are forwarded as given; the returned `nextToken` is whatever the
+ * response carried. A response without `evaluators` yields an empty list.
+ */
+export async function listEvaluators(options: ListEvaluatorsOptions): Promise<ListEvaluatorsResult> {
+  const { region, maxResults, nextToken } = options;
+  const controlClient = new BedrockAgentCoreControlClient({ region, credentials: getCredentialProvider() });
+  const page = await controlClient.send(new ListEvaluatorsCommand({ maxResults, nextToken }));
+
+  const evaluators: EvaluatorSummary[] = [];
+  for (const item of page.evaluators ?? []) {
+    evaluators.push({
+      evaluatorId: item.evaluatorId ?? '',
+      evaluatorArn: item.evaluatorArn ?? '',
+      evaluatorName: item.evaluatorName ?? '',
+      evaluatorType: item.evaluatorType ?? 'Custom',
+      level: item.level,
+      status: item.status ?? 'UNKNOWN',
+      description: item.description,
+    });
+  }
+
+  return { evaluators, nextToken: page.nextToken };
+}
+
+// ============================================================================
+// Online Eval Config
+// ============================================================================
+
+export type OnlineEvalExecutionStatus = 'ENABLED' | 'DISABLED'; // the two values callers may request
+
+export interface UpdateOnlineEvalStatusOptions { // input for updateOnlineEvalExecutionStatus
+  region: string; // region the control client is created with
+  onlineEvaluationConfigId: string; // config to update
+  executionStatus: OnlineEvalExecutionStatus; // required here, optional in UpdateOnlineEvalOptions
+}
+
+export interface UpdateOnlineEvalOptions { // input for updateOnlineEvalConfig
+  region: string; // region the control client is created with
+  onlineEvaluationConfigId: string; // config to update
+  executionStatus?: OnlineEvalExecutionStatus; // left out of the request when not set
+  description?: string; // left out of the request only when undefined
+}
+
+export interface UpdateOnlineEvalStatusResult { // returned by both update functions
+  configId: string; // response ID, else the requested ID
+  executionStatus: string; // response value, else the requested value, else 'UNKNOWN'
+  status: string; // response value, else 'UNKNOWN'
+}
+
+/**
+ * Set an online evaluation config's execution status (pause/resume) via updateOnlineEvalConfig.
+ */
+export async function updateOnlineEvalExecutionStatus(
+  options: UpdateOnlineEvalStatusOptions
+): Promise<UpdateOnlineEvalStatusResult> {
+  return updateOnlineEvalConfig(options);
+}
+
+/**
+ * Send an UpdateOnlineEvaluationConfig request for the given config.
+ *
+ * `executionStatus` is sent only when truthy; `description` whenever it is not undefined (so ''
+ * is sent). In the result, configId/executionStatus fall back to the requested values and
+ * executionStatus/status default to 'UNKNOWN' when still missing.
+ */
+export async function updateOnlineEvalConfig(options: UpdateOnlineEvalOptions): Promise<UpdateOnlineEvalStatusResult> {
+  const { region, onlineEvaluationConfigId, executionStatus, description } = options;
+  const controlClient = new BedrockAgentCoreControlClient({ region, credentials: getCredentialProvider() });
+
+  const updated = await controlClient.send(
+    new UpdateOnlineEvaluationConfigCommand({
+      onlineEvaluationConfigId,
+      ...(executionStatus ? { executionStatus } : {}),
+      ...(description === undefined ? {} : { description }),
+    })
+  );
+
+  const configId = updated.onlineEvaluationConfigId ?? onlineEvaluationConfigId;
+  const resultingExecutionStatus = updated.executionStatus ?? executionStatus ?? 'UNKNOWN';
+  return { configId, executionStatus: resultingExecutionStatus, status: updated.status ?? 'UNKNOWN' };
+}
+
+export interface GetOnlineEvalConfigOptions { // input for getOnlineEvaluationConfig
+  region: string; // region the control client is created with
+  configId: string; // sent to the service as onlineEvaluationConfigId
+}
+
+export interface GetOnlineEvalConfigResult { // what getOnlineEvaluationConfig returns
+  configId: string; // always taken from the response (the getter throws without it)
+  configArn: string; // '' when absent from the response
+  configName: string; // '' when absent from the response
+  status: string; // 'UNKNOWN' when absent from the response
+  executionStatus: string; // 'UNKNOWN' when absent from the response
+  description?: string;
+  failureReason?: string;
+  outputLogGroupName?: string; // from outputConfig.cloudWatchConfig.logGroupName, if present
+}
+
+/**
+ * Fetch one online evaluation config by ID.
+ *
+ * `outputLogGroupName` is read from `outputConfig.cloudWatchConfig.logGroupName` and is undefined
+ * when any part of that path is missing.
+ * @throws Error when the response carries no onlineEvaluationConfigId; errors from `send` propagate.
+ */
+export async function getOnlineEvaluationConfig(
+  options: GetOnlineEvalConfigOptions
+): Promise<GetOnlineEvalConfigResult> {
+  const { region, configId } = options;
+  const controlClient = new BedrockAgentCoreControlClient({ region, credentials: getCredentialProvider() });
+  const found = await controlClient.send(
+    new GetOnlineEvaluationConfigCommand({ onlineEvaluationConfigId: configId })
+  );
+
+  if (!found.onlineEvaluationConfigId) {
+    throw new Error(`No online evaluation config found for ID ${configId}`);
+  }
+
+  return {
+    configId: found.onlineEvaluationConfigId,
+    configArn: found.onlineEvaluationConfigArn ?? '',
+    configName: found.onlineEvaluationConfigName ?? '',
+    status: found.status ?? 'UNKNOWN',
+    executionStatus: found.executionStatus ?? 'UNKNOWN',
+    description: found.description,
+    failureReason: found.failureReason,
+    outputLogGroupName: found.outputConfig?.cloudWatchConfig?.logGroupName,
+  };
+}
+
+// ============================================================================
+// Delete Online Eval Config
+// ============================================================================
+
+export interface DeleteOnlineEvalConfigOptions { // input for deleteOnlineEvalConfig
+  region: string; // region the control client is created with
+  onlineEvaluationConfigId: string; // config to delete
+}
+
+export interface DeleteOnlineEvalConfigResult { // what deleteOnlineEvalConfig returns
+  configId: string; // response ID, else the requested ID
+  configArn: string; // '' when absent from the response
+  status: string; // 'DELETING' when absent from the response
+}
+
+/**
+ * Request deletion of an online evaluation config.
+ *
+ * Missing response fields are defaulted: configId to the requested ID, configArn to '', and status
+ * to 'DELETING'.
+ */
+export async function deleteOnlineEvalConfig(
+  options: DeleteOnlineEvalConfigOptions
+): Promise<DeleteOnlineEvalConfigResult> {
+  const { region, onlineEvaluationConfigId } = options;
+  const controlClient = new BedrockAgentCoreControlClient({ region, credentials: getCredentialProvider() });
+  const deleted = await controlClient.send(new DeleteOnlineEvaluationConfigCommand({ onlineEvaluationConfigId }));
+
+  // Fill in anything the response omitted.
+  return {
+    configId: deleted.onlineEvaluationConfigId ?? onlineEvaluationConfigId,
+    configArn: deleted.onlineEvaluationConfigArn ?? '',
+    status: deleted.status ?? 'DELETING',
+  };
+}
diff --git a/src/cli/aws/agentcore.ts b/src/cli/aws/agentcore.ts
index 8baf9f72..e84bd6a1 100644
--- a/src/cli/aws/agentcore.ts
+++ b/src/cli/aws/agentcore.ts
@@ -1,9 +1,11 @@
 import { getCredentialProvider } from './account';
 import {
   BedrockAgentCoreClient,
+  EvaluateCommand,
   InvokeAgentRuntimeCommand,
   StopRuntimeSessionCommand,
 } from '@aws-sdk/client-bedrock-agentcore';
+import type { DocumentType } from '@smithy/types';
 
 /** Logger interface for SSE events */
 export interface SSELogger {
@@ -234,6 +236,108 @@ export async function invokeAgentRuntime(options: InvokeAgentRuntimeOptions): Pr
   };
 }
 
+// ============================================================================
+// Evaluate
+// ============================================================================
+
+export interface EvaluateOptions { // input for evaluate()
+  region: string; // region the BedrockAgentCoreClient is created with
+  evaluatorId: string; // sent as the command's evaluatorId
+  sessionSpans: DocumentType[]; // sent as evaluationInput.sessionSpans
+  targetSpanIds?: string[]; // sent as evaluationTarget.spanIds; takes precedence over targetTraceIds
+  targetTraceIds?: string[]; // sent as evaluationTarget.traceIds only if no span IDs are targeted
+}
+
+export interface EvaluationResultContext { // flattened copy of a result's context.spanContext
+  sessionId: string | undefined;
+  traceId: string | undefined;
+  spanId: string | undefined;
+}
+
+export interface EvaluationResultTokenUsage { // token counts copied from a result's tokenUsage
+  inputTokens: number; // 0 when missing from the response
+  outputTokens: number; // 0 when missing from the response
+  totalTokens: number; // 0 when missing from the response
+}
+
+export interface EvaluationResult { // one mapped entry of the response's evaluationResults
+  evaluatorArn: string | undefined;
+  evaluatorId: string | undefined;
+  evaluatorName: string | undefined;
+  explanation: string | undefined;
+  value: number | undefined;
+  label: string | undefined;
+  errorMessage: string | undefined;
+  errorCode: string | undefined;
+  context: EvaluationResultContext | undefined; // undefined unless the response context has spanContext
+  tokenUsage: EvaluationResultTokenUsage | undefined; // undefined when the response has no tokenUsage
+}
+
+export interface EvaluateResult { // what evaluate() returns
+  evaluationResults: EvaluationResult[]; // one entry per result in the response, in response order
+}
+
+/**
+ * Run on-demand evaluation of agent traces using a specified evaluator.
+ *
+ * At most one target is sent: non-empty `targetSpanIds` win over `targetTraceIds`. Empty arrays
+ * count as "not provided", so an empty span list cannot mask the trace IDs.
+ * @throws Error when the response has no `evaluationResults`; errors from `send` propagate unchanged.
+ */
+export async function evaluate(options: EvaluateOptions): Promise<EvaluateResult> {
+  const client = new BedrockAgentCoreClient({ region: options.region, credentials: getCredentialProvider() });
+
+  // `[]` is truthy, so test the length rather than the array itself.
+  const evaluationTarget = options.targetSpanIds?.length
+    ? { spanIds: options.targetSpanIds }
+    : options.targetTraceIds?.length
+      ? { traceIds: options.targetTraceIds }
+      : undefined;
+
+  const command = new EvaluateCommand({
+    evaluatorId: options.evaluatorId,
+    evaluationInput: { sessionSpans: options.sessionSpans },
+    ...(evaluationTarget ? { evaluationTarget } : {}),
+  });
+
+  const response = await client.send(command);
+
+  if (!response.evaluationResults) {
+    throw new Error('No evaluation results returned');
+  }
+
+  return {
+    evaluationResults: response.evaluationResults.map(r => {
+      // Only a context that carries `spanContext` is surfaced; any other shape maps to undefined.
+      const spanContext = r.context && 'spanContext' in r.context ? r.context.spanContext : undefined;
+      return {
+        evaluatorArn: r.evaluatorArn,
+        evaluatorId: r.evaluatorId,
+        evaluatorName: r.evaluatorName,
+        explanation: r.explanation,
+        value: r.value,
+        label: r.label,
+        errorMessage: r.errorMessage,
+        errorCode: r.errorCode,
+        context: spanContext
+          ? {
+              sessionId: spanContext.sessionId,
+              traceId: spanContext.traceId,
+              spanId: spanContext.spanId,
+            }
+          : undefined,
+        tokenUsage: r.tokenUsage
+          ? {
+              inputTokens: r.tokenUsage.inputTokens ?? 0,
+              outputTokens: r.tokenUsage.outputTokens ?? 0,
+              totalTokens: r.tokenUsage.totalTokens ?? 0,
+            }
+          : undefined,
+      };
+    }),
+  };
+}
+
 /**
  * Stop a runtime session.
  */
diff --git a/src/cli/cli.ts b/src/cli/cli.ts
index 4d992ad7..dc3b22e3 100644
--- a/src/cli/cli.ts
+++ b/src/cli/cli.ts
@@ -2,11 +2,15 @@ import { registerAdd } from './commands/add';
 import { registerCreate } from './commands/create';
 import { registerDeploy } from './commands/deploy';
 import { registerDev } from './commands/dev';
+import { registerEval } from './commands/eval';
 import { registerHelp } from './commands/help';
 import { registerInvoke } from './commands/invoke';
 import { registerLogs } from './commands/logs';
 import { registerPackage } from './commands/package';
+import { registerPause, registerStop } from './commands/pause';
 import { registerRemove } from './commands/remove';
+import { registerResume } from './commands/resume';
+import { registerRun } from './commands/run';
 import { registerStatus } from './commands/status';
 import { registerTraces } from './commands/traces';
 import { registerUpdate } from './commands/update';
@@ -130,11 +134,16 @@ export function registerCommands(program: Command) {
   registerDev(program);
   registerDeploy(program);
   registerCreate(program);
+  registerEval(program);
   registerHelp(program);
   registerInvoke(program);
   registerLogs(program);
   registerPackage(program);
+  registerPause(program);
   const removeCmd = registerRemove(program);
+  registerResume(program);
+  registerRun(program);
+  registerStop(program);
   registerStatus(program);
   registerTraces(program);
   registerUpdate(program);
diff --git a/src/cli/cloudformation/__tests__/outputs-extended.test.ts b/src/cli/cloudformation/__tests__/outputs-extended.test.ts
index 85aab1c8..16112c58 100644
--- a/src/cli/cloudformation/__tests__/outputs-extended.test.ts
+++ b/src/cli/cloudformation/__tests__/outputs-extended.test.ts
@@ -1,4 +1,4 @@
-import { buildDeployedState, parseAgentOutputs } from '../outputs.js';
+import { buildDeployedState, parseAgentOutputs, parseEvaluatorOutputs, parseOnlineEvalOutputs } from '../outputs.js';
 import type { StackOutputs } from '../outputs.js';
 import { describe, expect, it } from 'vitest';
 
@@ -233,4 +233,170 @@ describe('buildDeployedState', () => {
     const state = buildDeployedState({ targetName: 'default', stackName: 'Stack', agents: {}, gateways: {} });
     expect(state.targets.default!.resources?.agents).toBeUndefined();
   });
+
+  it('includes evaluators in deployed state when provided', () => {
+    const evaluators = {
+      MyEval: {
+        evaluatorId: 'proj_MyEval-abc',
+        evaluatorArn: 'arn:aws:bedrock:us-east-1:123:evaluator/proj_MyEval-abc',
+      },
+    };
+
+    const state = buildDeployedState({
+      targetName: 'default',
+      stackName: 'Stack',
+      agents: {},
+      gateways: {},
+      evaluators,
+    });
+    expect(state.targets.default!.resources?.evaluators).toEqual(evaluators);
+  });
+
+  it('omits evaluators from deployed state when empty', () => {
+    const state = buildDeployedState({
+      targetName: 'default',
+      stackName: 'Stack',
+      agents: {},
+      gateways: {},
+      evaluators: {},
+    });
+    expect(state.targets.default!.resources?.evaluators).toBeUndefined();
+  });
+
+  it('includes onlineEvalConfigs in deployed state when provided', () => {
+    const onlineEvalConfigs = {
+      TestConfig: {
+        onlineEvaluationConfigId: 'proj_TestConfig-xyz',
+        onlineEvaluationConfigArn: 'arn:aws:bedrock:us-east-1:123:online-evaluation-config/proj_TestConfig-xyz',
+      },
+    };
+
+    const state = buildDeployedState({
+      targetName: 'default',
+      stackName: 'Stack',
+      agents: {},
+      gateways: {},
+      onlineEvalConfigs,
+    });
+    expect(state.targets.default!.resources?.onlineEvalConfigs).toEqual(onlineEvalConfigs);
+  });
+
+  it('omits onlineEvalConfigs from deployed state when empty', () => {
+    const state = buildDeployedState({
+      targetName: 'default',
+      stackName: 'Stack',
+      agents: {},
+      gateways: {},
+      onlineEvalConfigs: {},
+    });
+    expect(state.targets.default!.resources?.onlineEvalConfigs).toBeUndefined();
+  });
+});
+
+describe('parseEvaluatorOutputs', () => {
+  it('parses evaluator Id and Arn from stack outputs', () => {
+    const outputs: StackOutputs = {
+      ApplicationEvaluatorMyEvalIdOutputABC123: 'proj_MyEval-abc',
+      ApplicationEvaluatorMyEvalArnOutputDEF456: 'arn:aws:bedrock:us-east-1:123:evaluator/proj_MyEval-abc',
+    };
+
+    const result = parseEvaluatorOutputs(outputs, ['MyEval']);
+    expect(result.MyEval).toBeDefined();
+    expect(result.MyEval!.evaluatorId).toBe('proj_MyEval-abc');
+    expect(result.MyEval!.evaluatorArn).toBe('arn:aws:bedrock:us-east-1:123:evaluator/proj_MyEval-abc');
+  });
+
+  it('parses multiple evaluators', () => {
+    const outputs: StackOutputs = {
+      ApplicationEvaluatorEvalAIdOutputA: 'id-a',
+      ApplicationEvaluatorEvalAArnOutputB: 'arn:a',
+      ApplicationEvaluatorEvalBIdOutputC: 'id-b',
+      ApplicationEvaluatorEvalBArnOutputD: 'arn:b',
+    };
+
+    const result = parseEvaluatorOutputs(outputs, ['EvalA', 'EvalB']);
+    expect(Object.keys(result)).toHaveLength(2);
+    expect(result.EvalA!.evaluatorId).toBe('id-a');
+    expect(result.EvalB!.evaluatorId).toBe('id-b');
+  });
+
+  it('skips evaluator when Id output is missing', () => {
+    const outputs: StackOutputs = {
+      ApplicationEvaluatorMyEvalArnOutputDEF456: 'arn:eval',
+    };
+
+    const result = parseEvaluatorOutputs(outputs, ['MyEval']);
+    expect(result.MyEval).toBeUndefined();
+  });
+
+  it('skips evaluator when Arn output is missing', () => {
+    const outputs: StackOutputs = {
+      ApplicationEvaluatorMyEvalIdOutputABC123: 'eval-id',
+    };
+
+    const result = parseEvaluatorOutputs(outputs, ['MyEval']);
+    expect(result.MyEval).toBeUndefined();
+  });
+
+  it('returns empty record for no matching outputs', () => {
+    const result = parseEvaluatorOutputs({ UnrelatedOutput: 'value' }, ['MyEval']);
+    expect(result).toEqual({});
+  });
+
+  it('maps PascalCase output keys back to original underscore names', () => {
+    // Evaluator name "my_eval" becomes "MyEval" in PascalCase
+    const outputs: StackOutputs = {
+      ApplicationEvaluatorMyEvalIdOutputA: 'id-1',
+      ApplicationEvaluatorMyEvalArnOutputB: 'arn:1',
+    };
+
+    const result = parseEvaluatorOutputs(outputs, ['my_eval']);
+    expect(result.my_eval).toBeDefined();
+    expect(result.my_eval!.evaluatorId).toBe('id-1');
+  });
+});
+
+describe('parseOnlineEvalOutputs', () => {
+  it('parses online eval config Id and Arn from stack outputs', () => {
+    const outputs: StackOutputs = {
+      ApplicationOnlineEvalTestConfigIdOutputABC: 'proj_TestConfig-xyz',
+      ApplicationOnlineEvalTestConfigArnOutputDEF:
+        'arn:aws:bedrock:us-east-1:123:online-evaluation-config/proj_TestConfig-xyz',
+    };
+
+    const result = parseOnlineEvalOutputs(outputs, ['TestConfig']);
+    expect(result.TestConfig).toBeDefined();
+    expect(result.TestConfig!.onlineEvaluationConfigId).toBe('proj_TestConfig-xyz');
+    expect(result.TestConfig!.onlineEvaluationConfigArn).toBe(
+      'arn:aws:bedrock:us-east-1:123:online-evaluation-config/proj_TestConfig-xyz'
+    );
+  });
+
+  it('parses multiple online eval configs', () => {
+    const outputs: StackOutputs = {
+      ApplicationOnlineEvalConfigAIdOutputA: 'id-a',
+      ApplicationOnlineEvalConfigAArnOutputB: 'arn:a',
+      ApplicationOnlineEvalConfigBIdOutputC: 'id-b',
+      ApplicationOnlineEvalConfigBArnOutputD: 'arn:b',
+    };
+
+    const result = parseOnlineEvalOutputs(outputs, ['ConfigA', 'ConfigB']);
+    expect(Object.keys(result)).toHaveLength(2);
+    expect(result.ConfigA!.onlineEvaluationConfigId).toBe('id-a');
+    expect(result.ConfigB!.onlineEvaluationConfigId).toBe('id-b');
+  });
+
+  it('skips config when Id output is missing', () => {
+    const outputs: StackOutputs = {
+      ApplicationOnlineEvalTestConfigArnOutputDEF: 'arn:config',
+    };
+
+    const result = parseOnlineEvalOutputs(outputs, ['TestConfig']);
+    expect(result.TestConfig).toBeUndefined();
+  });
+
+  it('returns empty record for empty outputs', () => {
+    const result = parseOnlineEvalOutputs({}, ['TestConfig']);
+    expect(result).toEqual({});
+  });
 });
diff --git a/src/cli/cloudformation/outputs.ts b/src/cli/cloudformation/outputs.ts
index 86ec368f..073fc05a 100644
--- a/src/cli/cloudformation/outputs.ts
+++ b/src/cli/cloudformation/outputs.ts
@@ -1,4 +1,11 @@
-import type { AgentCoreDeployedState, DeployedState, MemoryDeployedState, TargetDeployedState } from '../../schema';
+import type {
+  AgentCoreDeployedState,
+  DeployedState,
+  EvaluatorDeployedState,
+  MemoryDeployedState,
+  OnlineEvalDeployedState,
+  TargetDeployedState,
+} from '../../schema';
 import { getCredentialProvider } from '../aws';
 import { toPascalId } from './logical-ids';
 import { getStackName } from './stack-discovery';
@@ -202,6 +209,68 @@ export function parseMemoryOutputs(outputs: StackOutputs, memoryNames: string[])
   return memories;
 }
 
+/**
+ * Collect the deployed Id/Arn pair for each evaluator named in the project spec.
+ *
+ * Output key pattern: ApplicationEvaluator{PascalName}(Id|Arn)Output{Hash}. Keys are matched
+ * by prefix, so the {Hash} suffix is ignored. An evaluator missing either output is left out.
+ *
+ * @param outputs - Stack outputs (output key to value).
+ * @param evaluatorNames - Evaluator names; each is converted with toPascalId for matching.
+ * @returns Deployed state keyed by the evaluator name exactly as passed in.
+ */
+export function parseEvaluatorOutputs(
+  outputs: StackOutputs,
+  evaluatorNames: string[]
+): Record<string, EvaluatorDeployedState> {
+  const allKeys = Object.keys(outputs);
+  const firstKeyWithPrefix = (prefix: string) => allKeys.find(key => key.startsWith(prefix));
+  const deployed: Record<string, EvaluatorDeployedState> = {};
+
+  evaluatorNames.forEach(name => {
+    const logicalId = toPascalId('Evaluator', name);
+    const idOutputKey = firstKeyWithPrefix(`Application${logicalId}IdOutput`);
+    const arnOutputKey = firstKeyWithPrefix(`Application${logicalId}ArnOutput`);
+    // Both outputs are required; a partial match is skipped.
+    if (!idOutputKey || !arnOutputKey) return;
+
+    deployed[name] = { evaluatorId: outputs[idOutputKey]!, evaluatorArn: outputs[arnOutputKey]! };
+  });
+
+  return deployed;
+}
+
+/**
+ * Collect the deployed Id/Arn pair for each online evaluation config in the project spec.
+ *
+ * Output key pattern: ApplicationOnlineEval{PascalName}(Id|Arn)Output{Hash}. Keys are matched
+ * by prefix, so the {Hash} suffix is ignored. A config missing either output is left out.
+ *
+ * @param outputs - Stack outputs (output key to value).
+ * @param onlineEvalNames - Config names; each is converted with toPascalId for matching.
+ * @returns Deployed state keyed by the config name exactly as passed in.
+ */
+export function parseOnlineEvalOutputs(
+  outputs: StackOutputs,
+  onlineEvalNames: string[]
+): Record<string, OnlineEvalDeployedState> {
+  const keys = Object.keys(outputs);
+  const result: Record<string, OnlineEvalDeployedState> = {};
+
+  for (const name of onlineEvalNames) {
+    const base = `Application${toPascalId('OnlineEval', name)}`;
+    const idOutputKey = keys.find(key => key.startsWith(`${base}IdOutput`));
+    const arnOutputKey = keys.find(key => key.startsWith(`${base}ArnOutput`));
+    if (!idOutputKey || !arnOutputKey) continue;
+    result[name] = {
+      onlineEvaluationConfigId: outputs[idOutputKey]!,
+      onlineEvaluationConfigArn: outputs[arnOutputKey]!,
+    };
+  }
+
+  return result;
+}
+
 export interface BuildDeployedStateOptions {
   targetName: string;
   stackName: string;
@@ -211,13 +280,26 @@ export interface BuildDeployedStateOptions {
   identityKmsKeyArn?: string;
   credentials?: Record<string, { credentialProviderArn: string; clientSecretArn?: string; callbackUrl?: string }>;
   memories?: Record<string, MemoryDeployedState>;
+  evaluators?: Record<string, EvaluatorDeployedState>;
+  onlineEvalConfigs?: Record<string, OnlineEvalDeployedState>;
 }
 
 /**
  * Build deployed state from stack outputs.
  */
 export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedState {
-  const { targetName, stackName, agents, gateways, existingState, identityKmsKeyArn, credentials, memories } = opts;
+  const {
+    targetName,
+    stackName,
+    agents,
+    gateways,
+    existingState,
+    identityKmsKeyArn,
+    credentials,
+    memories,
+    evaluators,
+    onlineEvalConfigs,
+  } = opts;
   const targetState: TargetDeployedState = {
     resources: {
       agents: Object.keys(agents).length > 0 ? agents : undefined,
@@ -239,6 +321,16 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta
     targetState.resources!.credentials = credentials;
   }
 
+  // Add evaluator state if evaluators exist
+  if (evaluators && Object.keys(evaluators).length > 0) {
+    targetState.resources!.evaluators = evaluators;
+  }
+
+  // Add online eval config state if configs exist
+  if (onlineEvalConfigs && Object.keys(onlineEvalConfigs).length > 0) {
+    targetState.resources!.onlineEvalConfigs = onlineEvalConfigs;
+  }
+
   return {
     targets: {
       ...existingState?.targets,
diff --git a/src/cli/commands/create/action.ts b/src/cli/commands/create/action.ts
index c99f69dc..eba7385b 100644
--- a/src/cli/commands/create/action.ts
+++ b/src/cli/commands/create/action.ts
@@ -28,6 +28,8 @@ function createDefaultProjectSpec(projectName: string): AgentCoreProjectSpec {
     agents: [],
     memories: [],
     credentials: [],
+    evaluators: [],
+    onlineEvalConfigs: [],
   };
 }
 
diff --git a/src/cli/commands/deploy/actions.ts b/src/cli/commands/deploy/actions.ts
index 721a050a..6289d2d2 100644
--- a/src/cli/commands/deploy/actions.ts
+++ b/src/cli/commands/deploy/actions.ts
@@ -6,8 +6,10 @@ import {
   buildDeployedState,
   getStackOutputs,
   parseAgentOutputs,
+  parseEvaluatorOutputs,
   parseGatewayOutputs,
   parseMemoryOutputs,
+  parseOnlineEvalOutputs,
 } from '../../cloudformation';
 import { getErrorMessage } from '../../errors';
 import { ExecLogger } from '../../logging';
@@ -374,6 +376,14 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
       );
     }
 
+    // Parse evaluator outputs
+    const evaluatorNames = (context.projectSpec.evaluators ?? []).map(e => e.name);
+    const evaluators = parseEvaluatorOutputs(outputs, evaluatorNames);
+
+    // Parse online eval config outputs
+    const onlineEvalNames = (context.projectSpec.onlineEvalConfigs ?? []).map(c => c.name);
+    const onlineEvalConfigs = parseOnlineEvalOutputs(outputs, onlineEvalNames);
+
     // Parse gateway outputs
     const gatewaySpecs =
       mcpSpec?.agentCoreGateways?.reduce(
@@ -395,6 +405,8 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
       identityKmsKeyArn,
       credentials: deployedCredentials,
       memories,
+      evaluators,
+      onlineEvalConfigs,
     });
     await configIO.writeDeployedState(deployedState);
 
diff --git a/src/cli/commands/eval/command.tsx b/src/cli/commands/eval/command.tsx
new file mode 100644
index 00000000..44bff493
--- /dev/null
+++ b/src/cli/commands/eval/command.tsx
@@ -0,0 +1,76 @@
+import { getErrorMessage } from '../../errors';
+import { handleListEvalRuns } from '../../operations/eval';
+import { getResultsPath } from '../../operations/eval/storage';
+import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
+import { requireProject } from '../../tui/guards';
+import type { Command } from '@commander-js/extra-typings';
+import { Text, render } from 'ink';
+import React from 'react';
+
+/** Register `agentcore eval history`; a malformed --limit is reported as an error, not passed on as NaN. */
+export const registerEval = (program: Command) => {
+  const evalCmd = program.command('eval').description(COMMAND_DESCRIPTIONS.eval);
+
+  evalCmd
+    .command('history')
+    .description('Show past eval run results')
+    .option('-a, --agent <name>', 'Filter by agent name')
+    .option('-n, --limit <count>', 'Maximum number of runs to show')
+    .option('--json', 'Output as JSON')
+    .action((cliOptions: { agent?: string; limit?: string; json?: boolean }) => {
+      requireProject();
+
+      try {
+        const limit = cliOptions.limit ? parseInt(cliOptions.limit, 10) : undefined;
+        if (limit !== undefined && (Number.isNaN(limit) || limit < 0)) {
+          throw new Error(`Invalid --limit "${cliOptions.limit}": expected a non-negative integer`);
+        }
+        const result = handleListEvalRuns({ agent: cliOptions.agent, limit, json: cliOptions.json });
+
+        if (cliOptions.json) {
+          console.log(JSON.stringify(result));
+          process.exit(result.success ? 0 : 1);
+          return;
+        }
+
+        if (!result.success) {
+          render(<Text color="red">{result.error}</Text>);
+          process.exit(1);
+        }
+
+        const runs = result.runs ?? [];
+        if (runs.length === 0) {
+          console.log('No eval runs found. Run `agentcore run eval` to create one.');
+          return;
+        }
+
+        console.log(`\n${'Date'.padEnd(22)} ${'Agent'.padEnd(20)} ${'Evaluators'.padEnd(30)} Sessions`);
+        console.log('─'.repeat(90));
+
+        for (const run of runs) {
+          const scores = run.results.map(r => `${r.evaluator}=${r.aggregateScore.toFixed(2)}`).join(', ');
+          const date = new Date(run.timestamp).toLocaleString([], {
+            year: 'numeric',
+            month: 'short',
+            day: 'numeric',
+            hour: '2-digit',
+            minute: '2-digit',
+          });
+          console.log(`${date.padEnd(22)} ${run.agent.padEnd(20)} ${scores.padEnd(30)} ${run.sessionCount}`);
+        }
+        try {
+          console.log(`\nResults saved in: ${getResultsPath()}`);
+        } catch {
+          // ignore — no project context
+        }
+        console.log('');
+      } catch (error) {
+        if (cliOptions.json) {
+          console.log(JSON.stringify({ success: false, error: getErrorMessage(error) }));
+        } else {
+          render(<Text color="red">Error: {getErrorMessage(error)}</Text>);
+        }
+        process.exit(1);
+      }
+    });
+};
diff --git a/src/cli/commands/eval/index.ts b/src/cli/commands/eval/index.ts
new file mode 100644
index 00000000..5a761e17
--- /dev/null
+++ b/src/cli/commands/eval/index.ts
@@ -0,0 +1 @@
+export { registerEval } from './command';
diff --git a/src/cli/commands/index.ts b/src/cli/commands/index.ts
index 3e1fd854..3dac1c82 100644
--- a/src/cli/commands/index.ts
+++ b/src/cli/commands/index.ts
@@ -3,9 +3,13 @@ export { registerAdd } from './add';
 export { registerDeploy } from './deploy';
 export { registerDev } from './dev';
 export { registerCreate } from './create';
+export { registerEval } from './eval';
 export { registerInvoke } from './invoke';
 export { registerPackage } from './package';
+export { registerPause } from './pause';
 export { registerRemove } from './remove';
+export { registerResume } from './resume';
+export { registerRun } from './run';
 export { registerStatus } from './status';
 export { registerTraces } from './traces';
 export { registerUpdate } from './update';
diff --git a/src/cli/commands/logs/__tests__/action.test.ts b/src/cli/commands/logs/__tests__/action.test.ts
index 81e1f39f..9f41b66f 100644
--- a/src/cli/commands/logs/__tests__/action.test.ts
+++ b/src/cli/commands/logs/__tests__/action.test.ts
@@ -55,6 +55,8 @@ describe('resolveAgentContext', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     },
     deployedState: {
       targets: {
@@ -111,6 +113,8 @@ describe('resolveAgentContext', () => {
         ],
         memories: [],
         credentials: [],
+        evaluators: [],
+        onlineEvalConfigs: [],
       },
     });
     const result = resolveAgentContext(context, {});
@@ -147,6 +151,8 @@ describe('resolveAgentContext', () => {
         ],
         memories: [],
         credentials: [],
+        evaluators: [],
+        onlineEvalConfigs: [],
       },
       deployedState: {
         targets: {
@@ -187,7 +193,15 @@ describe('resolveAgentContext', () => {
 
   it('errors when no agents defined', () => {
     const context = makeContext({
-      project: { name: 'TestProject', version: 1, agents: [], memories: [], credentials: [] },
+      project: {
+        name: 'TestProject',
+        version: 1,
+        agents: [],
+        memories: [],
+        credentials: [],
+        evaluators: [],
+        onlineEvalConfigs: [],
+      },
     });
     const result = resolveAgentContext(context, {});
     expect(result.success).toBe(false);
diff --git a/src/cli/commands/logs/command.tsx b/src/cli/commands/logs/command.tsx
index 977042cd..282aed81 100644
--- a/src/cli/commands/logs/command.tsx
+++ b/src/cli/commands/logs/command.tsx
@@ -1,15 +1,24 @@
 import { getErrorMessage } from '../../errors';
+import { handleLogsEval } from '../../operations/eval';
+import type { LogsEvalOptions } from '../../operations/eval';
 import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
 import { requireProject } from '../../tui/guards';
 import { handleLogs } from './action';
 import type { LogsOptions } from './types';
 import type { Command } from '@commander-js/extra-typings';
 import { Text, render } from 'ink';
+import React from 'react';
 
 export const registerLogs = (program: Command) => {
-  program
+  // enablePositionalOptions + passThroughOptions ensure options like --since and --agent
+  // are passed to the 'eval' subcommand rather than being consumed by the parent 'logs' command.
+  program.enablePositionalOptions();
+
+  const logsCmd = program
     .command('logs')
     .alias('l')
+    .enablePositionalOptions()
+    .passThroughOptions()
     .description(COMMAND_DESCRIPTIONS.logs)
     .option('--agent <name>', 'Select specific agent')
     .option('--since <time>', 'Start time — defaults to 1h ago in search mode (e.g. "1h", "30m", "2d", ISO 8601)')
@@ -24,6 +33,31 @@ export const registerLogs = (program: Command) => {
       try {
         const result = await handleLogs(cliOptions);
 
+        if (!result.success) {
+          render(<Text color="red">{result.error}</Text>);
+          process.exit(1);
+        }
+      } catch (error) {
+        render(<Text color="red">Error: {getErrorMessage(error)}</Text>);
+        process.exit(1);
+      }
+    });
+
+  logsCmd
+    .command('eval')
+    .description('Stream or search online eval logs')
+    .option('-a, --agent <name>', 'Select specific agent')
+    .option('--since <time>', 'Start time (e.g. "1h", "30m", "2d", ISO 8601)')
+    .option('--until <time>', 'End time (e.g. "now", ISO 8601)')
+    .option('-n, --lines <count>', 'Maximum number of log lines')
+    .option('-f, --follow', 'Stream logs in real-time (default when no --since/--until)')
+    .option('--json', 'Output as JSON Lines')
+    .action(async (cliOptions: LogsEvalOptions) => {
+      requireProject();
+
+      try {
+        const result = await handleLogsEval(cliOptions);
+
         if (!result.success) {
           render(<Text color="red">{result.error}</Text>);
           process.exit(1);
diff --git a/src/cli/commands/pause/command.tsx b/src/cli/commands/pause/command.tsx
new file mode 100644
index 00000000..6e5ca7d8
--- /dev/null
+++ b/src/cli/commands/pause/command.tsx
@@ -0,0 +1,159 @@
+import { getErrorMessage } from '../../errors';
+import { handleDeleteOnlineEval, handlePauseResume } from '../../operations/eval';
+import type { OnlineEvalActionOptions } from '../../operations/eval';
+import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
+import { requireProject } from '../../tui/guards';
+import type { Command } from '@commander-js/extra-typings';
+import { Text, render } from 'ink';
+import React from 'react';
+import * as readline from 'readline';
+
+/**
+ * Attach an `online-eval` subcommand to `parent` that pauses or resumes an online eval config.
+ * The config is addressed by its project-config name, or directly by ARN via --arn.
+ * @param parent - The `pause` or `resume` command that receives the subcommand.
+ * @param action - Which operation the subcommand forwards to handlePauseResume.
+ */
+function registerOnlineEvalSubcommand(parent: Command, action: 'pause' | 'resume') {
+  const isPause = action === 'pause';
+  const description = isPause ? 'Pause a deployed online eval config' : 'Resume a paused online eval config';
+  const pastTense = isPause ? 'Paused' : 'Resumed';
+
+  parent
+    .command('online-eval')
+    .description(description)
+    .argument('[name]', 'Online eval config name (from project config)')
+    .option('--arn <arn>', 'Online eval config ARN (direct mode, bypasses project config)')
+    .option('--region <region>', 'AWS region (used with --arn)')
+    .option('--json', 'Output as JSON')
+    .action(async (name: string | undefined, cliOptions: { arn?: string; region?: string; json?: boolean }) => {
+      const { arn, region, json } = cliOptions;
+      // Failures go out as a JSON envelope with --json, otherwise as red terminal text.
+      const reportFailure = (error: string, label = '') => {
+        if (json) {
+          console.log(JSON.stringify({ success: false, error }));
+        } else {
+          render(<Text color="red">{label + error}</Text>);
+        }
+      };
+
+      if (!arn && !name) {
+        reportFailure('Either a config name or --arn is required');
+        process.exit(1);
+      }
+      // --arn bypasses project config, so a project is only required in name mode.
+      if (!arn) requireProject();
+
+      try {
+        const options: OnlineEvalActionOptions = { name: name ?? '', arn, region, json };
+        const result = await handlePauseResume(options, action);
+
+        if (json) {
+          console.log(JSON.stringify(result));
+        } else if (!result.success) {
+          render(<Text color="red">{result.error}</Text>);
+        } else {
+          const displayName = arn ? result.configId : name;
+          console.log(`${pastTense} online eval config "${displayName}" (status: ${result.executionStatus})`);
+        }
+
+        process.exit(result.success ? 0 : 1);
+      } catch (error) {
+        reportFailure(getErrorMessage(error), 'Error: ');
+        process.exit(1);
+      }
+    });
+}
+
+/** Ask a yes/no question on the terminal; resolves true only for "y" or "yes" (any case). */
+function askConfirmation(prompt: string): Promise<boolean> {
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+  // Close the interface as soon as an answer arrives, before the caller sees the result.
+  return new Promise<string>(resolve => rl.question(prompt, resolve))
+    .finally(() => rl.close())
+    // Trim first so stray whitespace around "y" is not read as "no".
+    .then(answer => ['y', 'yes'].includes(answer.trim().toLowerCase()));
+}
+
+/** Register the top-level `pause` command and attach its `online-eval` subcommand. */
+export const registerPause = (program: Command) => {
+  registerOnlineEvalSubcommand(program.command('pause').description(COMMAND_DESCRIPTIONS.pause), 'pause');
+};
+
+/** Register the top-level `resume` command and attach its `online-eval` subcommand. */
+export const registerResume = (program: Command) => {
+  registerOnlineEvalSubcommand(program.command('resume').description(COMMAND_DESCRIPTIONS.resume), 'resume');
+};
+
+/**
+ * Register `stop online-eval`, which deletes a deployed online eval config.
+ * A y/N confirmation is asked first unless --yes or --json is given.
+ */
+export const registerStop = (program: Command) => {
+  const stopCmd = program.command('stop').description(COMMAND_DESCRIPTIONS.stop);
+
+  stopCmd
+    .command('online-eval')
+    .description('Delete a deployed online eval config')
+    .argument('[name]', 'Online eval config name (from project config)')
+    .option('--arn <arn>', 'Online eval config ARN (direct mode, bypasses project config)')
+    .option('--region <region>', 'AWS region (used with --arn)')
+    .option('--json', 'Output as JSON')
+    .option('-y, --yes', 'Skip confirmation prompt')
+    .action(
+      async (
+        name: string | undefined,
+        cliOptions: { arn?: string; region?: string; json?: boolean; yes?: boolean }
+      ) => {
+        const { arn, region, json, yes } = cliOptions;
+        // Failures go out as a JSON envelope with --json, otherwise as red terminal text.
+        const reportFailure = (error: string, label = '') => {
+          if (json) {
+            console.log(JSON.stringify({ success: false, error }));
+          } else {
+            render(<Text color="red">{label + error}</Text>);
+          }
+        };
+
+        if (!arn && !name) {
+          reportFailure('Either a config name or --arn is required');
+          process.exit(1);
+        }
+
+        // --arn bypasses project config, so a project is only required in name mode.
+        if (!arn) {
+          requireProject();
+        }
+
+        const displayName = arn ?? name;
+        // The prompt is skipped for --yes and for --json.
+        if (!yes && !json) {
+          const confirmed = await askConfirmation(
+            `Are you sure you want to delete online eval config "${displayName}"? This action cannot be undone. (y/N) `
+          );
+          if (!confirmed) {
+            console.log('Aborted.');
+            process.exit(0);
+          }
+        }
+
+        try {
+          const options: OnlineEvalActionOptions = { name: name ?? '', arn, region, json };
+          const result = await handleDeleteOnlineEval(options);
+
+          if (json) {
+            console.log(JSON.stringify(result));
+          } else if (!result.success) {
+            render(<Text color="red">{result.error}</Text>);
+          } else {
+            console.log(`Deleted online eval config "${displayName}" (status: ${result.status})`);
+          }
+
+          process.exit(result.success ? 0 : 1);
+        } catch (error) {
+          reportFailure(getErrorMessage(error), 'Error: ');
+          process.exit(1);
+        }
+      }
+    );
+};
diff --git a/src/cli/commands/pause/index.ts b/src/cli/commands/pause/index.ts
new file mode 100644
index 00000000..183fc104
--- /dev/null
+++ b/src/cli/commands/pause/index.ts
@@ -0,0 +1 @@
+export { registerPause, registerStop } from './command';
diff --git a/src/cli/commands/remove/command.tsx b/src/cli/commands/remove/command.tsx
index 8ada29c0..e0a45f07 100644
--- a/src/cli/commands/remove/command.tsx
+++ b/src/cli/commands/remove/command.tsx
@@ -29,6 +29,8 @@ async function handleRemoveAll(_options: RemoveAllOptions): Promise<RemoveResult
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     });
 
     // Reset mcp.json gateways if it exists
diff --git a/src/cli/commands/remove/types.ts b/src/cli/commands/remove/types.ts
index d4dbe99b..2144cc4a 100644
--- a/src/cli/commands/remove/types.ts
+++ b/src/cli/commands/remove/types.ts
@@ -1,4 +1,4 @@
-export type ResourceType = 'agent' | 'gateway' | 'gateway-target' | 'memory' | 'identity';
+export type ResourceType = 'agent' | 'gateway' | 'gateway-target' | 'memory' | 'identity' | 'evaluator' | 'online-eval';
 
 export interface RemoveOptions {
   resourceType: ResourceType;
diff --git a/src/cli/commands/resume/command.tsx b/src/cli/commands/resume/command.tsx
new file mode 100644
index 00000000..24e24241
--- /dev/null
+++ b/src/cli/commands/resume/command.tsx
@@ -0,0 +1 @@
+export { registerResume, registerStop } from '../pause/command';
diff --git a/src/cli/commands/resume/index.ts b/src/cli/commands/resume/index.ts
new file mode 100644
index 00000000..5303df0f
--- /dev/null
+++ b/src/cli/commands/resume/index.ts
@@ -0,0 +1 @@
+export { registerResume } from './command';
diff --git a/src/cli/commands/run/command.tsx b/src/cli/commands/run/command.tsx
new file mode 100644
index 00000000..b3ea2790
--- /dev/null
+++ b/src/cli/commands/run/command.tsx
@@ -0,0 +1,112 @@
+import { getErrorMessage } from '../../errors';
+import { handleRunEval } from '../../operations/eval';
+import type { RunEvalOptions } from '../../operations/eval';
+import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
+import { requireProject } from '../../tui/guards';
+import type { Command } from '@commander-js/extra-typings';
+import { Text, render } from 'ink';
+import React from 'react';
+
+/**
+ * Print a human-readable summary of an eval run: a header line, one line per evaluator
+ * (aggregate score plus errored-session count), and the saved-results path when present.
+ */
+function formatRunOutput(result: Awaited<ReturnType<typeof handleRunEval>>): void {
+  const { run, filePath } = result;
+  if (!run) return; // no run on the result: print nothing
+
+  const when = new Date(run.timestamp).toLocaleString([], {
+    year: 'numeric',
+    month: 'short',
+    day: 'numeric',
+    hour: '2-digit',
+    minute: '2-digit',
+  });
+  console.log(`\nAgent: ${run.agent} | ${when} | Sessions: ${run.sessionCount} | Lookback: ${run.lookbackDays}d\n`);
+
+  run.results.forEach(({ evaluator, aggregateScore, sessionScores }) => {
+    const failed = sessionScores.filter(session => session.errorMessage).length;
+    console.log(`  ${evaluator}: ${aggregateScore.toFixed(2)}${failed > 0 ? ` (${failed} errors)` : ''}`);
+  });
+
+  if (filePath) console.log(`\nResults saved to: ${filePath}`);
+}
+
+/** Register `agentcore run eval`; a --days value that is not a non-negative integer is rejected up front. */
+export const registerRun = (program: Command) => {
+  const runCmd = program.command('run').description(COMMAND_DESCRIPTIONS.run);
+
+  runCmd
+    .command('eval')
+    .description('Run on-demand evaluation of agent traces')
+    .option('-a, --agent <name>', 'Agent to evaluate')
+    .option('--agent-arn <arn>', 'Agent runtime ARN (bypasses project config)')
+    .option('-e, --evaluator <names...>', 'Evaluator name(s) or Builtin.* IDs')
+    .option('--evaluator-arn <arns...>', 'Evaluator ARN(s) to use directly')
+    .option('--region <region>', 'AWS region (required with --agent-arn, inferred otherwise)')
+    .option('-s, --session-id <id>', 'Evaluate a specific session only')
+    .option('-t, --trace-id <id>', 'Evaluate a specific trace only')
+    .option('--days <days>', 'Lookback window in days', '7')
+    .option('--output <path>', 'Custom output file path for results')
+    .option('--json', 'Output as JSON')
+    .action(
+      async (cliOptions: {
+        agent?: string;
+        agentArn?: string;
+        evaluator?: string[];
+        evaluatorArn?: string[];
+        region?: string;
+        sessionId?: string;
+        traceId?: string;
+        days: string;
+        output?: string;
+        json?: boolean;
+      }) => {
+        requireProject();
+
+        if (!cliOptions.evaluator && !cliOptions.evaluatorArn) {
+          const error = 'At least one --evaluator or --evaluator-arn is required';
+          if (cliOptions.json) {
+            console.log(JSON.stringify({ success: false, error }));
+          } else {
+            render(<Text color="red">{error}</Text>);
+          }
+          process.exit(1);
+        }
+
+        try {
+          const days = parseInt(cliOptions.days, 10);
+          if (Number.isNaN(days) || days < 0) {
+            throw new Error(`Invalid --days "${cliOptions.days}": expected a non-negative integer`);
+          }
+          const options: RunEvalOptions = {
+            agent: cliOptions.agent,
+            agentArn: cliOptions.agentArn,
+            evaluator: cliOptions.evaluator ?? [],
+            evaluatorArn: cliOptions.evaluatorArn,
+            region: cliOptions.region,
+            sessionId: cliOptions.sessionId,
+            traceId: cliOptions.traceId,
+            days,
+            output: cliOptions.output,
+            json: cliOptions.json,
+          };
+          const result = await handleRunEval(options);
+          if (cliOptions.json) {
+            console.log(JSON.stringify(result));
+          } else {
+            formatRunOutput(result);
+            if (!result.success) render(<Text color="red">{result.error}</Text>);
+          }
+          process.exit(result.success ? 0 : 1);
+        } catch (error) {
+          if (cliOptions.json) {
+            console.log(JSON.stringify({ success: false, error: getErrorMessage(error) }));
+          } else {
+            render(<Text color="red">Error: {getErrorMessage(error)}</Text>);
+          }
+          process.exit(1);
+        }
+      }
+    );
+};
diff --git a/src/cli/commands/run/index.ts b/src/cli/commands/run/index.ts
new file mode 100644
index 00000000..a9200f87
--- /dev/null
+++ b/src/cli/commands/run/index.ts
@@ -0,0 +1 @@
+export { registerRun } from './command';
diff --git a/src/cli/commands/status/__tests__/action.test.ts b/src/cli/commands/status/__tests__/action.test.ts
index a9ec8ef4..731c8a82 100644
--- a/src/cli/commands/status/__tests__/action.test.ts
+++ b/src/cli/commands/status/__tests__/action.test.ts
@@ -1,6 +1,32 @@
 import type { AgentCoreMcpSpec, AgentCoreProjectSpec, DeployedResourceState } from '../../../../schema/index.js';
-import { computeResourceStatuses } from '../action.js';
-import { describe, expect, it } from 'vitest';
+import { computeResourceStatuses, handleProjectStatus } from '../action.js';
+import type { StatusContext } from '../action.js';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+// Spies behind the mocked AWS helpers; individual tests script and inspect these.
+const mockGetAgentRuntimeStatus = vi.fn();
+const mockGetEvaluator = vi.fn();
+const mockGetOnlineEvaluationConfig = vi.fn();
+
+vi.mock('../../../aws', () => ({
+  getAgentRuntimeStatus: (...args: unknown[]) => mockGetAgentRuntimeStatus(...args),
+}));
+
+vi.mock('../../../aws/agentcore-control', () => ({
+  getEvaluator: (...args: unknown[]) => mockGetEvaluator(...args),
+  getOnlineEvaluationConfig: (...args: unknown[]) => mockGetOnlineEvaluationConfig(...args),
+}));
+
+// Replace ExecLogger with a class whose methods are all spies.
+vi.mock('../../../logging', () => ({
+  ExecLogger: class {
+    startStep = vi.fn();
+    endStep = vi.fn();
+    log = vi.fn();
+    finalize = vi.fn();
+    getRelativeLogPath = vi.fn().mockReturnValue('logs/status.log');
+  },
+}));
 
 const baseProject: AgentCoreProjectSpec = {
   name: 'test-project',
@@ -259,6 +285,116 @@ describe('computeResourceStatuses', () => {
     expect(gwEntry!.identifier).toBe('gw-456');
   });
 
+  it('marks evaluator as deployed when in both local and deployed state', () => {
+    // The same evaluator name is declared in the project spec and recorded in deployed state.
+    const evaluatorArn = 'arn:aws:bedrock:us-east-1:123456789:evaluator/proj_MyEval-abc123';
+
+    const localSpec = {
+      ...baseProject,
+      evaluators: [{ name: 'MyEval', level: 'SESSION', config: {} }],
+    } as unknown as AgentCoreProjectSpec;
+
+    const deployed: DeployedResourceState = {
+      evaluators: {
+        MyEval: { evaluatorId: 'proj_MyEval-abc123', evaluatorArn },
+      },
+    };
+
+    const statuses = computeResourceStatuses(localSpec, deployed);
+    const match = statuses.find(s => s.resourceType === 'evaluator' && s.name === 'MyEval');
+
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('deployed');
+    expect(match?.identifier).toBe(evaluatorArn);
+    expect(match?.detail).toBe('SESSION — LLM-as-a-Judge');
+  });
+
+  it('marks evaluator as local-only when not deployed', () => {
+    const localSpec = {
+      ...baseProject,
+      evaluators: [{ name: 'MyEval', level: 'TRACE', config: {} }],
+    } as unknown as AgentCoreProjectSpec;
+
+    // No deployed resources are passed, so the evaluator is known only from the local spec.
+    const statuses = computeResourceStatuses(localSpec, undefined);
+    const match = statuses.find(s => s.resourceType === 'evaluator' && s.name === 'MyEval');
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('local-only');
+    expect(match?.detail).toBe('TRACE — LLM-as-a-Judge');
+  });
+
+  it('marks evaluator as pending-removal when deployed but removed from schema', () => {
+    // baseProject is passed unchanged; RemovedEval is supplied only through the deployed record.
+    const deployed: DeployedResourceState = {
+      evaluators: {
+        RemovedEval: {
+          evaluatorId: 'proj_RemovedEval-xyz',
+          evaluatorArn: 'arn:aws:bedrock:us-east-1:123456789:evaluator/proj_RemovedEval-xyz',
+        },
+      },
+    };
+
+    const statuses = computeResourceStatuses(baseProject, deployed);
+    const match = statuses.find(s => s.resourceType === 'evaluator' && s.name === 'RemovedEval');
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('pending-removal');
+  });
+
+  it('marks online-eval config as deployed when in both local and deployed state', () => {
+    const localSpec = {
+      ...baseProject,
+      onlineEvalConfigs: [{ name: 'TestConfig', evaluators: ['Builtin.Helpfulness'], samplingRate: 10 }],
+    } as unknown as AgentCoreProjectSpec;
+
+    const deployed: DeployedResourceState = {
+      onlineEvalConfigs: {
+        TestConfig: {
+          onlineEvaluationConfigId: 'proj_TestConfig-abc',
+          onlineEvaluationConfigArn: 'arn:aws:bedrock:us-east-1:123456789:online-evaluation-config/proj_TestConfig-abc',
+        },
+      },
+    };
+
+    const statuses = computeResourceStatuses(localSpec, deployed);
+    const match = statuses.find(s => s.resourceType === 'online-eval' && s.name === 'TestConfig');
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('deployed');
+    // One evaluator in the list, so the detail uses the singular "evaluator".
+    expect(match?.detail).toBe('1 evaluator, 10% sampling');
+  });
+
+  it('marks online-eval config as local-only when not deployed', () => {
+    const localSpec = {
+      ...baseProject,
+      onlineEvalConfigs: [{ name: 'TestConfig', evaluators: ['Builtin.X', 'Builtin.Y', 'Custom'], samplingRate: 50 }],
+    } as unknown as AgentCoreProjectSpec;
+
+    const statuses = computeResourceStatuses(localSpec, undefined);
+    const match = statuses.find(s => s.resourceType === 'online-eval' && s.name === 'TestConfig');
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('local-only');
+    // Three evaluators in the list, so the detail uses the plural "evaluators".
+    expect(match?.detail).toBe('3 evaluators, 50% sampling');
+  });
+
+  it('marks online-eval config as pending-removal when deployed but removed from schema', () => {
+    // baseProject is passed unchanged; RemovedConfig is supplied only through the deployed record.
+    const deployed: DeployedResourceState = {
+      onlineEvalConfigs: {
+        RemovedConfig: {
+          onlineEvaluationConfigId: 'proj_RemovedConfig-xyz',
+          onlineEvaluationConfigArn:
+            'arn:aws:bedrock:us-east-1:123456789:online-evaluation-config/proj_RemovedConfig-xyz',
+        },
+      },
+    };
+
+    const statuses = computeResourceStatuses(baseProject, deployed);
+    const match = statuses.find(s => s.resourceType === 'online-eval' && s.name === 'RemovedConfig');
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('pending-removal');
+  });
+
   it('handles mixed deployed and local-only resources', () => {
     const project = {
       ...baseProject,
@@ -301,3 +437,185 @@ describe('computeResourceStatuses', () => {
     expect(deployedCred!.deploymentState).toBe('deployed');
   });
 });
+
+describe('handleProjectStatus — live enrichment', () => {
+  // Reset all three AWS spies before each test so scripted behaviour never carries over.
+  beforeEach(() => {
+    const spies = [mockGetAgentRuntimeStatus, mockGetEvaluator, mockGetOnlineEvaluationConfig];
+    spies.forEach(spy => spy.mockReset());
+  });
+
+  afterEach(() => vi.clearAllMocks());
+
+  function makeContext(overrides: Partial<StatusContext> = {}): StatusContext {
+    const defaults = {
+      project: {
+        ...baseProject,
+        evaluators: [{ name: 'MyEval', level: 'SESSION', config: {} }],
+        onlineEvalConfigs: [{ name: 'MyConfig', evaluators: ['Builtin.Helpfulness'], samplingRate: 10 }],
+      } as unknown as AgentCoreProjectSpec,
+      awsTargets: [{ name: 'dev', region: 'us-east-1', account: '123456789' }],
+      deployedState: {
+        targets: {
+          dev: {
+            resources: {
+              evaluators: {
+                MyEval: {
+                  evaluatorId: 'eval-123',
+                  evaluatorArn: 'arn:aws:bedrock:us-east-1:123456789:evaluator/eval-123',
+                },
+              },
+              onlineEvalConfigs: {
+                MyConfig: {
+                  onlineEvaluationConfigId: 'cfg-456',
+                  onlineEvaluationConfigArn: 'arn:aws:bedrock:us-east-1:123456789:online-evaluation-config/cfg-456',
+                },
+              },
+            },
+          },
+        },
+      },
+    };
+    return { ...defaults, ...overrides } as unknown as StatusContext; // overrides replace whole top-level fields
+  }
+
+  it('enriches deployed evaluators with live status', async () => {
+    // Both lookups resolve; this test inspects only the evaluator entry.
+    mockGetEvaluator.mockResolvedValue({
+      evaluatorId: 'eval-123',
+      evaluatorName: 'MyEval',
+      status: 'ACTIVE',
+      level: 'SESSION',
+    });
+    mockGetOnlineEvaluationConfig.mockResolvedValue({
+      configId: 'cfg-456',
+      configName: 'MyConfig',
+      status: 'ACTIVE',
+      executionStatus: 'ENABLED',
+    });
+
+    const outcome = await handleProjectStatus(makeContext());
+
+    expect(outcome.success).toBe(true);
+
+    // The status returned by the mocked lookup should show up in the entry's detail text.
+    const match = outcome.resources.find(r => r.resourceType === 'evaluator' && r.name === 'MyEval');
+    expect(match).toBeDefined();
+    expect(match?.detail).toContain('ACTIVE');
+
+    // The lookup receives the target's region and the evaluator ID from deployed state.
+    expect(mockGetEvaluator).toHaveBeenCalledWith({ region: 'us-east-1', evaluatorId: 'eval-123' });
+  });
+
+  it('enriches deployed online eval configs with live status', async () => {
+    // Both lookups resolve; this test inspects only the online eval config entry.
+    mockGetEvaluator.mockResolvedValue({
+      evaluatorId: 'eval-123',
+      evaluatorName: 'MyEval',
+      status: 'ACTIVE',
+      level: 'SESSION',
+    });
+    mockGetOnlineEvaluationConfig.mockResolvedValue({
+      configId: 'cfg-456',
+      configName: 'MyConfig',
+      status: 'ACTIVE',
+      executionStatus: 'ENABLED',
+    });
+
+    const outcome = await handleProjectStatus(makeContext());
+
+    expect(outcome.success).toBe(true);
+
+    // Both the status and the execution status from the mocked lookup should appear in the detail.
+    const match = outcome.resources.find(r => r.resourceType === 'online-eval' && r.name === 'MyConfig');
+    expect(match).toBeDefined();
+    expect(match?.detail).toContain('ACTIVE');
+    expect(match?.detail).toContain('ENABLED');
+
+    // The lookup receives the target's region and the config ID from deployed state.
+    expect(mockGetOnlineEvaluationConfig).toHaveBeenCalledWith({ region: 'us-east-1', configId: 'cfg-456' });
+  });
+
+  it('sets error on evaluator when getEvaluator fails', async () => {
+    // Only the evaluator lookup rejects; the online eval config lookup still resolves.
+    mockGetEvaluator.mockRejectedValue(new Error('AccessDenied'));
+    mockGetOnlineEvaluationConfig.mockResolvedValue({
+      configId: 'cfg-456',
+      configName: 'MyConfig',
+      status: 'ACTIVE',
+      executionStatus: 'ENABLED',
+    });
+
+    const outcome = await handleProjectStatus(makeContext());
+    expect(outcome.success).toBe(true);
+
+    const match = outcome.resources.find(r => r.resourceType === 'evaluator' && r.name === 'MyEval');
+    expect(match).toBeDefined();
+    expect(match?.error).toBe('AccessDenied');
+  });
+
+  it('sets error on online eval config when getOnlineEvaluationConfig fails', async () => {
+    // Only the online eval config lookup rejects; the evaluator lookup still resolves.
+    mockGetEvaluator.mockResolvedValue({
+      evaluatorId: 'eval-123',
+      evaluatorName: 'MyEval',
+      status: 'ACTIVE',
+      level: 'SESSION',
+    });
+    mockGetOnlineEvaluationConfig.mockRejectedValue(new Error('ResourceNotFound'));
+
+    const outcome = await handleProjectStatus(makeContext());
+    expect(outcome.success).toBe(true);
+
+    const match = outcome.resources.find(r => r.resourceType === 'online-eval' && r.name === 'MyConfig');
+    expect(match).toBeDefined();
+    expect(match?.error).toBe('ResourceNotFound');
+  });
+
+  it('skips enrichment when no target config is found', async () => {
+    // awsTargets is empty here even though deployed state still lists an evaluator under "dev".
+    const context = makeContext({
+      awsTargets: [] as unknown as StatusContext['awsTargets'],
+      deployedState: {
+        targets: {
+          dev: {
+            resources: {
+              evaluators: {
+                MyEval: {
+                  evaluatorId: 'eval-123',
+                  evaluatorArn: 'arn:aws:bedrock:us-east-1:123456789:evaluator/eval-123',
+                },
+              },
+            },
+          },
+        },
+      } as unknown as StatusContext['deployedState'],
+    });
+
+    const outcome = await handleProjectStatus(context);
+    expect(outcome.success).toBe(true);
+    expect(mockGetEvaluator).not.toHaveBeenCalled();
+    expect(mockGetOnlineEvaluationConfig).not.toHaveBeenCalled();
+  });
+
+  it('does not enrich local-only evaluators', async () => {
+    // The "dev" target's deployed state has no resources, so MyEval exists only in the spec.
+    const context = makeContext({
+      deployedState: {
+        targets: {
+          dev: { resources: {} },
+        },
+      } as unknown as StatusContext['deployedState'],
+    });
+
+    const outcome = await handleProjectStatus(context);
+
+    expect(outcome.success).toBe(true);
+
+    const match = outcome.resources.find(r => r.resourceType === 'evaluator' && r.name === 'MyEval');
+    expect(match).toBeDefined();
+    expect(match?.deploymentState).toBe('local-only');
+    // A local-only evaluator must not trigger a live lookup.
+    expect(mockGetEvaluator).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/cli/commands/status/action.ts b/src/cli/commands/status/action.ts
index 7eab20de..fde41a5c 100644
--- a/src/cli/commands/status/action.ts
+++ b/src/cli/commands/status/action.ts
@@ -7,6 +7,7 @@ import type {
   DeployedState,
 } from '../../../schema';
 import { getAgentRuntimeStatus } from '../../aws';
+import { getEvaluator, getOnlineEvaluationConfig } from '../../aws/agentcore-control';
 import { getErrorMessage } from '../../errors';
 import { ExecLogger } from '../../logging';
 import type { ResourceDeploymentState } from './constants';
@@ -14,7 +15,7 @@ import type { ResourceDeploymentState } from './constants';
 export type { ResourceDeploymentState };
 
 export interface ResourceStatusEntry {
-  resourceType: 'agent' | 'memory' | 'credential' | 'gateway';
+  resourceType: 'agent' | 'memory' | 'credential' | 'gateway' | 'evaluator' | 'online-eval';
   name: string;
   deploymentState: ResourceDeploymentState;
   identifier?: string;
@@ -152,7 +153,24 @@ export function computeResourceStatuses(
     },
   });
 
-  return [...agents, ...credentials, ...memories, ...gateways];
+  const evaluators = diffResourceSet({
+    resourceType: 'evaluator',
+    localItems: project.evaluators ?? [],
+    deployedRecord: resources?.evaluators ?? {},
+    getIdentifier: deployed => deployed.evaluatorArn,
+    getLocalDetail: item => `${item.level} — LLM-as-a-Judge`,
+  });
+
+  const onlineEvalConfigs = diffResourceSet({
+    resourceType: 'online-eval',
+    localItems: project.onlineEvalConfigs ?? [],
+    deployedRecord: resources?.onlineEvalConfigs ?? {},
+    getIdentifier: deployed => deployed.onlineEvaluationConfigArn,
+    getLocalDetail: item =>
+      `${item.evaluators.length} evaluator${item.evaluators.length !== 1 ? 's' : ''}, ${item.samplingRate}% sampling`,
+  });
+
+  return [...agents, ...credentials, ...memories, ...gateways, ...evaluators, ...onlineEvalConfigs];
 }
 
 export async function handleProjectStatus(
@@ -245,6 +263,82 @@ export async function handleProjectStatus(
       const hasErrors = resources.some(r => r.error);
       logger.endStep(hasErrors ? 'error' : 'success');
     }
+
+    // Enrich deployed evaluators with live status, looked up by the evaluator ID in deployed state
+    const evaluatorStates = targetResources?.evaluators ?? {};
+    const deployedEvaluators = resources.filter(
+      e => e.resourceType === 'evaluator' && e.deploymentState === 'deployed' && evaluatorStates[e.name]
+    );
+
+    if (deployedEvaluators.length > 0) {
+      logger.startStep(
+        `Fetch evaluator status (${deployedEvaluators.length} evaluator${deployedEvaluators.length !== 1 ? 's' : ''})`
+      );
+
+      await Promise.all(
+        resources.map(async (entry, i) => {
+          if (entry.resourceType !== 'evaluator' || entry.deploymentState !== 'deployed') return;
+          const evalState = evaluatorStates[entry.name];
+          if (!evalState) return;
+          try {
+            const evalResult = await getEvaluator({
+              region: targetConfig.region,
+              evaluatorId: evalState.evaluatorId,
+            });
+            // Mirror the online-eval branch: a missing detail must never render as the literal "undefined".
+            const detail = entry.detail ? `${entry.detail} — ${evalResult.status}` : evalResult.status;
+            resources[i] = { ...entry, detail };
+            logger.log(`  ${entry.name}: ${evalResult.status} (${evalState.evaluatorId})`);
+          } catch (error) {
+            const errorMsg = getErrorMessage(error);
+            resources[i] = { ...entry, error: errorMsg };
+            logger.log(`  ${entry.name}: ERROR - ${errorMsg}`, 'error');
+          }
+        })
+      );
+
+      const hasEvalErrors = resources.some(r => r.resourceType === 'evaluator' && r.error);
+      logger.endStep(hasEvalErrors ? 'error' : 'success');
+    }
+
+    // Enrich deployed online eval configs with live status, looked up by the config ID
+    // recorded for each config in this target's deployed state.
+    const onlineEvalStates = targetResources?.onlineEvalConfigs ?? {};
+    const deployedOnlineEvals = resources.filter(
+      r => r.resourceType === 'online-eval' && r.deploymentState === 'deployed' && onlineEvalStates[r.name]
+    );
+
+    if (deployedOnlineEvals.length > 0) {
+      logger.startStep(
+        `Fetch online eval status (${deployedOnlineEvals.length} config${deployedOnlineEvals.length !== 1 ? 's' : ''})`
+      );
+
+      await Promise.all(
+        resources.map(async (entry, index) => {
+          if (entry.resourceType !== 'online-eval' || entry.deploymentState !== 'deployed') return;
+          const deployedConfig = onlineEvalStates[entry.name];
+          if (!deployedConfig) return;
+
+          const { onlineEvaluationConfigId: configId } = deployedConfig;
+          try {
+            const live = await getOnlineEvaluationConfig({ region: targetConfig.region, configId });
+            const statusLabel = `${live.status} (${live.executionStatus})`;
+            // Append to the local detail when there is one; otherwise the live label stands alone.
+            const detail = entry.detail ? `${entry.detail} — ${statusLabel}` : statusLabel;
+            resources[index] = { ...entry, detail };
+            logger.log(`  ${entry.name}: ${statusLabel} (${configId})`);
+          } catch (error) {
+            // The failure is recorded on this entry only; the other lookups still run to completion.
+            const message = getErrorMessage(error);
+            resources[index] = { ...entry, error: message };
+            logger.log(`  ${entry.name}: ERROR - ${message}`, 'error');
+          }
+        })
+      );
+
+      const onlineEvalFailed = resources.some(r => r.resourceType === 'online-eval' && r.error);
+      logger.endStep(onlineEvalFailed ? 'error' : 'success');
+    }
   }
 
   logger.finalize(true);
diff --git a/src/cli/commands/status/command.tsx b/src/cli/commands/status/command.tsx
index 09279fd6..1a80e4af 100644
--- a/src/cli/commands/status/command.tsx
+++ b/src/cli/commands/status/command.tsx
@@ -7,7 +7,7 @@ import { DEPLOYMENT_STATE_COLORS, DEPLOYMENT_STATE_LABELS } from './constants';
 import type { Command } from '@commander-js/extra-typings';
 import { Box, Text, render } from 'ink';
 
-const VALID_RESOURCE_TYPES = ['agent', 'memory', 'credential', 'gateway'] as const;
+const VALID_RESOURCE_TYPES = ['agent', 'memory', 'credential', 'gateway', 'evaluator', 'online-eval'] as const;
 const VALID_STATES = ['deployed', 'local-only', 'pending-removal'] as const;
 
 interface StatusCliOptions {
@@ -126,6 +126,8 @@ export const registerStatus = (program: Command) => {
         const credentials = filtered.filter(r => r.resourceType === 'credential');
         const memories = filtered.filter(r => r.resourceType === 'memory');
         const gateways = filtered.filter(r => r.resourceType === 'gateway');
+        const evaluators = filtered.filter(r => r.resourceType === 'evaluator');
+        const onlineEvals = filtered.filter(r => r.resourceType === 'online-eval');
 
         render(
           <Box flexDirection="column">
@@ -170,6 +172,24 @@ export const registerStatus = (program: Command) => {
               </Box>
             )}
 
+            {evaluators.length > 0 && (
+              <Box flexDirection="column" marginTop={1}>
+                <Text bold>Evaluators</Text>
+                {evaluators.map(entry => (
+                  <ResourceEntry key={`${entry.resourceType}-${entry.name}`} entry={entry} />
+                ))}
+              </Box>
+            )}
+
+            {onlineEvals.length > 0 && (
+              <Box flexDirection="column" marginTop={1}>
+                <Text bold>Online Eval Configs</Text>
+                {onlineEvals.map(entry => (
+                  <ResourceEntry key={`${entry.resourceType}-${entry.name}`} entry={entry} />
+                ))}
+              </Box>
+            )}
+
             {filtered.length === 0 && <Text dimColor>No resources match the given filters.</Text>}
           </Box>
         );
diff --git a/src/cli/external-requirements/__tests__/checks-extended.test.ts b/src/cli/external-requirements/__tests__/checks-extended.test.ts
index 30384086..42e44152 100644
--- a/src/cli/external-requirements/__tests__/checks-extended.test.ts
+++ b/src/cli/external-requirements/__tests__/checks-extended.test.ts
@@ -48,6 +48,8 @@ describe('requiresUv', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresUv(project)).toBe(true);
   });
@@ -68,6 +70,8 @@ describe('requiresUv', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresUv(project)).toBe(false);
   });
@@ -79,6 +83,8 @@ describe('requiresUv', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresUv(project)).toBe(false);
   });
@@ -101,6 +107,8 @@ describe('requiresContainerRuntime', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresContainerRuntime(project)).toBe(true);
   });
@@ -121,6 +129,8 @@ describe('requiresContainerRuntime', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresContainerRuntime(project)).toBe(false);
   });
@@ -132,6 +142,8 @@ describe('requiresContainerRuntime', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresContainerRuntime(project)).toBe(false);
   });
@@ -160,6 +172,8 @@ describe('requiresContainerRuntime', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
     expect(requiresContainerRuntime(project)).toBe(true);
   });
@@ -222,6 +236,8 @@ describe('checkDependencyVersions', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const result = await checkDependencyVersions(project);
@@ -237,6 +253,8 @@ describe('checkDependencyVersions', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const result = await checkDependencyVersions(project);
@@ -260,6 +278,8 @@ describe('checkDependencyVersions', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const result = await checkDependencyVersions(project);
diff --git a/src/cli/logging/remove-logger.ts b/src/cli/logging/remove-logger.ts
index a21201ff..f40ace6c 100644
--- a/src/cli/logging/remove-logger.ts
+++ b/src/cli/logging/remove-logger.ts
@@ -7,7 +7,7 @@ const REMOVE_LOGS_SUBDIR = 'remove';
 
 export interface RemoveLoggerOptions {
   /** Type of resource being removed */
-  resourceType: 'agent' | 'memory' | 'identity' | 'gateway' | 'gateway-target';
+  resourceType: 'agent' | 'memory' | 'identity' | 'gateway' | 'gateway-target' | 'evaluator' | 'online-eval';
   /** Name of the resource being removed */
   resourceName: string;
 }
diff --git a/src/cli/operations/agent/generate/write-agent-to-project.ts b/src/cli/operations/agent/generate/write-agent-to-project.ts
index 85819835..37b001ea 100644
--- a/src/cli/operations/agent/generate/write-agent-to-project.ts
+++ b/src/cli/operations/agent/generate/write-agent-to-project.ts
@@ -67,6 +67,8 @@ export async function writeAgentToProject(config: GenerateConfig, options?: Writ
       agents: [agent],
       memories,
       credentials,
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     await configIO.writeProjectSpec(project);
diff --git a/src/cli/operations/deploy/__tests__/preflight.test.ts b/src/cli/operations/deploy/__tests__/preflight.test.ts
index dd148df4..0818acf7 100644
--- a/src/cli/operations/deploy/__tests__/preflight.test.ts
+++ b/src/cli/operations/deploy/__tests__/preflight.test.ts
@@ -81,7 +81,7 @@ describe('validateProject', () => {
     mockReadDeployedState.mockRejectedValue(new Error('No deployed state'));
 
     await expect(validateProject()).rejects.toThrow(
-      'No resources defined in project. Add an agent with "agentcore add agent", a memory with "agentcore add memory", or a gateway with "agentcore add gateway" before deploying.'
+      'No resources defined in project. Add at least one resource (agent, memory, evaluator, or gateway) before deploying.'
     );
   });
 
diff --git a/src/cli/operations/deploy/preflight.ts b/src/cli/operations/deploy/preflight.ts
index 9c5025a5..fe522053 100644
--- a/src/cli/operations/deploy/preflight.ts
+++ b/src/cli/operations/deploy/preflight.ts
@@ -82,6 +82,7 @@ export async function validateProject(): Promise<PreflightContext> {
   let isTeardownDeploy = false;
   const hasAgents = projectSpec.agents && projectSpec.agents.length > 0;
   const hasMemories = projectSpec.memories && projectSpec.memories.length > 0;
+  const hasEvaluators = projectSpec.evaluators && projectSpec.evaluators.length > 0;
 
   // Check for gateways in mcp.json
   let hasGateways = false;
@@ -92,7 +93,7 @@ export async function validateProject(): Promise<PreflightContext> {
     // No mcp.json or invalid — no gateways
   }
 
-  if (!hasAgents && !hasGateways && !hasMemories) {
+  if (!hasAgents && !hasGateways && !hasMemories && !hasEvaluators) {
     let hasExistingStack = false;
     try {
       const deployedState = await configIO.readDeployedState();
@@ -102,7 +103,7 @@ export async function validateProject(): Promise<PreflightContext> {
     }
     if (!hasExistingStack) {
       throw new Error(
-        'No resources defined in project. Add an agent with "agentcore add agent", a memory with "agentcore add memory", or a gateway with "agentcore add gateway" before deploying.'
+        'No resources defined in project. Add at least one resource (agent, memory, evaluator, or gateway) before deploying.'
       );
     }
     isTeardownDeploy = true;
diff --git a/src/cli/operations/dev/__tests__/config.test.ts b/src/cli/operations/dev/__tests__/config.test.ts
index c6e04210..3751e6c6 100644
--- a/src/cli/operations/dev/__tests__/config.test.ts
+++ b/src/cli/operations/dev/__tests__/config.test.ts
@@ -16,6 +16,8 @@ describe('getDevConfig', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project);
@@ -38,6 +40,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project);
@@ -60,6 +64,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -88,6 +94,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     expect(() => getDevConfig(workingDir, project, undefined, 'NonExistentAgent')).toThrow(
@@ -111,6 +119,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     expect(() => getDevConfig(workingDir, project, undefined, 'NodeAgent')).toThrow('Dev mode only supports Python');
@@ -132,6 +142,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -156,6 +168,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     // No configRoot provided
@@ -180,6 +194,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -204,6 +220,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -228,6 +246,8 @@ describe('getDevConfig', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -265,6 +285,8 @@ describe('getAgentPort', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     expect(getAgentPort(project, 'Agent1', 8080)).toBe(8080);
@@ -278,6 +300,8 @@ describe('getAgentPort', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     expect(getAgentPort(project, 'NonExistent', 9000)).toBe(9000);
@@ -296,6 +320,8 @@ describe('getDevSupportedAgents', () => {
       agents: [],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     expect(getDevSupportedAgents(project)).toEqual([]);
@@ -317,6 +343,8 @@ describe('getDevSupportedAgents', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     expect(getDevSupportedAgents(project)).toEqual([]);
@@ -346,6 +374,8 @@ describe('getDevSupportedAgents', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const supported = getDevSupportedAgents(project);
@@ -369,6 +399,8 @@ describe('getDevSupportedAgents', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const supported = getDevSupportedAgents(project);
@@ -400,6 +432,8 @@ describe('getDevSupportedAgents', () => {
       ],
       memories: [],
       credentials: [],
+      evaluators: [],
+      onlineEvalConfigs: [],
     };
 
     const supported = getDevSupportedAgents(project);
diff --git a/src/cli/operations/eval/__tests__/get-eval-run.test.ts b/src/cli/operations/eval/__tests__/get-eval-run.test.ts
new file mode 100644
index 00000000..6007221d
--- /dev/null
+++ b/src/cli/operations/eval/__tests__/get-eval-run.test.ts
@@ -0,0 +1,61 @@
+import { handleGetEvalRun } from '../get-eval-run.js';
+import type { EvalRunResult } from '../types.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+// Spy standing in for the storage module's loadEvalRun.
+const mockLoadEvalRun = vi.fn();
+
+vi.mock('../storage', () => ({ loadEvalRun: (...args: unknown[]) => mockLoadEvalRun(...args) }));
+
+// Run record used as the loader's return value in the success-path test.
+const sampleRun: EvalRunResult = {
+  timestamp: '2025-01-15T10:00:00.000Z',
+  agent: 'test-agent',
+  evaluators: ['Builtin.GoalSuccessRate'],
+  lookbackDays: 7,
+  sessionCount: 5,
+  results: [
+    {
+      evaluator: 'Builtin.GoalSuccessRate',
+      aggregateScore: 0.9,
+      sessionScores: [{ sessionId: 's1', value: 0.9 }],
+    },
+  ],
+};
+
+// Covers handleGetEvalRun with the storage loader replaced by a spy.
+describe('handleGetEvalRun', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('returns the run on success', () => {
+    mockLoadEvalRun.mockReturnValue(sampleRun);
+    const result = handleGetEvalRun({ filename: 'eval_2025-01-15_10-00-00' });
+
+    expect(result.success).toBe(true);
+    expect(result.run).toEqual(sampleRun);
+    expect(mockLoadEvalRun).toHaveBeenCalledWith('eval_2025-01-15_10-00-00');
+  });
+
+  it('returns error when run is not found', () => {
+    mockLoadEvalRun.mockImplementation(() => {
+      throw new Error('Eval run "eval_2025-01-01_00-00-00" not found');
+    });
+
+    const result = handleGetEvalRun({ filename: 'eval_2025-01-01_00-00-00' });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('not found');
+    expect(result.run).toBeUndefined();
+  });
+
+  it('handles non-Error thrown values via getErrorMessage', () => {
+    // Throw a bare string (not an Error) so the non-Error path named in the title is what runs.
+    mockLoadEvalRun.mockImplementation(() => {
+      throw 'string error'; // eslint-disable-line @typescript-eslint/only-throw-error
+    });
+
+    const result = handleGetEvalRun({ filename: 'eval_bad' });
+    expect(result.success).toBe(false);
+    expect(result.error).toBe('string error');
+  });
+});
diff --git a/src/cli/operations/eval/__tests__/list-eval-runs.test.ts b/src/cli/operations/eval/__tests__/list-eval-runs.test.ts
new file mode 100644
index 00000000..c9a71a8c
--- /dev/null
+++ b/src/cli/operations/eval/__tests__/list-eval-runs.test.ts
@@ -0,0 +1,111 @@
+import { handleListEvalRuns } from '../list-eval-runs.js';
+import type { EvalRunResult } from '../types.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockListEvalRuns = vi.fn(); // stand-in for listEvalRuns; each test configures its return value or throw
+
+vi.mock('../storage', () => ({
+  listEvalRuns: () => mockListEvalRuns(), // wrapper takes no arguments and forwards none
+}));
+
+function makeRun(agent: string, timestamp: string): EvalRunResult { // fixture: only agent and timestamp vary
+  return {
+    timestamp,
+    agent,
+    evaluators: ['Builtin.GoalSuccessRate'],
+    lookbackDays: 7,
+    sessionCount: 3,
+    results: [], // left empty; the tests in this file assert on agent, timestamp and list length only
+  };
+}
+
+describe('handleListEvalRuns', () => { // storage is mocked, so filtering, limiting and error mapping are under test
+  afterEach(() => vi.clearAllMocks()); // drop recorded calls; each test installs its own mock behavior
+
+  it('returns all runs when no filters specified', () => {
+    const runs = [makeRun('agent-a', '2025-01-15T10:00:00.000Z'), makeRun('agent-b', '2025-01-15T11:00:00.000Z')];
+    mockListEvalRuns.mockReturnValue(runs);
+
+    const result = handleListEvalRuns({});
+
+    expect(result.success).toBe(true);
+    expect(result.runs).toHaveLength(2);
+  });
+
+  it('filters by agent name', () => {
+    const runs = [
+      makeRun('agent-a', '2025-01-15T10:00:00.000Z'),
+      makeRun('agent-b', '2025-01-15T11:00:00.000Z'),
+      makeRun('agent-a', '2025-01-15T12:00:00.000Z'),
+    ];
+    mockListEvalRuns.mockReturnValue(runs);
+
+    const result = handleListEvalRuns({ agent: 'agent-a' });
+
+    expect(result.success).toBe(true);
+    expect(result.runs).toHaveLength(2);
+    expect(result.runs!.every(r => r.agent === 'agent-a')).toBe(true);
+  });
+
+  it('limits the number of results', () => {
+    const runs = [
+      makeRun('a', '2025-01-15T10:00:00.000Z'),
+      makeRun('a', '2025-01-15T11:00:00.000Z'),
+      makeRun('a', '2025-01-15T12:00:00.000Z'),
+    ];
+    mockListEvalRuns.mockReturnValue(runs);
+
+    const result = handleListEvalRuns({ limit: 2 });
+
+    expect(result.success).toBe(true);
+    expect(result.runs).toHaveLength(2);
+  });
+
+  it('applies agent filter before limit', () => {
+    const runs = [
+      makeRun('a', '2025-01-15T10:00:00.000Z'),
+      makeRun('b', '2025-01-15T11:00:00.000Z'),
+      makeRun('a', '2025-01-15T12:00:00.000Z'),
+      makeRun('a', '2025-01-15T13:00:00.000Z'),
+    ];
+    mockListEvalRuns.mockReturnValue(runs);
+
+    const result = handleListEvalRuns({ agent: 'a', limit: 2 });
+
+    expect(result.runs).toHaveLength(2);
+    expect(result.runs![0]!.timestamp).toBe('2025-01-15T10:00:00.000Z');
+    expect(result.runs![1]!.timestamp).toBe('2025-01-15T12:00:00.000Z'); // the 'b' run was skipped, not counted
+  });
+
+  it('returns empty array when no runs exist', () => {
+    mockListEvalRuns.mockReturnValue([]);
+
+    const result = handleListEvalRuns({});
+
+    expect(result.success).toBe(true);
+    expect(result.runs).toEqual([]);
+  });
+
+  it('returns error when storage throws', () => {
+    mockListEvalRuns.mockImplementation(() => {
+      throw new Error('disk error');
+    });
+
+    const result = handleListEvalRuns({});
+
+    expect(result.success).toBe(false);
+    expect(result.error).toBe('disk error');
+    expect(result.runs).toBeUndefined();
+  });
+
+  it('handles non-Error thrown values', () => {
+    mockListEvalRuns.mockImplementation(() => {
+      throw 42 as unknown; // a number, not an Error, so the non-Error path is exercised
+    });
+
+    const result = handleListEvalRuns({});
+
+    expect(result.success).toBe(false);
+    expect(result.error).toBe('42');
+  });
+});
diff --git a/src/cli/operations/eval/__tests__/logs-eval.test.ts b/src/cli/operations/eval/__tests__/logs-eval.test.ts
new file mode 100644
index 00000000..2a0d5c14
--- /dev/null
+++ b/src/cli/operations/eval/__tests__/logs-eval.test.ts
@@ -0,0 +1,287 @@
+import { handleLogsEval } from '../logs-eval.js';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+const mockLoadDeployedProjectConfig = vi.fn(); // one vi.fn per mocked dependency; tests program them individually
+const mockResolveAgent = vi.fn();
+const mockGetOnlineEvaluationConfig = vi.fn();
+const mockSearchLogs = vi.fn();
+const mockStreamLogs = vi.fn();
+
+vi.mock('../../resolve-agent', () => ({
+  loadDeployedProjectConfig: () => mockLoadDeployedProjectConfig(),
+  resolveAgent: (...args: unknown[]) => mockResolveAgent(...args),
+}));
+
+vi.mock('../../../aws/agentcore-control', () => ({
+  getOnlineEvaluationConfig: (...args: unknown[]) => mockGetOnlineEvaluationConfig(...args),
+}));
+
+vi.mock('../../../aws/cloudwatch', () => ({
+  searchLogs: (...args: unknown[]) => mockSearchLogs(...args),
+  streamLogs: (...args: unknown[]) => mockStreamLogs(...args),
+}));
+
+vi.mock('../../../../lib/utils', () => ({
+  parseTimeString: (s: string) => (s === '1h' ? Date.now() - 3_600_000 : Date.now()),
+})); // stub: '1h' resolves to one hour before now; any other input resolves to now
+
+function makeContext({
+  agentName = 'my-agent',
+  onlineEvalConfigs = [{ name: 'eval-config' }] as { name: string }[],
+  deployedConfigId = 'cfg-123',
+} = {}) { // builds the value the tests make the mocked loadDeployedProjectConfig resolve to
+  return {
+    project: {
+      agents: [{ name: agentName }],
+      onlineEvalConfigs,
+    },
+    awsTargets: [{ name: 'dev', region: 'us-east-1', account: '111222333444' }],
+    deployedState: {
+      targets: {
+        dev: {
+          resources: {
+            agents: {
+              [agentName]: {
+                runtimeId: 'rt-123',
+                runtimeArn: `arn:aws:bedrock:us-east-1:111222333444:agent-runtime/rt-123`,
+                roleArn: 'arn:aws:iam::111222333444:role/test',
+              },
+            },
+            onlineEvalConfigs: deployedConfigId // an empty id produces a target with no deployed eval configs
+              ? {
+                  'eval-config': {
+                    onlineEvaluationConfigId: deployedConfigId,
+                    onlineEvaluationConfigArn: `arn:aws:bedrock:us-east-1:111222333444:online-evaluation-config/${deployedConfigId}`,
+                  },
+                }
+              : {},
+          },
+        },
+      },
+    },
+  };
+}
+
+function makeResolvedAgent(agentName = 'my-agent') { // successful result handed to the mocked resolveAgent
+  return {
+    success: true as const, // keeps the literal type true rather than widening to boolean
+    agent: {
+      agentName,
+      targetName: 'dev', // same target name, region and account that makeContext uses
+      region: 'us-east-1',
+      accountId: '111222333444',
+      runtimeId: 'rt-123',
+    },
+  };
+}
+
+describe('handleLogsEval', () => { // all AWS and project-config dependencies are mocked
+  beforeEach(() => {
+    // Default: API returns the convention-based log group name
+    mockGetOnlineEvaluationConfig.mockImplementation((opts: { configId: string }) =>
+      Promise.resolve({
+        configId: opts.configId,
+        configName: 'eval-config',
+        status: 'ACTIVE',
+        executionStatus: 'ENABLED',
+        outputLogGroupName: `/aws/bedrock-agentcore/evaluations/results/${opts.configId}`,
+      })
+    );
+  });
+
+  afterEach(() => vi.clearAllMocks()); // drop recorded calls so the not.toHaveBeenCalled checks start clean
+
+  it('returns error when agent resolution fails', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue({});
+    mockResolveAgent.mockReturnValue({ success: false, error: 'No agents defined' });
+
+    const result = await handleLogsEval({});
+
+    expect(result.success).toBe(false);
+    expect(result.error).toBe('No agents defined'); // the resolver's message is surfaced verbatim
+  });
+
+  it('returns error when no online eval configs exist for the agent', async () => {
+    const ctx = makeContext({ onlineEvalConfigs: [] });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    const result = await handleLogsEval({});
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('No deployed online eval configs found');
+  });
+
+  it('returns error when online eval configs exist but none are deployed', async () => {
+    const ctx = makeContext({ deployedConfigId: '' }); // empty id: project has the config, deployed state does not
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    const result = await handleLogsEval({});
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('No deployed online eval configs found');
+  });
+
+  it('searches logs with time range when --since is specified', async () => {
+    const ctx = makeContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    async function* emptyGenerator() {
+      // no events
+    }
+    mockSearchLogs.mockReturnValue(emptyGenerator());
+
+    const result = await handleLogsEval({ since: '1h' });
+
+    expect(result.success).toBe(true);
+    expect(mockSearchLogs).toHaveBeenCalledWith(
+      expect.objectContaining({
+        logGroupName: '/aws/bedrock-agentcore/evaluations/results/cfg-123',
+        region: 'us-east-1',
+      })
+    );
+    expect(mockStreamLogs).not.toHaveBeenCalled();
+  });
+
+  it('streams logs by default when no time range is specified', async () => {
+    const ctx = makeContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    async function* emptyGenerator() {
+      // no events
+    }
+    mockStreamLogs.mockReturnValue(emptyGenerator());
+
+    // eslint-disable-next-line @typescript-eslint/no-empty-function
+    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+    const result = await handleLogsEval({});
+
+    expect(result.success).toBe(true);
+    expect(mockStreamLogs).toHaveBeenCalledWith(
+      expect.objectContaining({
+        logGroupName: '/aws/bedrock-agentcore/evaluations/results/cfg-123',
+        region: 'us-east-1',
+      })
+    );
+    expect(mockSearchLogs).not.toHaveBeenCalled();
+
+    consoleSpy.mockRestore();
+  });
+
+  it('skips ResourceNotFoundException during search', async () => {
+    const ctx = makeContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    // eslint-disable-next-line require-yield, @typescript-eslint/require-await
+    async function* throwNotFound(): AsyncGenerator<never> {
+      const err = new Error('Log group not found');
+      err.name = 'ResourceNotFoundException'; // Error.name is already a writable string; no cast needed
+      throw err;
+    }
+    mockSearchLogs.mockReturnValue(throwNotFound());
+
+    const result = await handleLogsEval({ since: '1h' });
+
+    expect(result.success).toBe(true);
+  });
+
+  it('resolves correct log group path from deployed config', async () => {
+    const ctx = makeContext({ deployedConfigId: 'my-custom-config-id' });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    async function* emptyGenerator() {
+      // no events
+    }
+    mockSearchLogs.mockReturnValue(emptyGenerator());
+
+    await handleLogsEval({ since: '1h' });
+
+    expect(mockSearchLogs).toHaveBeenCalledWith(
+      expect.objectContaining({
+        logGroupName: '/aws/bedrock-agentcore/evaluations/results/my-custom-config-id',
+      })
+    );
+  });
+
+  it('uses log group name from API when available', async () => {
+    const ctx = makeContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    mockGetOnlineEvaluationConfig.mockResolvedValue({
+      configId: 'cfg-123',
+      configName: 'eval-config',
+      status: 'ACTIVE',
+      executionStatus: 'ENABLED',
+      outputLogGroupName: '/custom/log/group/from-api', // deliberately differs from the convention-based path
+    });
+
+    async function* emptyGenerator() {
+      // no events
+    }
+    mockSearchLogs.mockReturnValue(emptyGenerator());
+
+    await handleLogsEval({ since: '1h' });
+
+    expect(mockSearchLogs).toHaveBeenCalledWith(
+      expect.objectContaining({
+        logGroupName: '/custom/log/group/from-api',
+      })
+    );
+  });
+
+  it('falls back to convention-based log group when API call fails', async () => {
+    const ctx = makeContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    mockGetOnlineEvaluationConfig.mockRejectedValue(new Error('AccessDenied'));
+
+    async function* emptyGenerator() {
+      // no events
+    }
+    mockSearchLogs.mockReturnValue(emptyGenerator());
+
+    await handleLogsEval({ since: '1h' });
+
+    expect(mockSearchLogs).toHaveBeenCalledWith(
+      expect.objectContaining({
+        logGroupName: '/aws/bedrock-agentcore/evaluations/results/cfg-123',
+      })
+    );
+  });
+
+  it('surfaces failure reason from config in failed state', async () => {
+    const ctx = makeContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue(makeResolvedAgent());
+
+    mockGetOnlineEvaluationConfig.mockResolvedValue({
+      configId: 'cfg-123',
+      configName: 'eval-config',
+      status: 'CREATE_FAILED',
+      executionStatus: 'DISABLED',
+      failureReason: 'IAM role does not exist',
+      outputLogGroupName: '/aws/bedrock-agentcore/evaluations/results/cfg-123',
+    });
+
+    async function* emptyGenerator() {
+      // no events
+    }
+    mockSearchLogs.mockReturnValue(emptyGenerator());
+
+    // eslint-disable-next-line @typescript-eslint/no-empty-function
+    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+    await handleLogsEval({ since: '1h' });
+
+    expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('IAM role does not exist'));
+    consoleSpy.mockRestore();
+  });
+});
diff --git a/src/cli/operations/eval/__tests__/pause-resume.test.ts b/src/cli/operations/eval/__tests__/pause-resume.test.ts
new file mode 100644
index 00000000..085f7326
--- /dev/null
+++ b/src/cli/operations/eval/__tests__/pause-resume.test.ts
@@ -0,0 +1,122 @@
+import { handlePauseResume } from '../pause-resume.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockLoadDeployedProjectConfig = vi.fn(); // resolves to the deployed-project context each test supplies
+const mockUpdateOnlineEvalExecutionStatus = vi.fn(); // stand-in for the control-plane update call
+
+vi.mock('../../resolve-agent', () => ({
+  loadDeployedProjectConfig: () => mockLoadDeployedProjectConfig(),
+}));
+
+vi.mock('../../../aws/agentcore-control', () => ({
+  updateOnlineEvalExecutionStatus: (...args: unknown[]) => mockUpdateOnlineEvalExecutionStatus(...args),
+}));
+
+function makeContext(configName: string, configId: string, targetName = 'dev', region = 'us-east-1') {
+  return { // one AWS target whose deployed state holds exactly one online eval config
+    project: {},
+    awsTargets: [{ name: targetName, region, account: '123456789012' }],
+    deployedState: {
+      targets: {
+        [targetName]: {
+          resources: {
+            onlineEvalConfigs: {
+              [configName]: { // keyed by config name, the value the tests pass as `name`
+                onlineEvaluationConfigId: configId,
+                onlineEvaluationConfigArn: `arn:aws:bedrock:${region}:123456789012:online-evaluation-config/${configId}`,
+              },
+            },
+          },
+        },
+      },
+    },
+  };
+}
+
+describe('handlePauseResume', () => {
+  afterEach(() => vi.clearAllMocks()); // drop recorded calls between tests
+
+  it('pauses an online eval config', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue(makeContext('my-config', 'cfg-123'));
+    mockUpdateOnlineEvalExecutionStatus.mockResolvedValue({
+      configId: 'cfg-123',
+      executionStatus: 'DISABLED',
+      status: 'ACTIVE',
+    });
+
+    const result = await handlePauseResume({ name: 'my-config' }, 'pause');
+
+    expect(result.success).toBe(true);
+    expect(result.executionStatus).toBe('DISABLED');
+    expect(mockUpdateOnlineEvalExecutionStatus).toHaveBeenCalledWith({
+      region: 'us-east-1',
+      onlineEvaluationConfigId: 'cfg-123',
+      executionStatus: 'DISABLED', // 'pause' is expected to request DISABLED
+    });
+  });
+
+  it('resumes an online eval config', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue(makeContext('my-config', 'cfg-123'));
+    mockUpdateOnlineEvalExecutionStatus.mockResolvedValue({
+      configId: 'cfg-123',
+      executionStatus: 'ENABLED',
+      status: 'ACTIVE',
+    });
+
+    const result = await handlePauseResume({ name: 'my-config' }, 'resume');
+
+    expect(result.success).toBe(true);
+    expect(result.executionStatus).toBe('ENABLED');
+    expect(mockUpdateOnlineEvalExecutionStatus).toHaveBeenCalledWith({
+      region: 'us-east-1',
+      onlineEvaluationConfigId: 'cfg-123',
+      executionStatus: 'ENABLED', // 'resume' is expected to request ENABLED
+    });
+  });
+
+  it('returns error when no deployed targets exist', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue({
+      project: {},
+      awsTargets: [],
+      deployedState: { targets: {} },
+    });
+
+    const result = await handlePauseResume({ name: 'my-config' }, 'pause');
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('No deployed targets found');
+  });
+
+  it('returns error when config name is not found in deployed state', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue(makeContext('other-config', 'cfg-999'));
+
+    const result = await handlePauseResume({ name: 'missing-config' }, 'pause');
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('missing-config'); // the message names the config that was requested
+    expect(result.error).toContain('not found');
+  });
+
+  it('returns error when target config is missing from aws-targets', async () => {
+    const context = makeContext('my-config', 'cfg-123');
+    // Remove the target from awsTargets but keep it in deployedState
+    context.awsTargets = [];
+    mockLoadDeployedProjectConfig.mockResolvedValue(context);
+
+    const result = await handlePauseResume({ name: 'my-config' }, 'pause');
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('Target config');
+    expect(result.error).toContain('not found');
+  });
+
+  it('returns error when the SDK call fails', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue(makeContext('my-config', 'cfg-123'));
+    mockUpdateOnlineEvalExecutionStatus.mockRejectedValue(new Error('Service unavailable'));
+
+    const result = await handlePauseResume({ name: 'my-config' }, 'pause');
+
+    expect(result.success).toBe(false);
+    expect(result.error).toBe('Service unavailable'); // the rejection message is surfaced verbatim
+  });
+});
diff --git a/src/cli/operations/eval/__tests__/run-eval.test.ts b/src/cli/operations/eval/__tests__/run-eval.test.ts
new file mode 100644
index 00000000..4ce1a9ae
--- /dev/null
+++ b/src/cli/operations/eval/__tests__/run-eval.test.ts
@@ -0,0 +1,940 @@
+import { handleRunEval } from '../run-eval.js';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+// ─── Mocks ────────────────────────────────────────────────────────────────────
+
+const mockResolveAgent = vi.fn(); // one vi.fn per mocked dependency; tests program them individually
+const mockLoadDeployedProjectConfig = vi.fn();
+const mockEvaluate = vi.fn();
+const mockGetEvaluator = vi.fn();
+const mockSaveEvalRun = vi.fn();
+const mockGenerateFilename = vi.fn();
+const mockSend = vi.fn(); // shared send() of every mocked CloudWatchLogsClient instance
+const mockGetCredentialProvider = vi.fn().mockReturnValue({});
+const mockWriteFileSync = vi.fn();
+
+vi.mock('../../resolve-agent', () => ({
+  loadDeployedProjectConfig: () => mockLoadDeployedProjectConfig(),
+  resolveAgent: (...args: unknown[]) => mockResolveAgent(...args),
+}));
+
+vi.mock('../../../aws/agentcore', () => ({
+  evaluate: (...args: unknown[]) => mockEvaluate(...args),
+}));
+
+vi.mock('../../../aws/agentcore-control', () => ({
+  getEvaluator: (...args: unknown[]) => mockGetEvaluator(...args),
+}));
+
+vi.mock('../../../aws', () => ({
+  getCredentialProvider: () => mockGetCredentialProvider(),
+}));
+
+vi.mock('../storage', () => ({
+  generateFilename: (...args: unknown[]) => mockGenerateFilename(...args),
+  saveEvalRun: (...args: unknown[]) => mockSaveEvalRun(...args),
+}));
+
+vi.mock('fs', async importOriginal => {
+  const original = await importOriginal<typeof import('fs')>();
+  return {
+    ...original, // keep the real fs module; only writeFileSync is replaced
+    writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args),
+  };
+});
+
+vi.mock('@aws-sdk/client-cloudwatch-logs', () => ({
+  CloudWatchLogsClient: class {
+    send = mockSend;
+  },
+  StartQueryCommand: class {
+    constructor(public input: unknown) {} // stores the input so mockSend can inspect it
+  },
+  GetQueryResultsCommand: class {
+    constructor(public input: unknown) {}
+  },
+}));
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makeDeployedContext({
+  agentName = 'my-agent',
+  runtimeId = 'rt-123',
+  evaluators = {} as Record<string, { evaluatorId: string }>, // deployed custom evaluators, keyed by name
+} = {}) { // builds the value the tests make the mocked loadDeployedProjectConfig resolve to
+  return {
+    project: {
+      agents: [{ name: agentName }],
+      onlineEvalConfigs: [],
+    },
+    awsTargets: [{ name: 'dev', region: 'us-east-1', account: '111222333444' }],
+    deployedState: {
+      targets: {
+        dev: {
+          resources: {
+            agents: {
+              [agentName]: {
+                runtimeId,
+                runtimeArn: `arn:aws:bedrock:us-east-1:111222333444:agent-runtime/${runtimeId}`,
+                roleArn: 'arn:aws:iam::111222333444:role/test',
+              },
+            },
+            evaluators,
+          },
+        },
+      },
+    },
+  };
+}
+
+function makeOtelSpanRow(sessionId: string, traceId: string, spanBody: Record<string, unknown> = {}) {
+  // Builds one query-result row: a list of { field, value } cells holding the serialized
+  // span record in '@message' plus the session id and trace id as separate cells.
+  const spanRecord = { scope: { name: 'strands.telemetry.tracer' }, body: spanBody, traceId };
+  const cells: [string, string][] = [
+    ['@message', JSON.stringify(spanRecord)],
+    ['sessionId', sessionId],
+    ['traceId', traceId],
+  ];
+  // Expand each [field, value] pair into a cell object.
+  return cells.map(([field, value]) => ({ field, value }));
+}
+
+function makeToolCallSpanRow(sessionId: string, traceId: string, spanId: string, toolName: string) {
+  // Builds one query-result row for a CLIENT-kind span whose attributes name the invoked tool.
+  // The row is a list of { field, value } cells: the serialized span plus session and trace ids.
+  const toolAttributes = { 'gen_ai.tool.name': toolName };
+  const scope = { name: 'strands.telemetry.tracer' };
+  const spanRecord = { scope, traceId, spanId, kind: 'CLIENT', attributes: toolAttributes };
+  const cells: [string, string][] = [
+    ['@message', JSON.stringify(spanRecord)],
+    ['sessionId', sessionId],
+    ['traceId', traceId],
+  ];
+  // Expand each [field, value] pair into a cell object.
+  return cells.map(([field, value]) => ({ field, value }));
+}
+
+function setupCloudWatchToReturn(spanRows: unknown[][], runtimeLogRows: unknown[][] = []) {
+  // Programs mockSend to emulate the two-step query flow: a command whose input carries a
+  // queryString starts a query; any other command is answered as a completed result fetch.
+  let startedQueries = 0;
+  mockSend.mockImplementation((command: { input: unknown }) => {
+    const payload = command.input as Record<string, unknown>;
+    const startsQuery = 'queryString' in payload;
+
+    if (!startsQuery) {
+      // Result fetch: span rows while exactly one query has been started, runtime log rows otherwise.
+      const rows = startedQueries === 1 ? spanRows : runtimeLogRows;
+      return Promise.resolve({ status: 'Complete', results: rows });
+    }
+
+    startedQueries += 1;
+    return Promise.resolve({ queryId: `q-${startedQueries}` });
+  });
+}
+
+describe('handleRunEval', () => {
+  beforeEach(() => {
+    mockGenerateFilename.mockReturnValue('eval_2025-01-15_10-00-00');
+    mockSaveEvalRun.mockReturnValue('/tmp/eval-results/eval_2025-01-15_10-00-00.json');
+  });
+
+  afterEach(() => vi.clearAllMocks());
+
+  // ─── Context resolution ───────────────────────────────────────────────────
+
+  it('returns error when agent resolution fails', async () => {
+    mockLoadDeployedProjectConfig.mockResolvedValue({});
+    mockResolveAgent.mockReturnValue({ success: false, error: 'No agents defined' });
+
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toBe('No agents defined');
+  });
+
+  it('returns error when a custom evaluator is not found in deployed state', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const result = await handleRunEval({ evaluator: ['MissingEval'], days: 7 });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('MissingEval');
+    expect(result.error).toContain('not found in deployed state');
+  });
+
+  it('resolves builtin evaluators without deployed state lookup', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    // No spans found — will return before calling evaluate
+    setupCloudWatchToReturn([]);
+
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    // Fails because no spans, but NOT because evaluator wasn't found
+    expect(result.error).toContain('No session spans found');
+  });
+
+  it('resolves custom evaluator name to deployed evaluator ID', async () => {
+    const ctx = makeDeployedContext({
+      evaluators: { MyCustomEval: { evaluatorId: 'eval-custom-id' } },
+    });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const spanRows = [makeOtelSpanRow('session-1', 'trace-1')];
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1' } } }],
+    });
+
+    const result = await handleRunEval({ evaluator: ['MyCustomEval'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(mockEvaluate).toHaveBeenCalledWith(expect.objectContaining({ evaluatorId: 'eval-custom-id' }));
+  });
+
+  it('extracts evaluator ID from ARN when --evaluator-arn is passed', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const spanRows = [makeOtelSpanRow('session-1', 'trace-1')];
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 'session-1' } } }],
+    });
+
+    const result = await handleRunEval({
+      evaluator: [],
+      evaluatorArn: ['arn:aws:bedrock:us-east-1:123:evaluator/my-eval-id'],
+      days: 7,
+    });
+
+    expect(result.success).toBe(true);
+    expect(mockEvaluate).toHaveBeenCalledWith(expect.objectContaining({ evaluatorId: 'my-eval-id' }));
+  });
+
+  // ─── No sessions ──────────────────────────────────────────────────────────
+
+  it('returns error when no session spans are found', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([]);
+
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('No session spans found');
+    expect(result.error).toContain('my-agent');
+  });
+
+  // ─── Successful evaluation ────────────────────────────────────────────────
+
+  it('runs evaluation across sessions and computes aggregate score', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const spanRows = [makeOtelSpanRow('session-1', 'trace-1'), makeOtelSpanRow('session-2', 'trace-2')];
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate
+      .mockResolvedValueOnce({
+        evaluationResults: [
+          {
+            value: 4.0,
+            context: { spanContext: { sessionId: 'session-1', traceId: 'trace-1' } },
+            tokenUsage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
+          },
+        ],
+      })
+      .mockResolvedValueOnce({
+        evaluationResults: [
+          {
+            value: 2.0,
+            context: { spanContext: { sessionId: 'session-2', traceId: 'trace-2' } },
+            tokenUsage: { inputTokens: 80, outputTokens: 40, totalTokens: 120 },
+          },
+        ],
+      });
+
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(result.run).toBeDefined();
+    expect(result.run!.sessionCount).toBe(2);
+    expect(result.run!.results).toHaveLength(1);
+
+    const evalResult = result.run!.results[0]!;
+    expect(evalResult.aggregateScore).toBe(3.0); // (4 + 2) / 2
+    expect(evalResult.sessionScores).toHaveLength(2);
+    expect(evalResult.tokenUsage).toEqual({ inputTokens: 180, outputTokens: 90, totalTokens: 270 });
+  });
+
+  it('excludes errored sessions from aggregate score', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const spanRows = [makeOtelSpanRow('session-1', 'trace-1')];
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [
+        { value: 5.0, context: { spanContext: { sessionId: 's1' } } },
+        { value: 0, errorMessage: 'something failed', context: { spanContext: { sessionId: 's2' } } },
+      ],
+    });
+
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    expect(result.success).toBe(true);
+    const evalResult = result.run!.results[0]!;
+    // Only the non-errored session (value 5.0) should be in the aggregate
+    expect(evalResult.aggregateScore).toBe(5.0);
+    expect(evalResult.sessionScores).toHaveLength(2);
+  });
+
+  // ─── Output handling ──────────────────────────────────────────────────────
+
+  it('saves to default location when no output option', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(mockSaveEvalRun).toHaveBeenCalled();
+    expect(mockWriteFileSync).not.toHaveBeenCalled();
+    expect(result.filePath).toBe('/tmp/eval-results/eval_2025-01-15_10-00-00.json');
+  });
+
+  it('writes to custom output path when --output is specified', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({
+      evaluator: ['Builtin.GoalSuccessRate'],
+      days: 7,
+      output: '/tmp/my-output.json',
+    });
+
+    expect(result.success).toBe(true);
+    expect(mockWriteFileSync).toHaveBeenCalledWith('/tmp/my-output.json', expect.any(String));
+    expect(mockSaveEvalRun).not.toHaveBeenCalled();
+    expect(result.filePath).toBe('/tmp/my-output.json');
+  });
+
+  // ─── Multiple evaluators ─────────────────────────────────────────────────
+
+  it('runs multiple evaluators and returns separate results for each', async () => {
+    const ctx = makeDeployedContext({
+      evaluators: { CustomEval: { evaluatorId: 'eval-custom' } },
+    });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+
+    mockEvaluate
+      .mockResolvedValueOnce({
+        evaluationResults: [{ value: 0.9, context: { spanContext: { sessionId: 's1' } } }],
+      })
+      .mockResolvedValueOnce({
+        evaluationResults: [{ value: 4.5, context: { spanContext: { sessionId: 's1' } } }],
+      });
+
+    const result = await handleRunEval({
+      evaluator: ['Builtin.GoalSuccessRate', 'CustomEval'],
+      days: 7,
+    });
+
+    expect(result.success).toBe(true);
+    expect(result.run!.results).toHaveLength(2);
+    expect(result.run!.results[0]!.evaluator).toBe('Builtin.GoalSuccessRate');
+    expect(result.run!.results[0]!.aggregateScore).toBe(0.9);
+    expect(result.run!.results[1]!.evaluator).toBe('CustomEval');
+    expect(result.run!.results[1]!.aggregateScore).toBe(4.5);
+  });
+
+  // ─── ARN mode ─────────────────────────────────────────────────────────────
+
+  it('resolves context from agent runtime ARN without project config', async () => {
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/rt-arn-test',
+      evaluator: ['Builtin.Helpfulness'],
+      evaluatorArn: [],
+      days: 3,
+    });
+
+    expect(result.success).toBe(true);
+    expect(result.run!.agent).toBe('rt-arn-test');
+    expect(mockLoadDeployedProjectConfig).not.toHaveBeenCalled();
+    expect(mockResolveAgent).not.toHaveBeenCalled();
+  });
+
+  it('uses --region override in ARN mode', async () => {
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/rt-region-test',
+      evaluator: ['Builtin.Helpfulness'],
+      region: 'eu-west-1',
+      days: 7,
+    });
+
+    expect(result.success).toBe(true);
+    // Should not load project config
+    expect(mockLoadDeployedProjectConfig).not.toHaveBeenCalled();
+  });
+
+  it('resolves evaluator ARNs in ARN mode', async () => {
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 5.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-abc',
+      evaluator: [],
+      evaluatorArn: ['arn:aws:bedrock-agentcore:us-east-1:123456789012:evaluator/eval-xyz'],
+      days: 7,
+    });
+
+    expect(result.success).toBe(true);
+    expect(mockEvaluate).toHaveBeenCalledWith(expect.objectContaining({ evaluatorId: 'eval-xyz' }));
+  });
+
+  it('returns error for invalid ARN format', async () => {
+    const result = await handleRunEval({
+      agentArn: 'not-an-arn',
+      evaluator: ['Builtin.Helpfulness'],
+      days: 7,
+    });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('Invalid agent runtime ARN');
+  });
+
+  it('rejects custom evaluator names in ARN mode', async () => {
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-abc',
+      evaluator: ['MyCustomEval'],
+      days: 7,
+    });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('cannot be resolved in ARN mode');
+  });
+
+  it('saves to cwd in ARN mode when no --output is specified', async () => {
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-save-test',
+      evaluator: ['Builtin.Helpfulness'],
+      days: 7,
+    });
+
+    expect(result.success).toBe(true);
+    // Should write to cwd, not call saveEvalRun (which requires a project)
+    expect(mockSaveEvalRun).not.toHaveBeenCalled();
+    expect(mockWriteFileSync).toHaveBeenCalledWith(
+      expect.stringContaining('eval_2025-01-15_10-00-00.json'),
+      expect.any(String)
+    );
+    expect(result.filePath).toContain('eval_2025-01-15_10-00-00.json');
+  });
+
+  it('saves to --output path in ARN mode', async () => {
+    setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 's1' } } }],
+    });
+
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-output-test',
+      evaluator: ['Builtin.Helpfulness'],
+      days: 7,
+      output: '/tmp/custom-eval.json',
+    });
+
+    expect(result.success).toBe(true);
+    expect(mockWriteFileSync).toHaveBeenCalledWith('/tmp/custom-eval.json', expect.any(String));
+    expect(result.filePath).toBe('/tmp/custom-eval.json');
+  });
+
+  it('returns error when no evaluators in ARN mode', async () => {
+    const result = await handleRunEval({
+      agentArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-abc',
+      evaluator: [],
+      days: 7,
+    });
+
+    expect(result.success).toBe(false);
+    expect(result.error).toContain('No evaluators specified');
+  });
+
+  // ─── Evaluator-level grouping ────────────────────────────────────────────
+
+  it('sends targetTraceIds for TRACE-level builtin evaluators', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const spanRows = [makeOtelSpanRow('session-1', 'trace-1'), makeOtelSpanRow('session-1', 'trace-2')];
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1', traceId: 'trace-1' } } }],
+    });
+
+    // Builtin.Helpfulness is TRACE-level
+    const result = await handleRunEval({ evaluator: ['Builtin.Helpfulness'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(mockEvaluate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        targetTraceIds: expect.arrayContaining(['trace-1', 'trace-2']),
+      })
+    );
+  });
+
+  it('does not send targetTraceIds for SESSION-level evaluators', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('session-1', 'trace-1')]);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 'session-1' } } }],
+    });
+
+    // Builtin.GoalSuccessRate is SESSION-level
+    const result = await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(mockEvaluate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        targetTraceIds: undefined,
+        targetSpanIds: undefined,
+      })
+    );
+  });
+
+  it('sends targetSpanIds for TOOL_CALL-level evaluators', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    const spanRows = [makeToolCallSpanRow('session-1', 'trace-1', 'span-tool-1', 'calculator')];
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 5.0, context: { spanContext: { sessionId: 'session-1', spanId: 'span-tool-1' } } }],
+    });
+
+    // Builtin.ToolSelectionAccuracy is TOOL_CALL-level
+    const result = await handleRunEval({ evaluator: ['Builtin.ToolSelectionAccuracy'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(mockEvaluate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        targetSpanIds: ['span-tool-1'],
+      })
+    );
+  });
+
+  it('batches targetSpanIds into chunks of 10 for TOOL_CALL evaluators', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    // Create 12 tool call spans in one session
+    const spanRows = Array.from({ length: 12 }, (_, i) =>
+      makeToolCallSpanRow('session-1', 'trace-1', `span-tool-${i}`, `tool-${i}`)
+    );
+    setupCloudWatchToReturn(spanRows);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 5.0, context: { spanContext: { sessionId: 'session-1' } } }],
+    });
+
+    const result = await handleRunEval({ evaluator: ['Builtin.ToolSelectionAccuracy'], days: 7 });
+
+    expect(result.success).toBe(true);
+    // Should be called twice: first batch of 10, second batch of 2
+    expect(mockEvaluate).toHaveBeenCalledTimes(2);
+    expect(mockEvaluate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        targetSpanIds: expect.arrayContaining(['span-tool-0']) as string[],
+      })
+    );
+
+    const firstCallSpanIds = (mockEvaluate.mock.calls[0] as [{ targetSpanIds: string[] }])[0].targetSpanIds;
+    const secondCallSpanIds = (mockEvaluate.mock.calls[1] as [{ targetSpanIds: string[] }])[0].targetSpanIds;
+    expect(firstCallSpanIds).toHaveLength(10);
+    expect(secondCallSpanIds).toHaveLength(2);
+  });
+
+  it('fetches level from API for custom evaluators', async () => {
+    const ctx = makeDeployedContext({
+      evaluators: { MyTraceEval: { evaluatorId: 'eval-trace-custom' } },
+    });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    // Mock getEvaluator to return TRACE level for the custom evaluator
+    mockGetEvaluator.mockResolvedValue({
+      evaluatorId: 'eval-trace-custom',
+      evaluatorName: 'MyTraceEval',
+      level: 'TRACE',
+      status: 'ACTIVE',
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('session-1', 'trace-1')]);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1', traceId: 'trace-1' } } }],
+    });
+
+    const result = await handleRunEval({ evaluator: ['MyTraceEval'], days: 7 });
+
+    expect(result.success).toBe(true);
+    expect(mockGetEvaluator).toHaveBeenCalledWith(expect.objectContaining({ evaluatorId: 'eval-trace-custom' }));
+    expect(mockEvaluate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        targetTraceIds: ['trace-1'],
+      })
+    );
+  });
+
+  it('defaults to SESSION level when getEvaluator fails for custom evaluator', async () => {
+    const ctx = makeDeployedContext({
+      evaluators: { FailingEval: { evaluatorId: 'eval-failing' } },
+    });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    mockGetEvaluator.mockRejectedValue(new Error('Not found'));
+
+    setupCloudWatchToReturn([makeOtelSpanRow('session-1', 'trace-1')]);
+
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 'session-1' } } }],
+    });
+
+    const result = await handleRunEval({ evaluator: ['FailingEval'], days: 7 });
+
+    expect(result.success).toBe(true);
+    // Should default to SESSION (no target IDs)
+    expect(mockEvaluate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        targetTraceIds: undefined,
+        targetSpanIds: undefined,
+      })
+    );
+  });
+
+  // ─── Session/trace filtering ─────────────────────────────────────────────
+
+  /** Returns the queryString of the first mockSend call whose command input has one; throws if there is none. */
+  function getFirstQueryString(): string {
+    for (const [command] of mockSend.mock.calls as [{ input?: { queryString?: string } }][]) {
+      if (command.input?.queryString !== undefined) return command.input.queryString;
+    }
+    throw new Error('No mockSend call carried a CloudWatch queryString');
+  }
+
+  it('filters CloudWatch query by --session-id', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('session-abc', 'trace-1')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-abc' } } }],
+    });
+
+    const result = await handleRunEval({
+      evaluator: ['Builtin.GoalSuccessRate'],
+      days: 7,
+      sessionId: 'session-abc',
+    });
+
+    expect(result.success).toBe(true);
+    const query = getFirstQueryString();
+    expect(query).toContain("filter attributes.session.id = 'session-abc'");
+  });
+
+  it('filters CloudWatch query by --trace-id', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([makeOtelSpanRow('session-1', 'trace-xyz')]);
+    mockEvaluate.mockResolvedValue({
+      evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 'session-1', traceId: 'trace-xyz' } } }],
+    });
+
+    const result = await handleRunEval({
+      evaluator: ['Builtin.GoalSuccessRate'],
+      days: 7,
+      traceId: 'trace-xyz',
+    });
+
+    expect(result.success).toBe(true);
+    const query = getFirstQueryString();
+    expect(query).toContain("filter traceId = 'trace-xyz'");
+  });
+
+  it('sanitizes --session-id and --trace-id values', async () => {
+    const ctx = makeDeployedContext();
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: 'rt-123',
+      },
+    });
+
+    setupCloudWatchToReturn([]);
+
+    await handleRunEval({
+      evaluator: ['Builtin.GoalSuccessRate'],
+      days: 7,
+      sessionId: "sess'; DROP TABLE--",
+      traceId: "trace'; DROP TABLE--",
+    });
+
+    const query = getFirstQueryString();
+    expect(query).toContain("filter attributes.session.id = 'sess; DROP TABLE--'");
+    expect(query).toContain("filter traceId = 'trace; DROP TABLE--'");
+    expect(query).not.toContain("sess'");
+  });
+
+  // ─── Query sanitization ───────────────────────────────────────────────────
+
+  it('sanitizes runtimeId in CloudWatch query to prevent injection', async () => {
+    const ctx = makeDeployedContext({ runtimeId: "rt-123'; DROP TABLE" });
+    mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
+    mockResolveAgent.mockReturnValue({
+      success: true,
+      agent: {
+        agentName: 'my-agent',
+        targetName: 'dev',
+        region: 'us-east-1',
+        accountId: '111222333444',
+        runtimeId: "rt-123'; DROP TABLE",
+      },
+    });
+
+    setupCloudWatchToReturn([]);
+
+    await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
+
+    const queryString = getFirstQueryString();
+    expect(queryString).not.toContain("'rt-123'; DROP TABLE'");
+    expect(queryString).toContain('rt-123; DROP TABLE');
+  });
+});
diff --git a/src/cli/operations/eval/__tests__/storage.test.ts b/src/cli/operations/eval/__tests__/storage.test.ts
new file mode 100644
index 00000000..1ce0fbe6
--- /dev/null
+++ b/src/cli/operations/eval/__tests__/storage.test.ts
@@ -0,0 +1,167 @@
+import { generateFilename, getResultsPath, listEvalRuns, loadEvalRun, saveEvalRun } from '../storage.js';
+import type { EvalRunResult } from '../types.js';
+// Use real fs via a temp directory
+import { existsSync, mkdirSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+const mockFindConfigRoot = vi.fn();
+
+vi.mock('../../../../lib', () => ({
+  findConfigRoot: () => mockFindConfigRoot(),
+}));
+
+function makeTmpDir(): string {
+  const dir = join(tmpdir(), `eval-storage-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
+
+/**
+ * Builds an EvalRunResult fixture for a single-evaluator run of `test-agent`.
+ * Any top-level field can be replaced through `overrides` (shallow merge).
+ */
+function makeRunResult(overrides: Partial<EvalRunResult> = {}): EvalRunResult {
+  const sessionScores = [{ sessionId: 's1', value: 0.85 }];
+  const defaults: EvalRunResult = {
+    timestamp: '2025-01-15T10:00:00.000Z',
+    agent: 'test-agent',
+    evaluators: ['Builtin.GoalSuccessRate'],
+    lookbackDays: 7,
+    sessionCount: 3,
+    results: [{ evaluator: 'Builtin.GoalSuccessRate', aggregateScore: 0.85, sessionScores }],
+  };
+
+  return { ...defaults, ...overrides };
+}
+
+describe('storage', () => {
+  let tmpDir: string;
+
+  beforeEach(() => {
+    tmpDir = makeTmpDir();
+    mockFindConfigRoot.mockReturnValue(tmpDir);
+  });
+
+  afterEach(() => {
+    if (existsSync(tmpDir)) {
+      rmSync(tmpDir, { recursive: true, force: true });
+    }
+    vi.clearAllMocks();
+  });
+
+  describe('generateFilename', () => {
+    it('returns a string starting with eval_', () => {
+      const name = generateFilename('2025-01-15T10:30:45.000Z');
+      expect(name).toMatch(/^eval_\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/);
+    });
+
+    it('formats timestamp correctly', () => {
+      const name = generateFilename('2025-03-05T08:05:09.000Z');
+      expect(name).toBe('eval_2025-03-05_08-05-09');
+    });
+  });
+
+  describe('saveEvalRun', () => {
+    it('creates eval-results directory and writes JSON file', () => {
+      const run = makeRunResult();
+      const filePath = saveEvalRun(run);
+
+      expect(filePath).toContain('eval-results');
+      expect(filePath).toContain('eval_2025-01-15');
+      expect(filePath.endsWith('.json')).toBe(true);
+      expect(existsSync(filePath)).toBe(true);
+    });
+
+    it('writes valid JSON that can be read back', () => {
+      const run = makeRunResult();
+      saveEvalRun(run);
+      const filename = generateFilename(run.timestamp);
+      const loaded = loadEvalRun(filename);
+      expect(loaded).toEqual(run);
+    });
+  });
+
+  describe('loadEvalRun', () => {
+    it('loads a previously saved run', () => {
+      const run = makeRunResult({ agent: 'my-agent' });
+      saveEvalRun(run);
+
+      const filename = generateFilename(run.timestamp);
+      const loaded = loadEvalRun(filename);
+      expect(loaded.agent).toBe('my-agent');
+      expect(loaded.results).toHaveLength(1);
+    });
+
+    it('accepts filename with .json extension', () => {
+      const run = makeRunResult();
+      saveEvalRun(run);
+
+      const filename = generateFilename(run.timestamp);
+      const loaded = loadEvalRun(`${filename}.json`);
+      expect(loaded).toEqual(run);
+    });
+
+    it('throws for a non-existent filename', () => {
+      expect(() => loadEvalRun('eval_2099-01-01_00-00-00')).toThrow('not found');
+    });
+  });
+
+  describe('listEvalRuns', () => {
+    it('returns empty array when eval-results dir does not exist', () => {
+      // Point to a dir with no eval-results subdirectory
+      const emptyDir = makeTmpDir();
+      mockFindConfigRoot.mockReturnValue(emptyDir);
+
+      expect(listEvalRuns()).toEqual([]);
+
+      rmSync(emptyDir, { recursive: true, force: true });
+    });
+
+    it('returns saved runs', () => {
+      saveEvalRun(makeRunResult({ timestamp: '2025-01-15T10:00:00.000Z' }));
+      saveEvalRun(makeRunResult({ timestamp: '2025-01-15T11:00:00.000Z' }));
+
+      const runs = listEvalRuns();
+      expect(runs).toHaveLength(2);
+    });
+
+    it('returns runs in reverse sorted order (newest first)', () => {
+      saveEvalRun(makeRunResult({ timestamp: '2025-01-15T08:00:00.000Z' }));
+      saveEvalRun(makeRunResult({ timestamp: '2025-01-15T12:00:00.000Z' }));
+      saveEvalRun(makeRunResult({ timestamp: '2025-01-15T10:00:00.000Z' }));
+
+      const runs = listEvalRuns();
+      const timestamps = runs.map(r => r.timestamp);
+      expect(timestamps).toEqual(['2025-01-15T12:00:00.000Z', '2025-01-15T10:00:00.000Z', '2025-01-15T08:00:00.000Z']);
+    });
+
+    it('ignores files that do not match the naming pattern', async () => {
+      saveEvalRun(makeRunResult());
+
+      // Write a file that doesn't match the pattern
+      const resultsDir = join(tmpDir, '.cli', 'eval-results');
+      const { writeFileSync } = await import('fs');
+      writeFileSync(join(resultsDir, 'notes.txt'), 'not a run');
+      writeFileSync(join(resultsDir, 'other.json'), '{}');
+
+      const runs = listEvalRuns();
+      expect(runs).toHaveLength(1);
+    });
+  });
+
+  describe('getResultsPath', () => {
+    it('returns the eval-results directory path', () => {
+      const path = getResultsPath();
+      expect(path).toBe(join(tmpDir, '.cli', 'eval-results'));
+    });
+  });
+
+  describe('error when no config root', () => {
+    it('throws when findConfigRoot returns null', () => {
+      mockFindConfigRoot.mockReturnValue(null);
+      expect(() => saveEvalRun(makeRunResult())).toThrow('No agentcore project found');
+    });
+  });
+});
diff --git a/src/cli/operations/eval/get-eval-run.ts b/src/cli/operations/eval/get-eval-run.ts
new file mode 100644
index 00000000..ed2aa6b6
--- /dev/null
+++ b/src/cli/operations/eval/get-eval-run.ts
@@ -0,0 +1,18 @@
+import { getErrorMessage } from '../../errors';
+import { loadEvalRun } from './storage';
+import type { EvalRunResult, GetEvalRunOptions } from './types';
+
+export interface GetEvalRunResult {
+  success: boolean;
+  error?: string;
+  run?: EvalRunResult;
+}
+
+/** Loads a saved eval run by filename; any thrown error is reported as a `success: false` result. */
+export function handleGetEvalRun(options: GetEvalRunOptions): GetEvalRunResult {
+  try {
+    return { success: true, run: loadEvalRun(options.filename) };
+  } catch (err) {
+    return { success: false, error: getErrorMessage(err) };
+  }
+}
diff --git a/src/cli/operations/eval/index.ts b/src/cli/operations/eval/index.ts
new file mode 100644
index 00000000..f3937236
--- /dev/null
+++ b/src/cli/operations/eval/index.ts
@@ -0,0 +1,10 @@
+export { handleRunEval } from './run-eval';
+export type { RunEvalResult } from './run-eval';
+export { handleListEvalRuns } from './list-eval-runs';
+export type { ListEvalRunsResult } from './list-eval-runs';
+export { handlePauseResume, handleDeleteOnlineEval } from './pause-resume';
+export type { PauseResumeResult, DeleteResult } from './pause-resume';
+export { handleLogsEval } from './logs-eval';
+export type { LogsEvalResult } from './logs-eval';
+export type { EvalRunResult, RunEvalOptions, ListEvalRunsOptions, OnlineEvalActionOptions } from './types';
+export type { LogsEvalOptions } from './logs-eval';
diff --git a/src/cli/operations/eval/list-eval-runs.ts b/src/cli/operations/eval/list-eval-runs.ts
new file mode 100644
index 00000000..66b0ed52
--- /dev/null
+++ b/src/cli/operations/eval/list-eval-runs.ts
@@ -0,0 +1,27 @@
+import { getErrorMessage } from '../../errors';
+import { listEvalRuns } from './storage';
+import type { EvalRunResult, ListEvalRunsOptions } from './types';
+
+export interface ListEvalRunsResult {
+  success: boolean;
+  error?: string;
+  runs?: EvalRunResult[];
+}
+
+/**
+ * Lists saved eval runs in the order `listEvalRuns` returns them, optionally narrowed to one agent.
+ * `limit` caps the list only when it is a positive number; errors are returned as `success: false`.
+ */
+export function handleListEvalRuns(options: ListEvalRunsOptions): ListEvalRunsResult {
+  const { agent, limit } = options;
+
+  try {
+    const allRuns = listEvalRuns();
+    const matching = agent ? allRuns.filter(r => r.agent === agent) : allRuns;
+    // A negative limit must not reach slice(): slice(0, -n) would drop the last n runs instead of capping.
+    const runs = typeof limit === 'number' && limit > 0 ? matching.slice(0, limit) : matching;
+    return { success: true, runs };
+  } catch (err) {
+    return { success: false, error: getErrorMessage(err) };
+  }
+}
diff --git a/src/cli/operations/eval/logs-eval.ts b/src/cli/operations/eval/logs-eval.ts
new file mode 100644
index 00000000..0e3af702
--- /dev/null
+++ b/src/cli/operations/eval/logs-eval.ts
@@ -0,0 +1,167 @@
+import { parseTimeString } from '../../../lib/utils';
+import { getOnlineEvaluationConfig } from '../../aws/agentcore-control';
+import { searchLogs, streamLogs } from '../../aws/cloudwatch';
+import type { DeployedProjectConfig } from '../resolve-agent';
+import { loadDeployedProjectConfig, resolveAgent } from '../resolve-agent';
+
+export interface LogsEvalOptions {
+  agent?: string;
+  since?: string;
+  until?: string;
+  lines?: string;
+  json?: boolean;
+  follow?: boolean;
+}
+
+export interface LogsEvalResult {
+  success: boolean;
+  error?: string;
+}
+
+/** Formats one log event as a line: a JSON object when `json` is true, else `<ISO timestamp>  <message>`. */
+function formatLogLine(event: { timestamp: number; message: string }, json: boolean): string {
+  const { timestamp, message } = event;
+  const isoTime = new Date(timestamp).toISOString();
+  if (json) return JSON.stringify({ timestamp: isoTime, message });
+  return `${isoTime}  ${message}`;
+}
+
+interface ResolvedLogGroup {
+  logGroupName: string;
+  configName: string;
+  failureReason?: string;
+}
+
+/**
+ * Map each project online eval config that is deployed on `targetName` to the log group holding its results.
+ *
+ * The log group comes from `getOnlineEvaluationConfig` when it reports one; when the lookup fails or reports
+ * none, the conventional `/aws/bedrock-agentcore/evaluations/results/<configId>` name is used instead.
+ * Configs without a deployed config ID on the target are skipped.
+ */
+async function resolveEvalLogGroups(
+  context: DeployedProjectConfig,
+  targetName: string,
+  region: string
+): Promise<ResolvedLogGroup[]> {
+  const deployedConfigs = context.deployedState.targets[targetName]?.resources?.onlineEvalConfigs;
+  const resolved: ResolvedLogGroup[] = [];
+
+  for (const { name: configName } of context.project.onlineEvalConfigs ?? []) {
+    const configId = deployedConfigs?.[configName]?.onlineEvaluationConfigId;
+    if (!configId) continue;
+
+    const conventionalName = `/aws/bedrock-agentcore/evaluations/results/${configId}`;
+    let entry: ResolvedLogGroup;
+    try {
+      const { outputLogGroupName, failureReason } = await getOnlineEvaluationConfig({ region, configId });
+      entry = {
+        logGroupName: outputLogGroupName ?? conventionalName,
+        configName,
+        failureReason,
+      };
+    } catch {
+      // Lookup failed: fall back to the conventional name so one bad config does not hide the others.
+      entry = { logGroupName: conventionalName, configName };
+    }
+    resolved.push(entry);
+  }
+
+  return resolved;
+}
+
+/**
+ * Print online eval result logs for the deployed online eval configs of the resolved agent's target.
+ *
+ * Search mode (`follow` is false, or unset while `since`/`until` is given) prints the matching events of each
+ * log group in turn, defaulting to the last hour. Follow mode streams events until SIGINT aborts the stream.
+ * NOTE(review): follow mode visits log groups one after another, so a later group is presumably reached only
+ * once the previous stream ends — confirm against `streamLogs` if several configs must be followed at once.
+ *
+ * @param options - CLI options; `lines` is parsed as the `limit` given to `searchLogs`, ignored in follow mode.
+ * @returns `success: false` with an `error` when the agent cannot be resolved, no online eval config is deployed,
+ *   or `lines` is not a non-negative integer; otherwise `success: true`, including after a SIGINT abort.
+ * @throws Rejects with any `searchLogs`/`streamLogs` error other than `ResourceNotFoundException` or an abort.
+ */
+export async function handleLogsEval(options: LogsEvalOptions): Promise<LogsEvalResult> {
+  const context = await loadDeployedProjectConfig();
+  const agentResult = resolveAgent(context, { agent: options.agent });
+  if (!agentResult.success) {
+    return { success: false, error: agentResult.error };
+  }
+  const { agent } = agentResult;
+
+  const resolvedLogGroups = await resolveEvalLogGroups(context, agent.targetName, agent.region);
+  if (resolvedLogGroups.length === 0) {
+    return {
+      success: false,
+      error: `No deployed online eval configs found. Add one with 'agentcore add online-eval' and deploy.`,
+    };
+  }
+
+  // Surface failure reasons from configs that are in a failed state
+  for (const lg of resolvedLogGroups) {
+    if (lg.failureReason) {
+      console.error(`Warning: Online eval config '${lg.configName}' has a failure: ${lg.failureReason}`);
+    }
+  }
+
+  const isJson = options.json ?? false;
+  const isFollow = options.follow ?? (!options.since && !options.until);
+
+  // parseInt yields NaN for non-numeric input; reject that up front instead of passing NaN on as the limit.
+  const limit = !isFollow && options.lines ? parseInt(options.lines, 10) : undefined;
+  if (limit !== undefined && (Number.isNaN(limit) || limit < 0)) {
+    return { success: false, error: `Invalid lines value '${options.lines}': expected a non-negative integer.` };
+  }
+
+  const ac = new AbortController();
+  const onSignal = () => ac.abort();
+  process.on('SIGINT', onSignal);
+
+  try {
+    // The search window is computed once so every log group is queried over the same time range.
+    const startTimeMs = !isFollow && options.since ? parseTimeString(options.since) : Date.now() - 3_600_000;
+    const endTimeMs = !isFollow && options.until ? parseTimeString(options.until) : Date.now();
+
+    for (const { logGroupName } of resolvedLogGroups) {
+      try {
+        if (isFollow) {
+          console.error(`Streaming eval logs for ${agent.agentName} from ${logGroupName}... (Ctrl+C to stop)`);
+          const stream = streamLogs({
+            logGroupName,
+            region: agent.region,
+            accountId: agent.accountId,
+            abortSignal: ac.signal,
+          });
+          for await (const event of stream) {
+            console.log(formatLogLine(event, isJson));
+          }
+        } else {
+          const matches = searchLogs({ logGroupName, region: agent.region, startTimeMs, endTimeMs, limit });
+          for await (const event of matches) {
+            console.log(formatLogLine(event, isJson));
+          }
+        }
+      } catch (err: unknown) {
+        if ((err as { name?: string })?.name !== 'ResourceNotFoundException') {
+          throw err;
+        }
+        // The log group is configured but does not exist in CloudWatch yet: skip it (and say so when following).
+        if (isFollow) {
+          console.error(`Log group ${logGroupName} not found yet — waiting for online eval results...`);
+        }
+      }
+    }
+
+    return { success: true };
+  } catch (err: unknown) {
+    // A SIGINT-triggered abort is the normal way to stop following, not a failure.
+    if ((err as { name?: string })?.name === 'AbortError' || ac.signal.aborted) {
+      return { success: true };
+    }
+    throw err;
+  } finally {
+    process.removeListener('SIGINT', onSignal);
+  }
+}
diff --git a/src/cli/operations/eval/pause-resume.ts b/src/cli/operations/eval/pause-resume.ts
new file mode 100644
index 00000000..c1b11a44
--- /dev/null
+++ b/src/cli/operations/eval/pause-resume.ts
@@ -0,0 +1,140 @@
+import type { OnlineEvalExecutionStatus } from '../../aws/agentcore-control';
+import { deleteOnlineEvalConfig, updateOnlineEvalExecutionStatus } from '../../aws/agentcore-control';
+import { loadDeployedProjectConfig } from '../resolve-agent';
+import type { OnlineEvalActionOptions } from './types';
+
+export interface PauseResumeResult { // outcome returned by handlePauseResume
+  success: boolean; // false when config resolution or the update call failed
+  error?: string; // failure description; populated only on the failure paths
+  configId?: string; // taken from the update call's result on success
+  executionStatus?: string; // taken from the update call's result on success
+}
+
+export interface DeleteResult { // outcome returned by handleDeleteOnlineEval
+  success: boolean; // false when config resolution or the delete call failed
+  error?: string; // failure description; populated only on the failure paths
+  configId?: string; // taken from the delete call's result on success
+  status?: string; // taken from the delete call's result on success
+}
+
+// Resolve a named online eval config against the deployed state, returning its deployed
+// config ID together with the region of the owning target from aws-targets.
+// Every failure is reported as a { success: false } result with a user-facing message.
+async function resolveOnlineEvalConfig(
+  configName: string
+): Promise<{ success: true; configId: string; region: string } | { success: false; error: string }> {
+  const { deployedState, awsTargets } = await loadDeployedProjectConfig();
+
+  // Only the first target listed in the deployed state is consulted.
+  const [firstTarget] = Object.keys(deployedState.targets);
+  if (firstTarget === undefined) {
+    return { success: false, error: 'No deployed targets found. Run `agentcore deploy` first.' };
+  }
+
+  const deployed = deployedState.targets[firstTarget]?.resources?.onlineEvalConfigs?.[configName];
+  if (!deployed) {
+    return {
+      success: false,
+      error: `Online eval config "${configName}" not found in deployed state. Has it been deployed?`,
+    };
+  }
+
+  // The region is not stored with the deployed config; it comes from the matching aws-targets entry.
+  const awsTarget = awsTargets.find(candidate => candidate.name === firstTarget);
+  if (!awsTarget) {
+    return { success: false, error: `Target config "${firstTarget}" not found in aws-targets.` };
+  }
+
+  const { onlineEvaluationConfigId: configId } = deployed;
+  const { region } = awsTarget;
+  return { success: true, configId, region };
+}
+
+/**
+ * Extract the config ID and region from an online eval config ARN; `regionOverride` wins when given.
+ * Expected shape: arn:aws:bedrock-agentcore:<region>:<account>:online-evaluation-config/<configId>
+ */
+function parseOnlineEvalConfigArn(
+  arn: string,
+  regionOverride?: string
+): { success: true; configId: string; region: string } | { success: false; error: string } {
+  const segments = arn.split(':');
+  const looksLikeArn = arn.startsWith('arn:') && segments.length >= 6;
+  if (!looksLikeArn) {
+    return { success: false, error: `Invalid online eval config ARN: ${arn}` };
+  }
+
+  const region = regionOverride ?? segments[3];
+  if (!region) {
+    return { success: false, error: 'Could not determine region from ARN. Use --region to specify.' };
+  }
+
+  // Everything after the fifth colon is the resource part; re-join it in case it contains colons.
+  const configId = /online-evaluation-config\/(.+)$/.exec(segments.slice(5).join(':'))?.[1];
+  if (configId === undefined) {
+    return { success: false, error: `Could not extract config ID from ARN: ${arn}` };
+  }
+  return { success: true, configId, region };
+}
+
+/** Resolve config ID and region: parse `options.arn` when present, else look up `options.name`. */
+async function resolveConfig(
+  options: OnlineEvalActionOptions
+): Promise<{ success: true; configId: string; region: string } | { success: false; error: string }> {
+  const { arn, region, name } = options;
+  if (!arn) {
+    // Project mode: the name is resolved through the deployed state.
+    return resolveOnlineEvalConfig(name);
+  }
+  return parseOnlineEvalConfigArn(arn, region);
+}
+
+/** Pause (DISABLED) or resume (ENABLED) an online eval config; update failures are returned as a result, not thrown. */
+export async function handlePauseResume(
+  options: OnlineEvalActionOptions,
+  action: 'pause' | 'resume'
+): Promise<PauseResumeResult> {
+  const resolution = await resolveConfig(options);
+  if (!resolution.success) {
+    return resolution;
+  }
+
+  const executionStatus: OnlineEvalExecutionStatus = action === 'pause' ? 'DISABLED' : 'ENABLED';
+  try {
+    const result = await updateOnlineEvalExecutionStatus({
+      region: resolution.region,
+      onlineEvaluationConfigId: resolution.configId,
+      executionStatus,
+    });
+    return {
+      success: true,
+      configId: result.configId,
+      executionStatus: result.executionStatus,
+    };
+  } catch (err: unknown) {
+    // Non-Error throwables have no `.message`; stringify them so `error` is never undefined.
+    return { success: false, error: err instanceof Error ? err.message : String(err) };
+  }
+}
+
+/** Delete an online eval config resolved from a name or ARN; delete failures are returned as a result, not thrown. */
+export async function handleDeleteOnlineEval(options: OnlineEvalActionOptions): Promise<DeleteResult> {
+  const resolution = await resolveConfig(options);
+  if (!resolution.success) {
+    return resolution;
+  }
+  try {
+    const result = await deleteOnlineEvalConfig({
+      region: resolution.region,
+      onlineEvaluationConfigId: resolution.configId,
+    });
+    return {
+      success: true,
+      configId: result.configId,
+      status: result.status,
+    };
+  } catch (err: unknown) {
+    // Non-Error throwables have no `.message`; stringify them so `error` is never undefined.
+    return { success: false, error: err instanceof Error ? err.message : String(err) };
+  }
+}
diff --git a/src/cli/operations/eval/run-eval.ts b/src/cli/operations/eval/run-eval.ts
new file mode 100644
index 00000000..bee6d9b5
--- /dev/null
+++ b/src/cli/operations/eval/run-eval.ts
@@ -0,0 +1,643 @@
+import { getCredentialProvider } from '../../aws';
+import { evaluate } from '../../aws/agentcore';
+import { getEvaluator } from '../../aws/agentcore-control';
+import { DEFAULT_ENDPOINT_NAME } from '../../constants';
+import type { DeployedProjectConfig } from '../resolve-agent';
+import { loadDeployedProjectConfig, resolveAgent } from '../resolve-agent';
+import { generateFilename, saveEvalRun } from './storage';
+import type { EvalEvaluatorResult, EvalRunResult, EvalSessionScore, RunEvalOptions } from './types';
+import { CloudWatchLogsClient, GetQueryResultsCommand, StartQueryCommand } from '@aws-sdk/client-cloudwatch-logs';
+import type { ResultField } from '@aws-sdk/client-cloudwatch-logs';
+import type { DocumentType } from '@smithy/types';
+import { writeFileSync } from 'fs';
+import { join } from 'path';
+
+const SPANS_LOG_GROUP = 'aws/spans'; // log group queried for OTel spans in fetchSessionSpans
+// Instrumentation scope names that isRelevantForEval accepts.
+const SUPPORTED_SCOPES = new Set([
+  'strands.telemetry.tracer',
+  'opentelemetry.instrumentation.langchain',
+  'openinference.instrumentation.langchain',
+]);
+
+interface ResolvedEvalContext {
+  agentLabel: string; // display name: agent name in project mode, runtime ID in ARN mode
+  region: string;
+  runtimeId: string;
+  runtimeLogGroup: string; // /aws/bedrock-agentcore/runtimes/<runtimeId>-<DEFAULT_ENDPOINT_NAME>
+  evaluatorIds: string[]; // IDs passed to evaluate(); index-aligned with evaluatorLabels
+  evaluatorLabels: string[]; // what the user supplied for each evaluator; used as the result label
+}
+
+type ResolveResult = { success: true; ctx: ResolvedEvalContext } | { success: false; error: string };
+
+/**
+ * Normalize evaluator references to bare IDs: the text after "evaluator/" when present, else the value as-is.
+ */
+function resolveEvaluatorArns(arns: string[]): string[] {
+  const ids: string[] = [];
+  for (const ref of arns) {
+    ids.push(/evaluator\/(.+)$/.exec(ref)?.[1] ?? ref);
+  }
+  return ids;
+}
+
+/**
+ * ARN mode: resolve the eval context directly from an agent runtime ARN, without project config.
+ * Returns an error result for a missing/malformed ARN, a non-Builtin evaluator name, or no evaluators.
+ */
+function resolveFromArn(options: RunEvalOptions): ResolveResult {
+  const arn = options.agentArn ?? ''; // an absent ARN is reported as invalid below instead of crashing
+
+  // Expected shape: arn:aws:bedrock-agentcore:<region>:<account>:runtime/<runtimeId>
+  const arnParts = arn.split(':');
+  if (arnParts.length < 6 || !arn.startsWith('arn:')) {
+    return { success: false, error: `Invalid agent runtime ARN: ${arn}` };
+  }
+
+  const region = options.region ?? arnParts[3];
+  if (!region) {
+    return { success: false, error: 'Could not determine region from ARN. Use --region to specify.' };
+  }
+
+  const resourcePart = arnParts.slice(5).join(':');
+  const runtimeMatch = /runtime\/(.+)$/.exec(resourcePart);
+  if (!runtimeMatch) {
+    return { success: false, error: `Could not extract runtime ID from ARN: ${arn}` };
+  }
+  const runtimeId = runtimeMatch[1]!;
+
+  // In ARN mode, evaluators must come from --evaluator-arn or Builtin.* names
+  const evaluatorIds: string[] = [];
+  const evaluatorLabels: string[] = [];
+
+  for (const evalName of options.evaluator) {
+    if (evalName.startsWith('Builtin.')) {
+      evaluatorIds.push(evalName);
+      evaluatorLabels.push(evalName);
+    } else {
+      return {
+        success: false,
+        error: `Custom evaluator "${evalName}" cannot be resolved in ARN mode. Use --evaluator-arn with an evaluator ARN or ID, or use Builtin.* evaluators.`,
+      };
+    }
+  }
+
+  if (options.evaluatorArn) {
+    const resolved = resolveEvaluatorArns(options.evaluatorArn);
+    evaluatorIds.push(...resolved);
+    evaluatorLabels.push(...options.evaluatorArn);
+  }
+
+  if (evaluatorIds.length === 0) {
+    return { success: false, error: 'No evaluators specified. Use -e/--evaluator with Builtin.* or --evaluator-arn.' };
+  }
+
+  const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-${DEFAULT_ENDPOINT_NAME}`;
+
+  return {
+    success: true,
+    ctx: {
+      agentLabel: runtimeId,
+      region,
+      runtimeId,
+      runtimeLogGroup,
+      evaluatorIds,
+      evaluatorLabels,
+    },
+  };
+}
+
+/**
+ * Project mode: build the eval context from the project's config and deployed state.
+ * Evaluator names are mapped to deployed evaluator IDs; Builtin.* names pass through untouched.
+ */
+function resolveFromProject(context: DeployedProjectConfig, options: RunEvalOptions): ResolveResult {
+  const agentResult = resolveAgent(context, { agent: options.agent });
+  if (!agentResult.success) {
+    return agentResult;
+  }
+  const { agent } = agentResult;
+
+  const deployedEvaluators = context.deployedState.targets[agent.targetName]?.resources?.evaluators;
+  // Parallel arrays: evaluatorIds[i] is what gets sent, evaluatorLabels[i] is what the user typed.
+  const evaluatorIds: string[] = [];
+  const evaluatorLabels: string[] = [];
+
+  for (const evalName of options.evaluator) {
+    let resolvedId: string;
+    if (evalName.startsWith('Builtin.')) {
+      resolvedId = evalName;
+    } else {
+      const deployedEval = deployedEvaluators?.[evalName];
+      if (!deployedEval) {
+        return {
+          success: false,
+          error: `Evaluator "${evalName}" not found in deployed state. Has it been deployed?`,
+        };
+      }
+      resolvedId = deployedEval.evaluatorId;
+    }
+    evaluatorIds.push(resolvedId);
+    evaluatorLabels.push(evalName);
+  }
+
+  // Directly supplied ARNs/IDs are appended after the named evaluators.
+  const directRefs = options.evaluatorArn;
+  if (directRefs) {
+    evaluatorIds.push(...resolveEvaluatorArns(directRefs));
+    evaluatorLabels.push(...directRefs);
+  }
+
+  if (evaluatorIds.length === 0) {
+    return { success: false, error: 'No evaluators specified. Use -e/--evaluator or --evaluator-arn.' };
+  }
+
+  const { agentName, region, runtimeId } = agent;
+  return {
+    success: true,
+    ctx: {
+      agentLabel: agentName,
+      region,
+      runtimeId,
+      runtimeLogGroup: `/aws/bedrock-agentcore/runtimes/${runtimeId}-${DEFAULT_ENDPOINT_NAME}`,
+      evaluatorIds,
+      evaluatorLabels,
+    },
+  };
+}
+
+type EvaluatorLevel = 'SESSION' | 'TRACE' | 'TOOL_CALL'; // decides which target IDs handleRunEval sends
+// Static level table for known Builtin.* evaluators; consulted first by resolveEvaluatorLevels.
+const BUILTIN_EVALUATOR_LEVELS: Record<string, EvaluatorLevel> = {
+  'Builtin.GoalSuccessRate': 'SESSION',
+  'Builtin.Correctness': 'TRACE',
+  'Builtin.Faithfulness': 'TRACE',
+  'Builtin.Helpfulness': 'TRACE',
+  'Builtin.ResponseRelevance': 'TRACE',
+  'Builtin.Conciseness': 'TRACE',
+  'Builtin.Coherence': 'TRACE',
+  'Builtin.InstructionFollowing': 'TRACE',
+  'Builtin.Refusal': 'TRACE',
+  'Builtin.ToolSelectionAccuracy': 'TOOL_CALL',
+};
+
+/**
+ * Determine the evaluation level for every evaluator ID.
+ * Known builtins come from the static table; other IDs are looked up through getEvaluator.
+ */
+async function resolveEvaluatorLevels(evaluatorIds: string[], region: string): Promise<Map<string, EvaluatorLevel>> {
+  // Level for a single ID; every fallback path lands on SESSION.
+  const levelFor = async (id: string): Promise<EvaluatorLevel> => {
+    const known = BUILTIN_EVALUATOR_LEVELS[id];
+    if (known) {
+      return known;
+    }
+    if (id.startsWith('Builtin.')) {
+      // Builtin that is missing from the table.
+      return 'SESSION';
+    }
+    try {
+      // Custom evaluator: ask the API for its level.
+      const { level } = await getEvaluator({ region, evaluatorId: id });
+      return (level as EvaluatorLevel) ?? 'SESSION';
+    } catch {
+      // Lookup failed; fall back rather than abort the run.
+      return 'SESSION';
+    }
+  };
+
+  const levels = new Map<string, EvaluatorLevel>();
+  for (const id of evaluatorIds) {
+    // Lookups run one at a time, in input order.
+    levels.set(id, await levelFor(id));
+  }
+  return levels;
+}
+
+/**
+ * Collect the unique, truthy `traceId` values found on the session's spans, in first-seen order.
+ */
+function extractTraceIds(spans: DocumentType[]): string[] {
+  const seen = new Set<string>();
+  spans.forEach(span => {
+    const { traceId } = span as Record<string, unknown>;
+    if (traceId) {
+      seen.add(traceId as string);
+    }
+  });
+  return Array.from(seen);
+}
+
+/**
+ * Return the `spanId` of every span that carries a tool name attribute
+ * (`gen_ai.tool.name`, falling back to `tool.name`). Spans without a span ID are ignored.
+ */
+function extractToolCallSpanIds(spans: DocumentType[]): string[] {
+  const toolSpanIds: string[] = [];
+  for (const span of spans) {
+    const { spanId, attributes } = span as Record<string, unknown>;
+    if (!spanId) continue;
+    // A tool name attribute is required; span kind alone is not used as a signal.
+    const attrs = attributes as Record<string, unknown> | undefined;
+    const toolName = attrs?.['gen_ai.tool.name'] ?? attrs?.['tool.name'];
+    if (toolName) {
+      toolSpanIds.push(spanId as string);
+    }
+  }
+  return toolSpanIds;
+}
+
+const EVALUATE_TARGET_BATCH_SIZE = 10; // max target IDs per evaluate() call, applied by batchTargetIds
+// One evaluate() call's worth of target IDs; both fields absent means a call without target IDs.
+interface TargetIdBatch {
+  traceIds?: string[];
+  spanIds?: string[];
+}
+
+/**
+ * Split target IDs into batches of at most EVALUATE_TARGET_BATCH_SIZE for the Evaluate API.
+ * Span IDs take precedence over trace IDs. With neither supplied (SESSION level) a single
+ * empty batch is returned, i.e. one call without target IDs.
+ */
+function batchTargetIds(traceIds?: string[], spanIds?: string[]): TargetIdBatch[] {
+  const split = (ids: string[]): string[][] => chunk(ids, EVALUATE_TARGET_BATCH_SIZE);
+  if (spanIds) {
+    return split(spanIds).map(ids => ({ spanIds: ids }));
+  }
+  if (traceIds) {
+    return split(traceIds).map(ids => ({ traceIds: ids }));
+  }
+  return [{}];
+}
+
+/** Split `arr` into consecutive slices of `size` items (`size` must be positive); the final slice may be shorter. */
+function chunk<T>(arr: T[], size: number): T[][] {
+  const batchCount = Math.ceil(arr.length / size);
+  return Array.from({ length: batchCount }, (_unused, index) =>
+    arr.slice(index * size, (index + 1) * size)
+  );
+}
+
+/**
+ * Start a CloudWatch Logs Insights query and poll once per second, up to 60 times, for its rows.
+ * Throws when the query cannot be started, ends as Failed/Cancelled, or is not Complete after the last poll.
+ */
+async function executeQuery(
+  client: CloudWatchLogsClient,
+  logGroupName: string,
+  queryString: string,
+  startTimeSec: number,
+  endTimeSec: number
+): Promise<ResultField[][]> {
+  const startCommand = new StartQueryCommand({
+    logGroupName,
+    startTime: startTimeSec,
+    endTime: endTimeSec,
+    queryString,
+  });
+  const { queryId } = await client.send(startCommand);
+  if (!queryId) {
+    throw new Error('Failed to start CloudWatch Logs Insights query');
+  }
+
+  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+  for (let attempt = 1; attempt <= 60; attempt++) {
+    // Wait first, then poll: the query is never checked immediately after starting.
+    await pause(1000);
+
+    const { status = 'Unknown', results } = await client.send(new GetQueryResultsCommand({ queryId }));
+    switch (status) {
+      case 'Failed':
+      case 'Cancelled':
+        throw new Error(`CloudWatch query ${status.toLowerCase()}`);
+      case 'Complete':
+        return results ?? [];
+    }
+  }
+
+  throw new Error('CloudWatch query timed out after 60 seconds');
+}
+
+/**
+ * Parse the `@message` field of each CloudWatch Insights row as JSON.
+ * Rows without a message, or whose message is not valid JSON, are skipped.
+ */
+function extractMessages(rows: ResultField[][]): Record<string, unknown>[] {
+  return rows.flatMap(row => {
+    const raw = row.find(field => field.field === '@message')?.value;
+    if (!raw) {
+      return [];
+    }
+    try {
+      return [JSON.parse(raw) as Record<string, unknown>];
+    } catch {
+      return []; // non-JSON log line
+    }
+  });
+}
+
+/**
+ * Decide whether a document should be kept for evaluation. It qualifies when either:
+ * - its `scope.name` is one of SUPPORTED_SCOPES, or
+ * - its `body` is an object exposing an `input` or `output` key.
+ */
+function isRelevantForEval(doc: Record<string, unknown>): boolean {
+  const scopeName = (doc.scope as Record<string, unknown> | undefined)?.name as string | undefined;
+  const hasSupportedScope = Boolean(scopeName) && SUPPORTED_SCOPES.has(scopeName as string);
+  if (hasSupportedScope) {
+    return true;
+  }
+
+  const { body } = doc;
+  if (!body || typeof body !== 'object') {
+    return false;
+  }
+
+  return 'input' in body || 'output' in body;
+}
+
+/** Sanitize a value for single-quoted CloudWatch Insights literals: drop quotes and backslashes (a trailing `\` could escape the closing quote). */
+function sanitizeQueryValue(value: string): string {
+  return value.replace(/['\\]/g, '');
+}
+
+interface SessionSpans {
+  sessionId: string; // 'unknown' when the source rows carried no session ID
+  spans: DocumentType[]; // span documents plus any relevant runtime log documents for the session
+}
+
+interface FetchSpansOptions {
+  runtimeId: string; // matched against the runtime ID parsed from each span's cloud.resource_id
+  runtimeLogGroup: string; // log group queried for runtime logs of the traces found
+  region: string;
+  lookbackDays: number; // query window is [now - lookbackDays, now]
+  sessionId?: string; // optional extra filter on attributes.session.id
+  traceId?: string; // optional extra filter on traceId
+}
+
+/**
+ * Fetch OTel spans from the `aws/spans` log group and runtime logs from the agent's
+ * log group, then group them by session. Returns [] when the span query yields no
+ * parseable rows; rows without a session ID are grouped under the 'unknown' key.
+ * The Evaluate API requires spans from a single session per call.
+ */
+async function fetchSessionSpans(opts: FetchSpansOptions): Promise<SessionSpans[]> {
+  const { runtimeId, runtimeLogGroup, region, lookbackDays } = opts;
+  const endTimeMs = Date.now();
+  const startTimeMs = endTimeMs - lookbackDays * 24 * 60 * 60 * 1000;
+  const startTimeSec = Math.floor(startTimeMs / 1000); // executeQuery takes epoch seconds
+  const endTimeSec = Math.floor(endTimeMs / 1000);
+
+  const client = new CloudWatchLogsClient({
+    credentials: getCredentialProvider(),
+    region,
+  });
+
+  // 1. Query proper OTel spans from the aws/spans log group
+  let spanQuery = `fields @message, attributes.session.id as sessionId, traceId
+     | parse resource.attributes.cloud.resource_id "runtime/*/" as parsedAgentId
+     | filter parsedAgentId = '${sanitizeQueryValue(runtimeId)}'`;
+
+  if (opts.sessionId) {
+    spanQuery += `\n     | filter attributes.session.id = '${sanitizeQueryValue(opts.sessionId)}'`;
+  }
+  if (opts.traceId) {
+    spanQuery += `\n     | filter traceId = '${sanitizeQueryValue(opts.traceId)}'`;
+  }
+
+  spanQuery += `\n     | sort startTimeUnixNano asc\n     | limit 10000`;
+
+  const spanRows = await executeQuery(client, SPANS_LOG_GROUP, spanQuery, startTimeSec, endTimeSec);
+
+  // Group spans by session and collect trace IDs
+  const sessionMap = new Map<string, DocumentType[]>();
+  const traceIds = new Set<string>();
+
+  for (const row of spanRows) {
+    const messageField = row.find(f => f.field === '@message');
+    const sessionField = row.find(f => f.field === 'sessionId');
+    const traceField = row.find(f => f.field === 'traceId');
+
+    if (!messageField?.value) continue;
+
+    let doc: Record<string, unknown>;
+    try {
+      doc = JSON.parse(messageField.value) as Record<string, unknown>;
+    } catch {
+      continue; // skip rows whose @message is not JSON
+    }
+
+    const sessionId = sessionField?.value ?? 'unknown'; // rows without a session ID share one bucket
+    if (!sessionMap.has(sessionId)) {
+      sessionMap.set(sessionId, []);
+    }
+    sessionMap.get(sessionId)!.push(doc as DocumentType);
+
+    if (traceField?.value) {
+      traceIds.add(traceField.value);
+    }
+  }
+
+  if (sessionMap.size === 0) {
+    return [];
+  }
+
+  // 2. Query runtime logs from the agent's log group for the trace IDs found
+  if (traceIds.size > 0) {
+    const traceFilter = [...traceIds].map(t => `'${sanitizeQueryValue(t)}'`).join(', ');
+    let logRows: ResultField[][] = [];
+    try {
+      logRows = await executeQuery(
+        client,
+        runtimeLogGroup,
+        `fields @message, traceId
+         | filter traceId in [${traceFilter}]
+         | sort @timestamp asc
+         | limit 10000`,
+        startTimeSec,
+        endTimeSec
+      );
+    } catch {
+      // Runtime log group may not exist yet; continue with spans only.
+      // NOTE(review): this also swallows every other query error (auth, throttling, timeout).
+    }
+
+    const logDocs = extractMessages(logRows);
+
+    // Match runtime logs to sessions via traceId
+    // Build traceId → sessionId mapping from spans
+    const traceToSession = new Map<string, string>();
+    for (const row of spanRows) {
+      const traceField = row.find(f => f.field === 'traceId');
+      const sessionField = row.find(f => f.field === 'sessionId');
+      if (traceField?.value && sessionField?.value) {
+        traceToSession.set(traceField.value, sessionField.value); // last row seen for a trace wins
+      }
+    }
+
+    for (const logDoc of logDocs) {
+      if (!isRelevantForEval(logDoc)) continue;
+
+      const logTraceId = logDoc.traceId as string | undefined;
+      const sessionId = logTraceId ? (traceToSession.get(logTraceId) ?? 'unknown') : 'unknown';
+      if (!sessionMap.has(sessionId)) {
+        sessionMap.set(sessionId, []);
+      }
+      sessionMap.get(sessionId)!.push(logDoc as DocumentType); // appended after that session's spans
+    }
+  }
+
+  // 3. Build session list — aws/spans docs are already scoped by runtimeId (step 1),
+  //    and runtime log docs were filtered through isRelevantForEval (step 2).
+  //    We keep all docs so the Evaluate API has full trace context for resolving
+  //    template variables like {context} and {assistant_turn}.
+  const sessions: SessionSpans[] = [];
+  for (const [sessionId, docs] of sessionMap) {
+    if (docs.length > 0) {
+      sessions.push({ sessionId, spans: docs });
+    }
+  }
+
+  return sessions;
+}
+
+export interface RunEvalResult { // outcome returned by handleRunEval
+  success: boolean;
+  error?: string; // set when resolution fails or no session spans were found
+  run?: EvalRunResult; // the assembled run, present on success
+  filePath?: string; // where the run JSON was written, present on success
+}
+
+export async function handleRunEval(options: RunEvalOptions): Promise<RunEvalResult> {
+  let resolution: ResolveResult;
+  // Runs every resolved evaluator over every fetched session, then writes the run to disk.
+  if (options.agentArn) {
+    resolution = resolveFromArn(options);
+  } else {
+    const context = await loadDeployedProjectConfig();
+    resolution = resolveFromProject(context, options);
+  }
+
+  if (!resolution.success) {
+    return { success: false, error: resolution.error };
+  }
+
+  const { ctx } = resolution;
+
+  // Fetch spans grouped by session
+  const sessions = await fetchSessionSpans({
+    runtimeId: ctx.runtimeId,
+    runtimeLogGroup: ctx.runtimeLogGroup,
+    region: ctx.region,
+    lookbackDays: options.days,
+    sessionId: options.sessionId,
+    traceId: options.traceId,
+  });
+
+  if (sessions.length === 0) {
+    return {
+      success: false,
+      error: `No session spans found for agent "${ctx.agentLabel}" in the last ${options.days} day(s). Has the agent been invoked?`,
+    };
+  }
+
+  // Resolve evaluator levels to determine how to send spans
+  const evaluatorLevels = await resolveEvaluatorLevels(ctx.evaluatorIds, ctx.region);
+
+  // Run each evaluator against each session with level-appropriate targeting
+  const results: EvalEvaluatorResult[] = [];
+
+  for (let i = 0; i < ctx.evaluatorIds.length; i++) {
+    const evaluatorId = ctx.evaluatorIds[i]!;
+    const evaluatorName = ctx.evaluatorLabels[i] ?? evaluatorId; // labels are index-aligned with IDs
+    const level = evaluatorLevels.get(evaluatorId) ?? 'SESSION';
+
+    const sessionScores: EvalSessionScore[] = [];
+    let totalInputTokens = 0;
+    let totalOutputTokens = 0;
+    let totalTokens = 0;
+
+    for (const session of sessions) {
+      // Build evaluation target based on evaluator level
+      let targetTraceIds: string[] | undefined;
+      let targetSpanIds: string[] | undefined;
+
+      if (level === 'TRACE') {
+        targetTraceIds = extractTraceIds(session.spans);
+        if (targetTraceIds.length === 0) continue; // nothing to target in this session
+      } else if (level === 'TOOL_CALL') {
+        targetSpanIds = extractToolCallSpanIds(session.spans);
+        if (targetSpanIds.length === 0) continue; // session has no tool-call spans
+      }
+
+      // The Evaluate API limits targetSpanIds and targetTraceIds to 10 per call.
+      // Batch into chunks and merge results.
+      const batches = batchTargetIds(targetTraceIds, targetSpanIds);
+
+      for (const batch of batches) {
+        const response = await evaluate({
+          region: ctx.region,
+          evaluatorId,
+          sessionSpans: session.spans,
+          targetTraceIds: batch.traceIds,
+          targetSpanIds: batch.spanIds,
+        });
+
+        for (const r of response.evaluationResults) {
+          sessionScores.push({
+            sessionId: r.context?.sessionId ?? session.sessionId,
+            traceId: r.context?.traceId,
+            spanId: r.context?.spanId,
+            value: r.value ?? 0, // a missing value is recorded as 0
+            label: r.label,
+            explanation: r.explanation,
+            errorMessage: r.errorMessage,
+          });
+
+          totalInputTokens += r.tokenUsage?.inputTokens ?? 0;
+          totalOutputTokens += r.tokenUsage?.outputTokens ?? 0;
+          totalTokens += r.tokenUsage?.totalTokens ?? 0;
+        }
+      }
+    }
+
+    const validScores = sessionScores.filter(s => !s.errorMessage); // errored results are left out of the mean
+    const aggregateScore =
+      validScores.length > 0 ? validScores.reduce((sum, s) => sum + s.value, 0) / validScores.length : 0;
+
+    results.push({
+      evaluator: evaluatorName,
+      aggregateScore,
+      sessionScores,
+      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens, totalTokens },
+    });
+  }
+
+  // Build run result
+  const timestamp = new Date().toISOString();
+  const run: EvalRunResult = {
+    timestamp,
+    agent: ctx.agentLabel,
+    evaluators: ctx.evaluatorLabels,
+    lookbackDays: options.days,
+    sessionCount: sessions.length,
+    results,
+  };
+
+  // Save to disk: explicit --output path first, then cwd in ARN mode, else the project results dir
+  let filePath: string;
+  if (options.output) {
+    writeFileSync(options.output, JSON.stringify(run, null, 2));
+    filePath = options.output;
+  } else if (options.agentArn) {
+    // ARN mode may not have a project directory — save to cwd
+    const fallbackPath = join(process.cwd(), `${generateFilename(timestamp)}.json`);
+    writeFileSync(fallbackPath, JSON.stringify(run, null, 2));
+    filePath = fallbackPath;
+  } else {
+    filePath = saveEvalRun(run);
+  }
+
+  return { success: true, run, filePath };
+}
diff --git a/src/cli/operations/eval/storage.ts b/src/cli/operations/eval/storage.ts
new file mode 100644
index 00000000..7c65e868
--- /dev/null
+++ b/src/cli/operations/eval/storage.ts
@@ -0,0 +1,61 @@
+import { findConfigRoot } from '../../../lib';
+import type { EvalRunResult } from './types';
+import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs';
+import { join } from 'path';
+
+const EVAL_RESULTS_DIR = 'eval-results';
+
+/** Locate the directory where eval run results are stored for the current project. */
+function getResultsDir(): string {
+  // <config root>/.cli/<EVAL_RESULTS_DIR>; outside a project this throws instead of guessing.
+  const root = findConfigRoot();
+  if (root) return join(root, '.cli', EVAL_RESULTS_DIR);
+  throw new Error('No agentcore project found. Run `agentcore create` first.');
+}
+
+export function generateFilename(timestamp: string): string {
+  const when = new Date(timestamp); // all fields are read in UTC; result is eval_YYYY-MM-DD_HH-MM-SS (no extension)
+  const two = (n: number): string => `${n}`.padStart(2, '0');
+  return `eval_${[when.getUTCFullYear(), two(when.getUTCMonth() + 1), two(when.getUTCDate())].join('-')}_${[when.getUTCHours(), when.getUTCMinutes(), when.getUTCSeconds()].map(two).join('-')}`;
+}
+
+/** Write `result` as pretty-printed JSON into the results directory (created if needed) and return the file path. */
+export function saveEvalRun(result: EvalRunResult): string {
+  const resultsDir = getResultsDir();
+  mkdirSync(resultsDir, { recursive: true });
+
+  const target = join(resultsDir, `${generateFilename(result.timestamp)}.json`);
+  writeFileSync(target, JSON.stringify(result, null, 2));
+  return target;
+}
+
+/** Read one stored eval run by filename; the `.json` extension is optional. Throws if the file is missing. */
+export function loadEvalRun(filename: string): EvalRunResult {
+  const withExt = filename.endsWith('.json') ? filename : `${filename}.json`;
+  const filePath = join(getResultsDir(), withExt);
+  if (existsSync(filePath)) {
+    const raw = readFileSync(filePath, 'utf-8');
+    return JSON.parse(raw) as EvalRunResult;
+  }
+
+  throw new Error(`Eval run "${filename}" not found at ${filePath}`);
+}
+
+/** Load every stored eval run (files named eval_*.json), ordered by filename descending; [] if the directory is absent. */
+export function listEvalRuns(): EvalRunResult[] {
+  const dir = getResultsDir();
+  if (!existsSync(dir)) {
+    return [];
+  }
+  const runs: EvalRunResult[] = [];
+  const names = readdirSync(dir).filter(name => name.startsWith('eval_') && name.endsWith('.json'));
+  for (const name of names.sort().reverse()) {
+    const raw = readFileSync(join(dir, name), 'utf-8');
+    runs.push(JSON.parse(raw) as EvalRunResult);
+  }
+  return runs;
+}
+
+export function getResultsPath(): string {
+  return getResultsDir(); // exported accessor for the results directory; throws outside a project like getResultsDir
+}
diff --git a/src/cli/operations/eval/types.ts b/src/cli/operations/eval/types.ts
new file mode 100644
index 00000000..522f8e36
--- /dev/null
+++ b/src/cli/operations/eval/types.ts
@@ -0,0 +1,77 @@
+/** Result of a single evaluator within an eval run */
+export interface EvalEvaluatorResult {
+  evaluator: string; // evaluator label as supplied by the user, falling back to the evaluator ID
+  aggregateScore: number; // mean `value` over scores without an errorMessage; 0 when there are none
+  sessionScores: EvalSessionScore[];
+  tokenUsage?: {
+    inputTokens: number; // each field is summed across all evaluation results of this evaluator
+    outputTokens: number;
+    totalTokens: number;
+  };
+}
+
+/** One evaluation result from an evaluator; a session can yield several (e.g. one per trace or span) */
+export interface EvalSessionScore {
+  sessionId: string; // from the result context, else the session the spans were grouped under
+  traceId?: string;
+  spanId?: string;
+  value: number; // recorded as 0 when the result carried no value
+  label?: string;
+  explanation?: string;
+  errorMessage?: string; // when set, the score is excluded from aggregateScore
+}
+
+/** Full eval run result stored to disk */
+export interface EvalRunResult {
+  timestamp: string; // ISO-8601 string; also the source of the stored filename
+  agent: string; // agent name (project mode) or runtime ID (ARN mode)
+  evaluators: string[]; // evaluator labels as supplied by the user
+  lookbackDays: number;
+  sessionCount: number; // number of sessions fetched for the run
+  results: EvalEvaluatorResult[]; // one entry per evaluator
+}
+
+/** Options for running an eval */
+export interface RunEvalOptions {
+  /** Agent name (project mode) */
+  agent?: string;
+  /** Evaluator names or Builtin.* IDs (resolved via project deployed state) */
+  evaluator: string[];
+  /** Evaluator ARN(s) or IDs passed directly */
+  evaluatorArn?: string[];
+  /** Agent runtime ARN (ARN mode — bypasses project config) */
+  agentArn?: string;
+  /** AWS region; in ARN mode it overrides the region segment of the ARN (project mode uses the agent's region) */
+  region?: string;
+  /** Filter to a specific session */
+  sessionId?: string;
+  /** Filter to a specific trace */
+  traceId?: string;
+  days: number; // lookback window, in days, for the span query
+  output?: string; // when set, the run JSON is written to this path instead of the default location
+  json?: boolean; // NOTE(review): not read by handleRunEval; presumably a CLI output-format flag — confirm
+}
+
+/** Options for listing eval runs */
+export interface ListEvalRunsOptions {
+  agent?: string; // presumably filters runs by agent label — confirm at the call site
+  limit?: number; // presumably caps the number of runs returned — confirm at the call site
+  json?: boolean;
+}
+
+/** Options for getting a single eval run */
+export interface GetEvalRunOptions {
+  filename: string; // presumably the stored run's filename as accepted by loadEvalRun — confirm
+  sessions?: boolean; // presumably toggles per-session detail in the output — confirm at the call site
+  json?: boolean;
+}
+
+/** Options for pause/resume/delete online eval */
+export interface OnlineEvalActionOptions {
+  name: string; // project config name; only consulted when `arn` is absent
+  /** Online eval config ARN (direct mode — bypasses project config) */
+  arn?: string;
+  /** AWS region; with --arn it overrides the region segment of the ARN (name mode uses the target's region) */
+  region?: string;
+  json?: boolean;
+}
diff --git a/src/cli/primitives/EvaluatorPrimitive.ts b/src/cli/primitives/EvaluatorPrimitive.ts
new file mode 100644
index 00000000..a8aced79
--- /dev/null
+++ b/src/cli/primitives/EvaluatorPrimitive.ts
@@ -0,0 +1,262 @@
+import { findConfigRoot } from '../../lib';
+import type { EvaluationLevel, Evaluator, EvaluatorConfig } from '../../schema';
+import { EvaluationLevelSchema, EvaluatorSchema } from '../../schema';
+import { getErrorMessage } from '../errors';
+import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types';
+import { BasePrimitive } from './BasePrimitive';
+import type { AddResult, AddScreenComponent, RemovableResource } from './types';
+import type { Command } from '@commander-js/extra-typings';
+
+export interface AddEvaluatorOptions { // Input accepted by EvaluatorPrimitive.add()
+  name: string; // Stored as the evaluator's name; add() reports failure when the name already exists
+  level: EvaluationLevel;
+  description?: string; // Written to the spec only when it is a non-empty string
+  config: EvaluatorConfig;
+}
+
+export type RemovableEvaluator = RemovableResource; // Plain alias: evaluators add no fields of their own
+
+/**
+ * Adds and removes custom evaluators in the project spec and registers the matching CLI subcommands.
+ */
+export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, RemovableEvaluator> {
+  readonly kind = 'evaluator' as const;
+  readonly label = 'Evaluator';
+  override readonly article = 'an';
+  readonly primitiveSchema = EvaluatorSchema;
+  /** Appends a custom evaluator to the project spec; failures are returned as `{ success: false, error }`, never thrown. */
+  async add(options: AddEvaluatorOptions): Promise<AddResult<{ evaluatorName: string }>> {
+    try {
+      const evaluator = await this.createEvaluator(options);
+      return { success: true, evaluatorName: evaluator.name };
+    } catch (err) {
+      return { success: false, error: getErrorMessage(err) };
+    }
+  }
+  /** Deletes the named evaluator from the project spec, refusing while any online eval config still lists it. */
+  async remove(evaluatorName: string): Promise<RemovalResult> {
+    try {
+      const project = await this.readProjectSpec();
+
+      const index = project.evaluators.findIndex(e => e.name === evaluatorName);
+      if (index === -1) {
+        return { success: false, error: `Evaluator "${evaluatorName}" not found.` };
+      }
+
+      // Block the removal (nothing is written) while online eval configs still reference this evaluator
+      const referencingConfigs = project.onlineEvalConfigs.filter(c => c.evaluators.includes(evaluatorName));
+      if (referencingConfigs.length > 0) {
+        const configNames = referencingConfigs.map(c => c.name).join(', ');
+        return {
+          success: false,
+          error: `Evaluator "${evaluatorName}" is referenced by online eval config(s): ${configNames}. Remove those references first.`,
+        };
+      }
+
+      project.evaluators.splice(index, 1);
+      await this.writeProjectSpec(project);
+
+      return { success: true };
+    } catch (err) {
+      return { success: false, error: getErrorMessage(err) };
+    }
+  }
+  /** Describes what removing the evaluator would change without writing anything; throws if the name is unknown. */
+  async previewRemove(evaluatorName: string): Promise<RemovalPreview> {
+    const project = await this.readProjectSpec();
+
+    const evaluator = project.evaluators.find(e => e.name === evaluatorName);
+    if (!evaluator) {
+      throw new Error(`Evaluator "${evaluatorName}" not found.`);
+    }
+
+    const summary: string[] = [`Removing evaluator: ${evaluatorName}`];
+    const schemaChanges: SchemaChange[] = [];
+
+    const referencingConfigs = project.onlineEvalConfigs.filter(c => c.evaluators.includes(evaluatorName));
+    if (referencingConfigs.length > 0) { // informational line only: the schema change below is still listed
+      summary.push(
+        `Blocked: Referenced by online eval config(s): ${referencingConfigs.map(c => c.name).join(', ')}. Remove those references first.`
+      );
+    }
+
+    const afterSpec = {
+      ...project,
+      evaluators: project.evaluators.filter(e => e.name !== evaluatorName),
+    };
+
+    schemaChanges.push({
+      file: 'agentcore/agentcore.json',
+      before: project,
+      after: afterSpec,
+    });
+
+    return { summary, directoriesToDelete: [], schemaChanges };
+  }
+  /** Lists the evaluators that can be removed; an unreadable project spec yields an empty list. */
+  async getRemovable(): Promise<RemovableEvaluator[]> {
+    try {
+      const project = await this.readProjectSpec();
+      return project.evaluators.map(e => ({ name: e.name }));
+    } catch {
+      return [];
+    }
+  }
+  /** Returns every evaluator name in the project spec, or an empty list when the spec cannot be read. */
+  async getAllNames(): Promise<string[]> {
+    try {
+      const project = await this.readProjectSpec();
+      return project.evaluators.map(e => e.name);
+    } catch {
+      return [];
+    }
+  }
+  /** Registers the `evaluator` add subcommand (flag-driven when --name or --json is given, otherwise the TUI) and the remove subcommand. */
+  registerCommands(addCmd: Command, removeCmd: Command): void {
+    addCmd
+      .command(this.kind)
+      .description('Add a custom evaluator to the project')
+      .option('--name <name>', 'Evaluator name')
+      .option('--level <level>', 'Evaluation level: SESSION, TRACE, TOOL_CALL')
+      .option('--model <model>', 'Bedrock model ID for LLM-as-a-Judge')
+      .option('--instructions <text>', 'Evaluation prompt instructions')
+      .option('--config <path>', 'Path to evaluator config JSON file (overrides --model, --instructions)')
+      .option('--json', 'Output as JSON')
+      .action(
+        async (cliOptions: {
+          name?: string;
+          level?: string;
+          model?: string;
+          instructions?: string;
+          config?: string;
+          json?: boolean;
+        }) => {
+          try {
+            if (!findConfigRoot()) {
+              // Thrown (not printed here) so the catch below honours --json; it still exits with code 1.
+              throw new Error('No agentcore project found. Run `agentcore create` first.');
+            }
+
+            if (cliOptions.name || cliOptions.json) {
+              if (!cliOptions.name || !cliOptions.level) {
+                const error = '--name and --level are required in non-interactive mode';
+                if (cliOptions.json) {
+                  console.log(JSON.stringify({ success: false, error }));
+                } else {
+                  console.error(error);
+                }
+                process.exit(1);
+              }
+
+              if (!cliOptions.config && !cliOptions.model) {
+                const error = 'Either --config or --model is required';
+                if (cliOptions.json) {
+                  console.log(JSON.stringify({ success: false, error }));
+                } else {
+                  console.error(error);
+                }
+                process.exit(1);
+              }
+
+              const levelResult = EvaluationLevelSchema.safeParse(cliOptions.level);
+              if (!levelResult.success) {
+                const error = `Invalid --level "${cliOptions.level}". Must be one of: SESSION, TRACE, TOOL_CALL`;
+                if (cliOptions.json) {
+                  console.log(JSON.stringify({ success: false, error }));
+                } else {
+                  console.error(error);
+                }
+                process.exit(1);
+              }
+
+              let configJson: EvaluatorConfig;
+              if (cliOptions.config) {
+                const { readFileSync } = await import('fs');
+                // NOTE(review): the parsed file is cast, not validated here; read/parse errors reach the catch below.
+                configJson = JSON.parse(readFileSync(cliOptions.config, 'utf-8')) as EvaluatorConfig;
+              } else {
+                configJson = {
+                  llmAsAJudge: {
+                    model: cliOptions.model!,
+                    instructions: cliOptions.instructions ?? `Evaluate the quality. Context: {context}`,
+                    ratingScale: {
+                      numerical: [
+                        { value: 1, label: 'Poor', definition: 'Fails to meet expectations' },
+                        { value: 2, label: 'Fair', definition: 'Partially meets expectations' },
+                        { value: 3, label: 'Good', definition: 'Meets expectations' },
+                        { value: 4, label: 'Very Good', definition: 'Exceeds expectations' },
+                        { value: 5, label: 'Excellent', definition: 'Far exceeds expectations' },
+                      ],
+                    },
+                  },
+                };
+              }
+
+              const result = await this.add({
+                name: cliOptions.name,
+                level: levelResult.data,
+                config: configJson,
+              });
+
+              if (cliOptions.json) {
+                console.log(JSON.stringify(result));
+              } else if (result.success) {
+                console.log(`Added evaluator '${result.evaluatorName}'`);
+              } else {
+                console.error(result.error);
+              }
+              process.exit(result.success ? 0 : 1);
+            } else {
+              // TUI fallback
+              const [{ render }, { default: React }, { AddFlow }] = await Promise.all([
+                import('ink'),
+                import('react'),
+                import('../tui/screens/add/AddFlow'),
+              ]);
+              const { clear, unmount } = render(
+                React.createElement(AddFlow, {
+                  isInteractive: false,
+                  onExit: () => {
+                    clear();
+                    unmount();
+                    process.exit(0);
+                  },
+                })
+              );
+            }
+          } catch (error) {
+            if (cliOptions.json) {
+              console.log(JSON.stringify({ success: false, error: getErrorMessage(error) }));
+            } else {
+              console.error(getErrorMessage(error));
+            }
+            process.exit(1);
+          }
+        }
+      );
+
+    this.registerRemoveSubcommand(removeCmd);
+  }
+  /** Returns null: this primitive supplies no add-screen component. */
+  addScreen(): AddScreenComponent {
+    return null;
+  }
+  /** Reads the spec, rejects a duplicate name, appends the new CustomEvaluator entry and writes the spec back. */
+  private async createEvaluator(options: AddEvaluatorOptions): Promise<Evaluator> {
+    const project = await this.readProjectSpec();
+
+    this.checkDuplicate(project.evaluators, options.name);
+
+    const evaluator: Evaluator = {
+      type: 'CustomEvaluator',
+      name: options.name,
+      level: options.level,
+      ...(options.description && { description: options.description }),
+      config: options.config,
+    };
+
+    project.evaluators.push(evaluator);
+    await this.writeProjectSpec(project);
+
+    return evaluator;
+  }
+}
diff --git a/src/cli/primitives/OnlineEvalConfigPrimitive.ts b/src/cli/primitives/OnlineEvalConfigPrimitive.ts
new file mode 100644
index 00000000..e66c0c85
--- /dev/null
+++ b/src/cli/primitives/OnlineEvalConfigPrimitive.ts
@@ -0,0 +1,226 @@
+import { findConfigRoot } from '../../lib';
+import type { OnlineEvalConfig } from '../../schema';
+import { OnlineEvalConfigSchema } from '../../schema';
+import { getErrorMessage } from '../errors';
+import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types';
+import { BasePrimitive } from './BasePrimitive';
+import type { AddResult, AddScreenComponent, RemovableResource } from './types';
+import type { Command } from '@commander-js/extra-typings';
+
+export interface AddOnlineEvalConfigOptions { // Input accepted by OnlineEvalConfigPrimitive.add()
+  name: string; // Stored as the config's name; add() reports failure when the name already exists
+  agent: string;
+  evaluators: string[]; // Stored as given; the CLI merges --evaluator and --evaluator-arn into this list
+  samplingRate: number; // The CLI only passes values between 0.01 and 100
+  enableOnCreate?: boolean; // Omitted from the written spec when undefined
+}
+
+export type RemovableOnlineEvalConfig = RemovableResource; // Plain alias: online eval configs add no fields of their own
+
+/**
+ * Adds and removes online eval configs in the project spec and registers the matching CLI subcommands.
+ */
+export class OnlineEvalConfigPrimitive extends BasePrimitive<AddOnlineEvalConfigOptions, RemovableOnlineEvalConfig> {
+  readonly kind = 'online-eval' as const;
+  readonly label = 'Online Eval Config';
+  override readonly article = 'an';
+  readonly primitiveSchema = OnlineEvalConfigSchema;
+  /** Appends an online eval config to the project spec; failures are returned as `{ success: false, error }`, never thrown. */
+  async add(options: AddOnlineEvalConfigOptions): Promise<AddResult<{ configName: string }>> {
+    try {
+      const config = await this.createOnlineEvalConfig(options);
+      return { success: true, configName: config.name };
+    } catch (err) {
+      return { success: false, error: getErrorMessage(err) };
+    }
+  }
+  /** Deletes the named online eval config from the project spec; an unknown name is reported as a failure result. */
+  async remove(configName: string): Promise<RemovalResult> {
+    try {
+      const project = await this.readProjectSpec();
+
+      const index = project.onlineEvalConfigs.findIndex(c => c.name === configName);
+      if (index === -1) {
+        return { success: false, error: `Online eval config "${configName}" not found.` };
+      }
+
+      project.onlineEvalConfigs.splice(index, 1);
+      await this.writeProjectSpec(project);
+
+      return { success: true };
+    } catch (err) {
+      return { success: false, error: getErrorMessage(err) };
+    }
+  }
+  /** Describes what removing the config would change without writing anything; throws if the name is unknown. */
+  async previewRemove(configName: string): Promise<RemovalPreview> {
+    const project = await this.readProjectSpec();
+
+    const config = project.onlineEvalConfigs.find(c => c.name === configName);
+    if (!config) {
+      throw new Error(`Online eval config "${configName}" not found.`);
+    }
+
+    const summary: string[] = [
+      `Removing online eval config: ${configName}`,
+      `Uses evaluators: ${config.evaluators.join(', ')}`,
+    ];
+    const schemaChanges: SchemaChange[] = [];
+
+    const afterSpec = {
+      ...project,
+      onlineEvalConfigs: project.onlineEvalConfigs.filter(c => c.name !== configName),
+    };
+
+    schemaChanges.push({
+      file: 'agentcore/agentcore.json',
+      before: project,
+      after: afterSpec,
+    });
+
+    return { summary, directoriesToDelete: [], schemaChanges };
+  }
+  /** Lists the online eval configs that can be removed; an unreadable project spec yields an empty list. */
+  async getRemovable(): Promise<RemovableOnlineEvalConfig[]> {
+    try {
+      const project = await this.readProjectSpec();
+      return project.onlineEvalConfigs.map(c => ({ name: c.name }));
+    } catch {
+      return [];
+    }
+  }
+  /** Returns every online eval config name in the project spec, or an empty list when the spec cannot be read. */
+  async getAllNames(): Promise<string[]> {
+    try {
+      const project = await this.readProjectSpec();
+      return project.onlineEvalConfigs.map(c => c.name);
+    } catch {
+      return [];
+    }
+  }
+  /** Registers the `online-eval` add subcommand (flag-driven when --name or --json is given, otherwise the TUI) and the remove subcommand. */
+  registerCommands(addCmd: Command, removeCmd: Command): void {
+    addCmd
+      .command(this.kind)
+      .description('Add an online eval config to the project')
+      .option('--name <name>', 'Config name [non-interactive]')
+      .option('-a, --agent <name>', 'Agent to monitor [non-interactive]')
+      .option('-e, --evaluator <evaluators...>', 'Evaluator name(s), Builtin.* IDs, or ARNs [non-interactive]')
+      .option('--evaluator-arn <arns...>', 'Evaluator ARN(s) [non-interactive]')
+      .option('--sampling-rate <rate>', 'Sampling percentage (0.01-100) [non-interactive]')
+      .option('--enable-on-create', 'Enable evaluation immediately after deploy [non-interactive]')
+      .option('--json', 'Output as JSON [non-interactive]')
+      .action(
+        async (cliOptions: {
+          name?: string;
+          agent?: string;
+          evaluator?: string[];
+          evaluatorArn?: string[];
+          samplingRate?: string;
+          enableOnCreate?: boolean;
+          json?: boolean;
+        }) => {
+          try {
+            if (!findConfigRoot()) {
+              // Thrown (not printed here) so the catch below honours --json; it still exits with code 1.
+              throw new Error('No agentcore project found. Run `agentcore create` first.');
+            }
+
+            if (cliOptions.name || cliOptions.json) {
+              // Merge --evaluator and --evaluator-arn into a single list
+              const allEvaluators = [...(cliOptions.evaluator ?? []), ...(cliOptions.evaluatorArn ?? [])];
+
+              if (!cliOptions.name || !cliOptions.agent || allEvaluators.length === 0 || !cliOptions.samplingRate) {
+                const error =
+                  '--name, --agent, --evaluator (and/or --evaluator-arn), and --sampling-rate are all required in non-interactive mode';
+                if (cliOptions.json) {
+                  console.log(JSON.stringify({ success: false, error }));
+                } else {
+                  console.error(error);
+                }
+                process.exit(1);
+              }
+
+              const samplingRate = Number(cliOptions.samplingRate); // strict: parseFloat would accept trailing junk such as "10abc"
+              if (isNaN(samplingRate) || samplingRate < 0.01 || samplingRate > 100) {
+                const error = `Invalid --sampling-rate "${cliOptions.samplingRate}". Must be a number between 0.01 and 100`;
+                if (cliOptions.json) {
+                  console.log(JSON.stringify({ success: false, error }));
+                } else {
+                  console.error(error);
+                }
+                process.exit(1);
+              }
+
+              const result = await this.add({
+                name: cliOptions.name,
+                agent: cliOptions.agent,
+                evaluators: allEvaluators,
+                samplingRate,
+                enableOnCreate: cliOptions.enableOnCreate,
+              });
+
+              if (cliOptions.json) {
+                console.log(JSON.stringify(result));
+              } else if (result.success) {
+                console.log(`Added online eval config '${result.configName}'`);
+              } else {
+                console.error(result.error);
+              }
+              process.exit(result.success ? 0 : 1);
+            } else {
+              // TUI fallback
+              const [{ render }, { default: React }, { AddFlow }] = await Promise.all([
+                import('ink'),
+                import('react'),
+                import('../tui/screens/add/AddFlow'),
+              ]);
+              const { clear, unmount } = render(
+                React.createElement(AddFlow, {
+                  isInteractive: false,
+                  onExit: () => {
+                    clear();
+                    unmount();
+                    process.exit(0);
+                  },
+                })
+              );
+            }
+          } catch (error) {
+            if (cliOptions.json) {
+              console.log(JSON.stringify({ success: false, error: getErrorMessage(error) }));
+            } else {
+              console.error(getErrorMessage(error));
+            }
+            process.exit(1);
+          }
+        }
+      );
+
+    this.registerRemoveSubcommand(removeCmd);
+  }
+  /** Returns null: this primitive supplies no add-screen component. */
+  addScreen(): AddScreenComponent {
+    return null;
+  }
+  /** Reads the spec, rejects a duplicate name, appends the new OnlineEvaluationConfig entry and writes the spec back. */
+  private async createOnlineEvalConfig(options: AddOnlineEvalConfigOptions): Promise<OnlineEvalConfig> {
+    const project = await this.readProjectSpec();
+
+    this.checkDuplicate(project.onlineEvalConfigs, options.name, 'Online eval config');
+
+    const config: OnlineEvalConfig = {
+      type: 'OnlineEvaluationConfig',
+      name: options.name,
+      agent: options.agent,
+      evaluators: options.evaluators,
+      samplingRate: options.samplingRate,
+      ...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }),
+    };
+
+    project.onlineEvalConfigs.push(config);
+    await this.writeProjectSpec(project);
+
+    return config;
+  }
+}
diff --git a/src/cli/primitives/__tests__/EvaluatorPrimitive.test.ts b/src/cli/primitives/__tests__/EvaluatorPrimitive.test.ts
new file mode 100644
index 00000000..6cca7305
--- /dev/null
+++ b/src/cli/primitives/__tests__/EvaluatorPrimitive.test.ts
@@ -0,0 +1,233 @@
+import type { EvaluatorConfig } from '../../../schema';
+import { EvaluatorPrimitive } from '../EvaluatorPrimitive.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockReadProjectSpec = vi.fn();
+const mockWriteProjectSpec = vi.fn();
+// Replace the lib module so ConfigIO reads/writes go through the two mocks above and findConfigRoot always finds a root.
+vi.mock('../../../lib/index.js', () => ({
+  ConfigIO: class {
+    readProjectSpec = mockReadProjectSpec;
+    writeProjectSpec = mockWriteProjectSpec;
+  },
+  findConfigRoot: () => '/fake/root',
+}));
+
+const validConfig: EvaluatorConfig = { // LLM-as-a-judge config with a two-entry numerical rating scale, reused by the add() tests
+  llmAsAJudge: {
+    model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
+    instructions: 'Evaluate quality. Context: {context}',
+    ratingScale: {
+      numerical: [
+        { value: 1, label: 'Poor', definition: 'Fails' },
+        { value: 5, label: 'Excellent', definition: 'Perfect' },
+      ],
+    },
+  },
+};
+
+function makeProject( // Builds a project spec fixture; agents, memories and credentials are always empty
+  evaluators: { name: string }[] = [],
+  onlineEvalConfigs: { name: string; evaluators: string[] }[] = []
+) {
+  return {
+    name: 'TestProject',
+    version: 1,
+    agents: [],
+    memories: [],
+    credentials: [],
+    evaluators,
+    onlineEvalConfigs,
+  };
+}
+
+const primitive = new EvaluatorPrimitive(); // Single instance shared by every test below
+
+describe('EvaluatorPrimitive', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('has correct kind, label, and article', () => { // Pins the primitive's static metadata fields
+    expect(primitive.kind).toBe('evaluator');
+    expect(primitive.label).toBe('Evaluator');
+    // eslint-disable-next-line @typescript-eslint/dot-notation
+    expect(primitive['article']).toBe('an');
+  });
+
+  describe('add', () => {
+    it('adds evaluator to project spec and returns success', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.add({
+        name: 'MyEval',
+        level: 'SESSION',
+        config: validConfig,
+      });
+
+      expect(result.success).toBe(true);
+      expect(result).toHaveProperty('evaluatorName', 'MyEval');
+      // The first argument of the first write call is the spec that would be persisted.
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
+      expect(writtenSpec.evaluators).toHaveLength(1);
+      expect(writtenSpec.evaluators[0].name).toBe('MyEval');
+      expect(writtenSpec.evaluators[0].type).toBe('CustomEvaluator');
+      expect(writtenSpec.evaluators[0].level).toBe('SESSION');
+    });
+
+    it('includes description when provided', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      await primitive.add({
+        name: 'DescEval',
+        level: 'TRACE',
+        description: 'My description',
+        config: validConfig,
+      });
+
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
+      expect(writtenSpec.evaluators[0].description).toBe('My description');
+    });
+
+    it('returns error when evaluator name already exists', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'Existing' }]));
+
+      const result = await primitive.add({
+        name: 'Existing',
+        level: 'SESSION',
+        config: validConfig,
+      });
+
+      expect(result).toEqual(
+        expect.objectContaining({ success: false, error: expect.stringContaining('already exists') })
+      );
+    });
+
+    it('returns error when readProjectSpec fails', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('disk read error'));
+
+      const result = await primitive.add({
+        name: 'NewEval',
+        level: 'SESSION',
+        config: validConfig,
+      });
+      // add() must convert the rejection into a failure result carrying the original message.
+      expect(result).toEqual(expect.objectContaining({ success: false, error: 'disk read error' }));
+    });
+  });
+
+  describe('remove', () => {
+    it('removes evaluator from project spec', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'EvalA' }, { name: 'EvalB' }]));
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.remove('EvalA');
+
+      expect(result.success).toBe(true);
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
+      expect(writtenSpec.evaluators).toHaveLength(1);
+      expect(writtenSpec.evaluators[0].name).toBe('EvalB');
+    });
+
+    it('returns error when evaluator not found', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+
+      const result = await primitive.remove('NonExistent');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error).toContain('NonExistent');
+        expect(result.error).toContain('not found');
+      }
+    });
+
+    it('blocks removal when referenced by online eval configs', async () => {
+      mockReadProjectSpec.mockResolvedValue(
+        makeProject([{ name: 'UsedEval' }], [{ name: 'MyOnlineConfig', evaluators: ['UsedEval'] }])
+      );
+
+      const result = await primitive.remove('UsedEval');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error).toContain('referenced by online eval config');
+        expect(result.error).toContain('MyOnlineConfig');
+      }
+      expect(mockWriteProjectSpec).not.toHaveBeenCalled(); // a blocked removal must leave the spec unwritten
+    });
+
+    it('returns error when readProjectSpec fails', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('io error'));
+
+      const result = await primitive.remove('Whatever');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error).toBe('io error');
+      }
+    });
+  });
+
+  describe('previewRemove', () => {
+    it('returns preview with schema changes', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'EvalA' }]));
+
+      const preview = await primitive.previewRemove('EvalA');
+
+      expect(preview.summary[0]).toContain('Removing evaluator: EvalA');
+      expect(preview.schemaChanges).toHaveLength(1);
+      expect(preview.schemaChanges[0]!.file).toBe('agentcore/agentcore.json');
+      expect((preview.schemaChanges[0]!.after as { evaluators: unknown[] }).evaluators).toHaveLength(0);
+    });
+
+    it('throws when evaluator not found', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      // Unlike remove(), previewRemove() rejects instead of returning a failure result.
+      await expect(primitive.previewRemove('Missing')).rejects.toThrow('not found');
+    });
+
+    it('warns when evaluator is referenced by online eval configs', async () => {
+      mockReadProjectSpec.mockResolvedValue(
+        makeProject([{ name: 'UsedEval' }], [{ name: 'Config1', evaluators: ['UsedEval'] }])
+      );
+
+      const preview = await primitive.previewRemove('UsedEval');
+      // The warning is the summary line that starts with "Blocked".
+      const blocked = preview.summary.find(s => s.includes('Blocked'));
+      expect(blocked).toBeDefined();
+      expect(blocked).toContain('Config1');
+    });
+  });
+
+  describe('getRemovable', () => {
+    it('returns evaluator names', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'A' }, { name: 'B' }]));
+
+      const result = await primitive.getRemovable();
+      // Each entry is reduced to an object holding only the name.
+      expect(result).toEqual([{ name: 'A' }, { name: 'B' }]);
+    });
+
+    it('returns empty array on error', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('fail'));
+
+      expect(await primitive.getRemovable()).toEqual([]);
+    });
+  });
+
+  describe('getAllNames', () => {
+    it('returns evaluator names as strings', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'X' }, { name: 'Y' }]));
+
+      const result = await primitive.getAllNames();
+      // Names come back in spec order as plain strings.
+      expect(result).toEqual(['X', 'Y']);
+    });
+
+    it('returns empty array on error', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('fail'));
+
+      expect(await primitive.getAllNames()).toEqual([]);
+    });
+  });
+});
diff --git a/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts b/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts
new file mode 100644
index 00000000..badcce2a
--- /dev/null
+++ b/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts
@@ -0,0 +1,242 @@
+import { OnlineEvalConfigPrimitive } from '../OnlineEvalConfigPrimitive.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockReadProjectSpec = vi.fn();
+const mockWriteProjectSpec = vi.fn();
+// Replace the lib module so ConfigIO reads/writes go through the two mocks above and findConfigRoot always finds a root.
+vi.mock('../../../lib/index.js', () => ({
+  ConfigIO: class {
+    readProjectSpec = mockReadProjectSpec;
+    writeProjectSpec = mockWriteProjectSpec;
+  },
+  findConfigRoot: () => '/fake/root',
+}));
+
+function makeProject( // Builds a project spec fixture; note onlineEvalConfigs is the FIRST parameter in this file
+  onlineEvalConfigs: { name: string; evaluators: string[] }[] = [],
+  evaluators: { name: string }[] = []
+) {
+  return {
+    name: 'TestProject',
+    version: 1,
+    agents: [],
+    memories: [],
+    credentials: [],
+    evaluators,
+    onlineEvalConfigs,
+  };
+}
+
+const primitive = new OnlineEvalConfigPrimitive(); // Single instance shared by every test below
+
+describe('OnlineEvalConfigPrimitive', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('has correct kind, label, and article', () => { // Pins the primitive's static metadata fields
+    expect(primitive.kind).toBe('online-eval');
+    expect(primitive.label).toBe('Online Eval Config');
+    // eslint-disable-next-line @typescript-eslint/dot-notation
+    expect(primitive['article']).toBe('an');
+  });
+
+  describe('add', () => {
+    it('adds config to project spec and returns success', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.add({
+        name: 'MyConfig',
+        agent: 'MyAgent',
+        evaluators: ['Builtin.GoalSuccessRate'],
+        samplingRate: 10,
+      });
+
+      expect(result.success).toBe(true);
+      expect(result).toHaveProperty('configName', 'MyConfig');
+      // The first argument of the first write call is the spec that would be persisted.
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
+      expect(writtenSpec.onlineEvalConfigs).toHaveLength(1);
+      const config = writtenSpec.onlineEvalConfigs[0];
+      expect(config.type).toBe('OnlineEvaluationConfig');
+      expect(config.name).toBe('MyConfig');
+      expect(config.evaluators).toEqual(['Builtin.GoalSuccessRate']);
+      expect(config.samplingRate).toBe(10);
+    });
+
+    it('stores enableOnCreate when provided', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.add({
+        name: 'EnabledConfig',
+        agent: 'MyAgent',
+        evaluators: ['Builtin.GoalSuccessRate'],
+        samplingRate: 10,
+        enableOnCreate: true,
+      });
+
+      expect(result.success).toBe(true);
+      const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0];
+      expect(config.enableOnCreate).toBe(true);
+    });
+
+    it('omits enableOnCreate when not provided', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      await primitive.add({
+        name: 'NoEnableConfig',
+        agent: 'MyAgent',
+        evaluators: ['Builtin.GoalSuccessRate'],
+        samplingRate: 10,
+      });
+
+      const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0];
+      expect(config.enableOnCreate).toBeUndefined();
+    });
+
+    it('supports multiple evaluators including ARNs', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.add({
+        name: 'MultiConfig',
+        agent: 'MyAgent',
+        evaluators: ['Builtin.GoalSuccessRate', 'CustomEval', 'arn:aws:bedrock:us-east-1:123:evaluator/ext'],
+        samplingRate: 50,
+      });
+
+      expect(result.success).toBe(true);
+      const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0];
+      expect(config.evaluators).toEqual([ // stored verbatim and in the order given
+        'Builtin.GoalSuccessRate',
+        'CustomEval',
+        'arn:aws:bedrock:us-east-1:123:evaluator/ext',
+      ]);
+    });
+
+    it('returns error when config name already exists', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'Existing', evaluators: ['e'] }]));
+
+      const result = await primitive.add({
+        name: 'Existing',
+        agent: 'MyAgent',
+        evaluators: ['e'],
+        samplingRate: 10,
+      });
+
+      expect(result).toEqual(
+        expect.objectContaining({ success: false, error: expect.stringContaining('already exists') })
+      );
+    });
+
+    it('returns error when readProjectSpec fails', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('no project'));
+
+      const result = await primitive.add({
+        name: 'New',
+        agent: 'MyAgent',
+        evaluators: ['e'],
+        samplingRate: 10,
+      });
+      // add() must convert the rejection into a failure result carrying the original message.
+      expect(result).toEqual(expect.objectContaining({ success: false, error: 'no project' }));
+    });
+  });
+
+  describe('remove', () => {
+    it('removes config from project spec', async () => {
+      mockReadProjectSpec.mockResolvedValue(
+        makeProject([
+          { name: 'ConfigA', evaluators: ['e'] },
+          { name: 'ConfigB', evaluators: ['f'] },
+        ])
+      );
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.remove('ConfigA');
+
+      expect(result.success).toBe(true);
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; // spec passed to the first write call
+      expect(writtenSpec.onlineEvalConfigs).toHaveLength(1);
+      expect(writtenSpec.onlineEvalConfigs[0].name).toBe('ConfigB');
+    });
+
+    it('returns error when config not found', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+
+      const result = await primitive.remove('NonExistent');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error).toContain('NonExistent');
+        expect(result.error).toContain('not found');
+      }
+    });
+
+    it('returns error when readProjectSpec fails', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('io error'));
+
+      const result = await primitive.remove('Whatever');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error).toBe('io error');
+      }
+    });
+  });
+
+  describe('previewRemove', () => {
+    it('returns preview with summary including evaluators', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'Config1', evaluators: ['Builtin.X', 'CustomY'] }]));
+
+      const preview = await primitive.previewRemove('Config1');
+      // toContain on the summary array matches whole lines, so both strings must appear exactly.
+      expect(preview.summary).toContain('Removing online eval config: Config1');
+      expect(preview.summary).toContain('Uses evaluators: Builtin.X, CustomY');
+      expect(preview.schemaChanges).toHaveLength(1);
+      expect((preview.schemaChanges[0]!.after as { onlineEvalConfigs: unknown[] }).onlineEvalConfigs).toHaveLength(0);
+    });
+
+    it('throws when config not found', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      // Unlike remove(), previewRemove() rejects instead of returning a failure result.
+      await expect(primitive.previewRemove('Missing')).rejects.toThrow('not found');
+    });
+  });
+
+  describe('getRemovable', () => {
+    it('returns config names', async () => {
+      mockReadProjectSpec.mockResolvedValue(
+        makeProject([
+          { name: 'C1', evaluators: ['e'] },
+          { name: 'C2', evaluators: ['f'] },
+        ])
+      );
+
+      const result = await primitive.getRemovable();
+      // Each entry is reduced to an object holding only the name.
+      expect(result).toEqual([{ name: 'C1' }, { name: 'C2' }]);
+    });
+
+    it('returns empty array on error', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('fail'));
+
+      expect(await primitive.getRemovable()).toEqual([]);
+    });
+  });
+
+  describe('getAllNames', () => {
+    it('returns config names as strings', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'X', evaluators: ['e'] }]));
+      // Only the name is returned; the evaluators list is dropped.
+      expect(await primitive.getAllNames()).toEqual(['X']);
+    });
+
+    it('returns empty array on error', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('fail'));
+
+      expect(await primitive.getAllNames()).toEqual([]);
+    });
+  });
+});
diff --git a/src/cli/primitives/index.ts b/src/cli/primitives/index.ts
index 0c995da6..2ef948e5 100644
--- a/src/cli/primitives/index.ts
+++ b/src/cli/primitives/index.ts
@@ -2,6 +2,8 @@ export { BasePrimitive } from './BasePrimitive';
 export { MemoryPrimitive } from './MemoryPrimitive';
 export { CredentialPrimitive } from './CredentialPrimitive';
 export { AgentPrimitive } from './AgentPrimitive';
+export { EvaluatorPrimitive } from './EvaluatorPrimitive';
+export { OnlineEvalConfigPrimitive } from './OnlineEvalConfigPrimitive';
 export { GatewayPrimitive } from './GatewayPrimitive';
 export { GatewayTargetPrimitive } from './GatewayTargetPrimitive';
 export {
@@ -9,6 +11,8 @@ export {
   agentPrimitive,
   memoryPrimitive,
   credentialPrimitive,
+  evaluatorPrimitive,
+  onlineEvalConfigPrimitive,
   gatewayPrimitive,
   gatewayTargetPrimitive,
   getPrimitive,
diff --git a/src/cli/primitives/registry.ts b/src/cli/primitives/registry.ts
index 4dd33b4d..290e0d60 100644
--- a/src/cli/primitives/registry.ts
+++ b/src/cli/primitives/registry.ts
@@ -1,9 +1,11 @@
 import { AgentPrimitive } from './AgentPrimitive';
 import type { BasePrimitive } from './BasePrimitive';
 import { CredentialPrimitive } from './CredentialPrimitive';
+import { EvaluatorPrimitive } from './EvaluatorPrimitive';
 import { GatewayPrimitive } from './GatewayPrimitive';
 import { GatewayTargetPrimitive } from './GatewayTargetPrimitive';
 import { MemoryPrimitive } from './MemoryPrimitive';
+import { OnlineEvalConfigPrimitive } from './OnlineEvalConfigPrimitive';
 import type { RemovableResource } from './types';
 
 /**
@@ -12,6 +14,8 @@ import type { RemovableResource } from './types';
 export const agentPrimitive = new AgentPrimitive();
 export const memoryPrimitive = new MemoryPrimitive();
 export const credentialPrimitive = new CredentialPrimitive();
+export const evaluatorPrimitive = new EvaluatorPrimitive();
+export const onlineEvalConfigPrimitive = new OnlineEvalConfigPrimitive();
 export const gatewayPrimitive = new GatewayPrimitive();
 export const gatewayTargetPrimitive = new GatewayTargetPrimitive();
 
@@ -22,6 +26,8 @@ export const ALL_PRIMITIVES: BasePrimitive<unknown, RemovableResource>[] = [
   agentPrimitive,
   memoryPrimitive,
   credentialPrimitive,
+  evaluatorPrimitive,
+  onlineEvalConfigPrimitive,
   gatewayPrimitive,
   gatewayTargetPrimitive,
 ];
diff --git a/src/cli/tui/App.tsx b/src/cli/tui/App.tsx
index 23511d9d..e447db72 100644
--- a/src/cli/tui/App.tsx
+++ b/src/cli/tui/App.tsx
@@ -7,10 +7,13 @@ import { AddFlow } from './screens/add/AddFlow';
 import { CreateScreen } from './screens/create';
 import { DeployScreen } from './screens/deploy/DeployScreen';
 import { DevScreen } from './screens/dev/DevScreen';
+import { EvalHubScreen, EvalScreen } from './screens/eval';
 import { HelpScreen, HomeScreen } from './screens/home';
 import { InvokeScreen } from './screens/invoke';
+import { OnlineEvalDashboard } from './screens/online-eval';
 import { PackageScreen } from './screens/package';
 import { RemoveFlow } from './screens/remove';
+import { RunEvalFlow, RunScreen } from './screens/run-eval';
 import { StatusScreen } from './screens/status/StatusScreen';
 import { UpdateScreen } from './screens/update';
 import { ValidateScreen } from './screens/validate';
@@ -32,6 +35,11 @@ type Route =
   | { name: 'add' }
   | { name: 'status' }
   | { name: 'remove' }
+  | { name: 'run' }
+  | { name: 'run-eval'; from?: 'run' | 'eval' }
+  | { name: 'eval' }
+  | { name: 'eval-runs' }
+  | { name: 'online-evals' }
   | { name: 'validate' }
   | { name: 'package' }
   | { name: 'update' };
@@ -84,6 +92,10 @@ function AppContent() {
       setRoute({ name: 'add' });
     } else if (id === 'remove') {
       setRoute({ name: 'remove' });
+    } else if (id === 'run') {
+      setRoute({ name: 'run' });
+    } else if (id === 'eval') {
+      setRoute({ name: 'eval' });
     } else if (id === 'validate') {
       setRoute({ name: 'validate' });
     } else if (id === 'package') {
@@ -179,6 +191,46 @@ function AppContent() {
     );
   }
 
+  if (route.name === 'run') {
+    return (
+      <RunScreen
+        onRunEval={() => setRoute({ name: 'run-eval', from: 'run' })}
+        onExit={() => setRoute({ name: 'help' })}
+      />
+    );
+  }
+
+  if (route.name === 'eval') {
+    return (
+      <EvalHubScreen
+        onSelect={view => {
+          if (view === 'run-eval') setRoute({ name: 'run-eval', from: 'eval' });
+          if (view === 'runs') setRoute({ name: 'eval-runs' });
+          if (view === 'online-dashboard') setRoute({ name: 'online-evals' });
+        }}
+        onExit={() => setRoute({ name: 'help' })}
+      />
+    );
+  }
+
+  if (route.name === 'run-eval') {
+    const backRoute = route.from ?? 'eval';
+    return (
+      <RunEvalFlow
+        onExit={() => setRoute({ name: backRoute } as Route)}
+        onViewRuns={() => setRoute({ name: 'eval-runs' })}
+      />
+    );
+  }
+
+  if (route.name === 'eval-runs') {
+    return <EvalScreen isInteractive={true} onExit={() => setRoute({ name: 'eval' })} />;
+  }
+
+  if (route.name === 'online-evals') {
+    return <OnlineEvalDashboard isInteractive={true} onExit={() => setRoute({ name: 'eval' })} />;
+  }
+
   if (route.name === 'validate') {
     return <ValidateScreen isInteractive={true} onExit={() => setRoute({ name: 'help' })} />;
   }
diff --git a/src/cli/tui/components/ResourceGraph.tsx b/src/cli/tui/components/ResourceGraph.tsx
index 816a96cc..11a5bce4 100644
--- a/src/cli/tui/components/ResourceGraph.tsx
+++ b/src/cli/tui/components/ResourceGraph.tsx
@@ -16,6 +16,8 @@ const ICONS = {
   gateway: '◆',
   tool: '⚙',
   runtime: '▶',
+  evaluator: '✦',
+  'online-eval': '↻',
 } as const;
 
 interface ResourceGraphProps {
@@ -103,6 +105,8 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res
   const agents = agentName ? allAgents.filter(a => a.name === agentName) : allAgents;
   const memories = project.memories ?? [];
   const credentials = project.credentials ?? [];
+  const evaluators = project.evaluators ?? [];
+  const onlineEvalConfigs = project.onlineEvalConfigs ?? [];
   const gateways = mcp?.agentCoreGateways ?? [];
   const mcpRuntimeTools = mcp?.mcpRuntimeTools ?? [];
   const unassignedTargets = mcp?.unassignedTargets ?? [];
@@ -128,6 +132,8 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res
     agents.length > 0 ||
     memories.length > 0 ||
     credentials.length > 0 ||
+    evaluators.length > 0 ||
+    onlineEvalConfigs.length > 0 ||
     gateways.length > 0 ||
     mcpRuntimeTools.length > 0 ||
     unassignedTargets.length > 0 ||
@@ -209,6 +215,55 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res
         </Box>
       )}
 
+      {/* Evaluators */}
+      {evaluators.length > 0 && (
+        <Box flexDirection="column">
+          <SectionHeader>Evaluators</SectionHeader>
+          {evaluators.map(evaluator => {
+            const rsEntry = statusMap.get(`evaluator:${evaluator.name}`);
+            const evalStatus = rsEntry?.error ? 'error' : undefined;
+            const evalStatusColor = rsEntry?.error ? 'red' : undefined;
+            return (
+              <ResourceRow
+                key={evaluator.name}
+                icon={ICONS.evaluator}
+                color="cyan"
+                name={evaluator.name}
+                detail={rsEntry?.detail ?? `${evaluator.level} — LLM-as-a-Judge`}
+                status={evalStatus}
+                statusColor={evalStatusColor}
+                deploymentState={rsEntry?.deploymentState}
+                identifier={rsEntry?.identifier}
+              />
+            );
+          })}
+        </Box>
+      )}
+
+      {/* Online Eval Configs */}
+      {onlineEvalConfigs.length > 0 && (
+        <Box flexDirection="column">
+          <SectionHeader>Online Eval Configs</SectionHeader>
+          {onlineEvalConfigs.map(config => {
+            const rsEntry = statusMap.get(`online-eval:${config.name}`);
+            const defaultDetail = `${config.evaluators.length} evaluator${config.evaluators.length !== 1 ? 's' : ''} — ${config.samplingRate}% sampling`;
+            return (
+              <ResourceRow
+                key={config.name}
+                icon={ICONS['online-eval']}
+                color="magenta"
+                name={config.name}
+                detail={rsEntry?.detail ?? defaultDetail}
+                status={rsEntry?.error ? 'error' : undefined}
+                statusColor={rsEntry?.error ? 'red' : undefined}
+                deploymentState={rsEntry?.deploymentState}
+                identifier={rsEntry?.identifier}
+              />
+            );
+          })}
+        </Box>
+      )}
+
       {/* Removed locally — still deployed in AWS, will be torn down on next deploy */}
       {pendingRemovals.length > 0 && (
         <Box flexDirection="column">
@@ -301,6 +356,8 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res
           <Text color="green">{ICONS.agent}</Text> agent{'  '}
           <Text color="blue">{ICONS.memory}</Text> memory{'  '}
           <Text color="yellow">{ICONS.credential}</Text> credential{'  '}
+          <Text color="cyan">{ICONS.evaluator}</Text> evaluator{'  '}
+          <Text color="magenta">{ICONS['online-eval']}</Text> online-eval{'  '}
           <Text color="magenta">{ICONS.gateway}</Text> gateway
         </Text>
         {resourceStatuses && resourceStatuses.length > 0 && (
diff --git a/src/cli/tui/components/SelectList.tsx b/src/cli/tui/components/SelectList.tsx
index e69c19b0..45a41952 100644
--- a/src/cli/tui/components/SelectList.tsx
+++ b/src/cli/tui/components/SelectList.tsx
@@ -29,7 +29,7 @@ export function SelectList<T extends SelectableItem>(props: {
         const disabled = item.disabled ?? false;
         return (
           <Box key={item.id}>
-            <Text wrap="truncate">
+            <Text wrap="wrap">
               <Text color={selected && !disabled ? 'cyan' : undefined} dimColor={disabled}>
                 {selected ? '❯' : ' '}{' '}
               </Text>
diff --git a/src/cli/tui/copy.ts b/src/cli/tui/copy.ts
index 507365da..8e86a0b6 100644
--- a/src/cli/tui/copy.ts
+++ b/src/cli/tui/copy.ts
@@ -40,6 +40,11 @@ export const COMMAND_DESCRIPTIONS = {
   remove: 'Remove AgentCore resources and project',
   status: 'Retrieve details of deployed AgentCore resources.',
   traces: 'View and download agent traces.',
+  eval: 'Manage evaluations and view history.',
+  pause: 'Pause a running resource.',
+  resume: 'Resume a paused resource.',
+  stop: 'Stop and delete a running resource.',
+  run: 'Run operations (eval, etc.).',
   update: 'Check for and install CLI updates',
   validate: 'Validate agentcore/ config files.',
 } as const;
diff --git a/src/cli/tui/hooks/useCreateEvaluator.ts b/src/cli/tui/hooks/useCreateEvaluator.ts
new file mode 100644
index 00000000..bf3015bd
--- /dev/null
+++ b/src/cli/tui/hooks/useCreateEvaluator.ts
@@ -0,0 +1,56 @@
+import type { EvaluatorConfig } from '../../../schema';
+import { evaluatorPrimitive } from '../../primitives/registry';
+import { useCallback, useEffect, useState } from 'react';
+
+interface CreateEvaluatorConfig {
+  name: string; // also echoed back as `evaluatorName` on success
+  level: string; // cast to 'SESSION' | 'TRACE' | 'TOOL_CALL' by useCreateEvaluator, without runtime validation
+  config: EvaluatorConfig;
+}
+/** Hook wrapping evaluatorPrimitive.add with idle/loading/success/error status tracking. */
+export function useCreateEvaluator() {
+  const [status, setStatus] = useState<{ state: 'idle' | 'loading' | 'success' | 'error'; error?: string }>({
+    state: 'idle',
+  });
+  // Failures from evaluatorPrimitive.add are captured in `status` and returned as { ok: false, error }.
+  const create = useCallback(async (config: CreateEvaluatorConfig) => {
+    setStatus({ state: 'loading' });
+    try {
+      const addResult = await evaluatorPrimitive.add({
+        name: config.name,
+        level: config.level as 'SESSION' | 'TRACE' | 'TOOL_CALL',
+        config: config.config,
+      });
+      if (!addResult.success) {
+        throw new Error(addResult.error ?? 'Failed to create evaluator'); // handled by the catch below
+      }
+      setStatus({ state: 'success' });
+      return { ok: true as const, evaluatorName: config.name };
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Failed to create evaluator.';
+      setStatus({ state: 'error', error: message });
+      return { ok: false as const, error: message };
+    }
+  }, []);
+  // Returns status to idle and drops any stored error.
+  const reset = useCallback(() => {
+    setStatus({ state: 'idle' });
+  }, []);
+
+  return { status, createEvaluator: create, reset };
+}
+/** Hook exposing the names returned by evaluatorPrimitive.getAllNames(), loaded once on mount. */
+export function useExistingEvaluatorNames() {
+  const [names, setNames] = useState<string[]>([]);
+  // Initial load; `void` marks the promise as intentionally not awaited.
+  useEffect(() => {
+    void evaluatorPrimitive.getAllNames().then(setNames);
+  }, []);
+  // Re-fetches the names on demand and replaces the current list.
+  const refresh = useCallback(async () => {
+    const result = await evaluatorPrimitive.getAllNames();
+    setNames(result);
+  }, []);
+
+  return { names, refresh };
+}
diff --git a/src/cli/tui/hooks/useCreateOnlineEval.ts b/src/cli/tui/hooks/useCreateOnlineEval.ts
new file mode 100644
index 00000000..2d019055
--- /dev/null
+++ b/src/cli/tui/hooks/useCreateOnlineEval.ts
@@ -0,0 +1,59 @@
+import { onlineEvalConfigPrimitive } from '../../primitives/registry';
+import { useCallback, useEffect, useState } from 'react';
+
+interface CreateOnlineEvalConfig {
+  name: string; // also echoed back as `configName` on success
+  agent: string;
+  evaluators: string[]; // forwarded unchanged to onlineEvalConfigPrimitive.add
+  samplingRate: number; // NOTE(review): presumably a percentage (ResourceGraph renders it with "%") — confirm
+  enableOnCreate: boolean;
+}
+/** Hook wrapping onlineEvalConfigPrimitive.add with idle/loading/success/error status tracking. */
+export function useCreateOnlineEval() {
+  const [status, setStatus] = useState<{ state: 'idle' | 'loading' | 'success' | 'error'; error?: string }>({
+    state: 'idle',
+  });
+  // Failures from onlineEvalConfigPrimitive.add are captured in `status` and returned as { ok: false, error }.
+  const create = useCallback(async (config: CreateOnlineEvalConfig) => {
+    setStatus({ state: 'loading' });
+    try {
+      const addResult = await onlineEvalConfigPrimitive.add({
+        name: config.name,
+        agent: config.agent,
+        evaluators: config.evaluators,
+        samplingRate: config.samplingRate,
+        enableOnCreate: config.enableOnCreate,
+      });
+      if (!addResult.success) {
+        throw new Error(addResult.error ?? 'Failed to create online eval config'); // handled by the catch below
+      }
+      setStatus({ state: 'success' });
+      return { ok: true as const, configName: config.name };
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Failed to create online eval config.';
+      setStatus({ state: 'error', error: message });
+      return { ok: false as const, error: message };
+    }
+  }, []);
+  // Returns status to idle and drops any stored error.
+  const reset = useCallback(() => {
+    setStatus({ state: 'idle' });
+  }, []);
+
+  return { status, createOnlineEval: create, reset };
+}
+/** Hook exposing the names returned by onlineEvalConfigPrimitive.getAllNames(), loaded once on mount. */
+export function useExistingOnlineEvalNames() {
+  const [names, setNames] = useState<string[]>([]);
+  // Initial load; `void` marks the promise as intentionally not awaited.
+  useEffect(() => {
+    void onlineEvalConfigPrimitive.getAllNames().then(setNames);
+  }, []);
+  // Re-fetches the names on demand and replaces the current list.
+  const refresh = useCallback(async () => {
+    const result = await onlineEvalConfigPrimitive.getAllNames();
+    setNames(result);
+  }, []);
+
+  return { names, refresh };
+}
diff --git a/src/cli/tui/hooks/useRemove.ts b/src/cli/tui/hooks/useRemove.ts
index dd6b5468..31a7519f 100644
--- a/src/cli/tui/hooks/useRemove.ts
+++ b/src/cli/tui/hooks/useRemove.ts
@@ -6,9 +6,11 @@ import type { RemovableMemory } from '../../primitives/MemoryPrimitive';
 import {
   agentPrimitive,
   credentialPrimitive,
+  evaluatorPrimitive,
   gatewayPrimitive,
   gatewayTargetPrimitive,
   memoryPrimitive,
+  onlineEvalConfigPrimitive,
 } from '../../primitives/registry';
 import { useCallback, useEffect, useRef, useState } from 'react';
 
@@ -117,6 +119,16 @@ export function useRemovableIdentities() {
   return { identities, ...rest };
 }
 
+export function useRemovableEvaluators() {
+  const { items: evaluators, ...rest } = useRemovableResources(() => evaluatorPrimitive.getRemovable());
+  return { evaluators, ...rest }; // useRemovableResources result with `items` exposed as `evaluators`
+}
+
+export function useRemovableOnlineEvalConfigs() {
+  const { items: onlineEvalConfigs, ...rest } = useRemovableResources(() => onlineEvalConfigPrimitive.getRemovable());
+  return { onlineEvalConfigs, ...rest }; // useRemovableResources result with `items` exposed as `onlineEvalConfigs`
+}
+
 // ============================================================================
 // Preview Hook
 // ============================================================================
@@ -172,6 +184,14 @@ export function useRemovalPreview() {
     (name: string) => loadPreview(n => credentialPrimitive.previewRemove(n), name),
     [loadPreview]
   );
+  const loadEvaluatorPreview = useCallback(
+    (name: string) => loadPreview(n => evaluatorPrimitive.previewRemove(n), name),
+    [loadPreview]
+  );
+  const loadOnlineEvalPreview = useCallback(
+    (name: string) => loadPreview(n => onlineEvalConfigPrimitive.previewRemove(n), name),
+    [loadPreview]
+  );
 
   const reset = useCallback(() => {
     setState({ isLoading: false, preview: null, error: null });
@@ -184,6 +204,8 @@ export function useRemovalPreview() {
     loadGatewayTargetPreview,
     loadMemoryPreview,
     loadIdentityPreview,
+    loadEvaluatorPreview,
+    loadOnlineEvalPreview,
     reset,
   };
 }
@@ -238,3 +260,19 @@ export function useRemoveIdentity() {
     name => name
   );
 }
+
+export function useRemoveEvaluator() {
+  return useRemoveResource(
+    (name: string) => evaluatorPrimitive.remove(name),
+    'evaluator', // NOTE(review): presumably the resource-type label used by useRemoveResource — confirm
+    name => name // the identifier is already the display name, so it is passed through as-is
+  );
+}
+
+export function useRemoveOnlineEvalConfig() {
+  return useRemoveResource(
+    (name: string) => onlineEvalConfigPrimitive.remove(name),
+    'online-eval', // NOTE(review): presumably the resource-type label used by useRemoveResource — confirm
+    name => name // the identifier is already the display name, so it is passed through as-is
+  );
+}
diff --git a/src/cli/tui/screens/add/AddFlow.tsx b/src/cli/tui/screens/add/AddFlow.tsx
index 690d25af..35926ad9 100644
--- a/src/cli/tui/screens/add/AddFlow.tsx
+++ b/src/cli/tui/screens/add/AddFlow.tsx
@@ -6,9 +6,11 @@ import { AddAgentFlow } from '../agent/AddAgentFlow';
 import type { AddAgentConfig } from '../agent/types';
 import { FRAMEWORK_OPTIONS } from '../agent/types';
 import { useAddAgent } from '../agent/useAddAgent';
+import { AddEvaluatorFlow } from '../evaluator';
 import { AddIdentityFlow } from '../identity';
 import { AddGatewayFlow, AddGatewayTargetFlow } from '../mcp';
 import { AddMemoryFlow } from '../memory/AddMemoryFlow';
+import { AddOnlineEvalFlow } from '../online-eval';
 import type { AddResourceType } from './AddScreen';
 import { AddScreen } from './AddScreen';
 import { AddSuccessScreen } from './AddSuccessScreen';
@@ -23,6 +25,8 @@ type FlowState =
   | { name: 'tool-wizard' }
   | { name: 'memory-wizard' }
   | { name: 'identity-wizard' }
+  | { name: 'evaluator-wizard' }
+  | { name: 'online-eval-wizard' }
   | {
       name: 'agent-create-success';
       agentName: string;
@@ -172,6 +176,12 @@ export function AddFlow(props: AddFlowProps) {
       case 'identity':
         setFlow({ name: 'identity-wizard' });
         break;
+      case 'evaluator':
+        setFlow({ name: 'evaluator-wizard' });
+        break;
+      case 'online-eval':
+        setFlow({ name: 'online-eval-wizard' });
+        break;
     }
   }, []);
 
@@ -366,6 +376,32 @@ export function AddFlow(props: AddFlowProps) {
     );
   }
 
+  // Evaluator wizard
+  if (flow.name === 'evaluator-wizard') {
+    return (
+      <AddEvaluatorFlow
+        isInteractive={props.isInteractive}
+        onExit={props.onExit}
+        onBack={() => setFlow({ name: 'select' })}
+        onDev={props.onDev}
+        onDeploy={props.onDeploy}
+      />
+    );
+  }
+
+  // Online eval config wizard
+  if (flow.name === 'online-eval-wizard') {
+    return (
+      <AddOnlineEvalFlow
+        isInteractive={props.isInteractive}
+        onExit={props.onExit}
+        onBack={() => setFlow({ name: 'select' })}
+        onDev={props.onDev}
+        onDeploy={props.onDeploy}
+      />
+    );
+  }
+
   return (
     <ErrorPrompt
       message="Failed to add resource"
diff --git a/src/cli/tui/screens/add/AddScreen.tsx b/src/cli/tui/screens/add/AddScreen.tsx
index a96fbb53..eabc98af 100644
--- a/src/cli/tui/screens/add/AddScreen.tsx
+++ b/src/cli/tui/screens/add/AddScreen.tsx
@@ -5,6 +5,8 @@ const ADD_RESOURCES = [
   { id: 'agent', title: 'Agent', description: 'New or existing agent code' },
   { id: 'memory', title: 'Memory', description: 'Persistent context storage' },
   { id: 'identity', title: 'Identity', description: 'API key credential providers' },
+  { id: 'evaluator', title: 'Evaluator', description: 'Custom LLM-as-a-Judge evaluator' },
+  { id: 'online-eval', title: 'Online Eval Config', description: 'Continuous evaluation pipeline' },
   { id: 'gateway', title: 'Gateway', description: 'Route and manage gateway targets' },
   { id: 'gateway-target', title: 'Gateway Target', description: 'Extend agent capabilities' },
 ] as const;
diff --git a/src/cli/tui/screens/cli-only/CliOnlyScreen.tsx b/src/cli/tui/screens/cli-only/CliOnlyScreen.tsx
new file mode 100644
index 00000000..bd50d553
--- /dev/null
+++ b/src/cli/tui/screens/cli-only/CliOnlyScreen.tsx
@@ -0,0 +1,28 @@
+import { Screen } from '../../components';
+import { Box, Text } from 'ink';
+import React from 'react';
+
+interface CliOnlyScreenProps {
+  title: string; // passed through as the Screen title
+  description: string;
+  examples: string[]; // each entry is rendered dimmed as "  $ <entry>" under "Usage:"
+  onExit: () => void;
+}
+/** Static screen: the description followed by a "Usage:" list with one example command per line. */
+export function CliOnlyScreen({ title, description, examples, onExit }: CliOnlyScreenProps) {
+  return (
+    <Screen title={title} onExit={onExit}>
+      <Box flexDirection="column" marginTop={1}>
+        <Text>{description}</Text>
+        <Box marginTop={1} flexDirection="column">
+          <Text bold>Usage:</Text>
+          {examples.map((example, i) => (
+            <Text key={i} dimColor>
+              {'  '}$ {example}
+            </Text>
+          ))}
+        </Box>
+      </Box>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/cli-only/index.ts b/src/cli/tui/screens/cli-only/index.ts
new file mode 100644
index 00000000..79a69101
--- /dev/null
+++ b/src/cli/tui/screens/cli-only/index.ts
@@ -0,0 +1 @@
+export { CliOnlyScreen } from './CliOnlyScreen';
diff --git a/src/cli/tui/screens/create/useCreateFlow.ts b/src/cli/tui/screens/create/useCreateFlow.ts
index 2a2bae57..157d3e07 100644
--- a/src/cli/tui/screens/create/useCreateFlow.ts
+++ b/src/cli/tui/screens/create/useCreateFlow.ts
@@ -74,6 +74,8 @@ function createDefaultProjectSpec(projectName: string): AgentCoreProjectSpec {
     agents: [],
     memories: [],
     credentials: [],
+    evaluators: [],
+    onlineEvalConfigs: [],
   };
 }
 
diff --git a/src/cli/tui/screens/deploy/useDeployFlow.ts b/src/cli/tui/screens/deploy/useDeployFlow.ts
index bb461ff8..2ceea8c1 100644
--- a/src/cli/tui/screens/deploy/useDeployFlow.ts
+++ b/src/cli/tui/screens/deploy/useDeployFlow.ts
@@ -4,8 +4,10 @@ import {
   buildDeployedState,
   getStackOutputs,
   parseAgentOutputs,
+  parseEvaluatorOutputs,
   parseGatewayOutputs,
   parseMemoryOutputs,
+  parseOnlineEvalOutputs,
 } from '../../../cloudformation';
 import { getErrorMessage, isChangesetInProgressError, isExpiredTokenError } from '../../../errors';
 import { ExecLogger } from '../../../logging';
@@ -257,6 +259,14 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
       );
     }
 
+    // Parse evaluator outputs
+    const evaluatorNames = (ctx.projectSpec.evaluators ?? []).map((e: { name: string }) => e.name);
+    const evaluators = parseEvaluatorOutputs(outputs, evaluatorNames);
+
+    // Parse online eval config outputs
+    const onlineEvalNames = (ctx.projectSpec.onlineEvalConfigs ?? []).map((c: { name: string }) => c.name);
+    const onlineEvalConfigs = parseOnlineEvalOutputs(outputs, onlineEvalNames);
+
     // Expose outputs to UI
     setStackOutputs(outputs);
 
@@ -269,6 +279,8 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
       existingState,
       identityKmsKeyArn,
       memories,
+      evaluators,
+      onlineEvalConfigs,
       credentials: Object.keys(oauthCredentials).length > 0 ? oauthCredentials : undefined,
     });
     await configIO.writeDeployedState(deployedState);
diff --git a/src/cli/tui/screens/eval/EvalHubScreen.tsx b/src/cli/tui/screens/eval/EvalHubScreen.tsx
new file mode 100644
index 00000000..67056413
--- /dev/null
+++ b/src/cli/tui/screens/eval/EvalHubScreen.tsx
@@ -0,0 +1,44 @@
+import { Screen, WizardSelect } from '../../components';
+import type { SelectableItem } from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation } from '../../hooks';
+import React, { useMemo } from 'react';
+
+type EvalHubView = 'run-eval' | 'runs' | 'online-dashboard'; // must stay in sync with the item ids in EvalHubScreen
+
+interface EvalHubScreenProps {
+  onSelect: (view: EvalHubView) => void; // called with the id of the chosen menu item
+  onExit: () => void;
+}
+/** Menu screen offering three evaluation views; the chosen item's id is reported through onSelect. */
+export function EvalHubScreen({ onSelect, onExit }: EvalHubScreenProps) {
+  const items: SelectableItem[] = useMemo(
+    () => [
+      {
+        id: 'run-eval',
+        title: 'Run On-demand Evaluation',
+        description: 'Evaluate agent traces with selected evaluators',
+      },
+      { id: 'runs', title: 'Eval Runs', description: 'View past eval run results and scores' },
+      {
+        id: 'online-dashboard',
+        title: 'Online Eval Dashboard',
+        description: 'View and manage deployed online eval configs',
+      },
+    ],
+    []
+  );
+  // Every item id above is an EvalHubView member, which is what makes the cast in onSelect sound.
+  const nav = useListNavigation({
+    items,
+    onSelect: item => onSelect(item.id as EvalHubView),
+    onExit,
+    isActive: true,
+  });
+  // NOTE(review): exitEnabled={false} presumably turns off Screen's own exit handling because onExit is already wired through useListNavigation — confirm.
+  return (
+    <Screen title="Evaluations" onExit={onExit} helpText={HELP_TEXT.NAVIGATE_SELECT} exitEnabled={false}>
+      <WizardSelect title="Choose a view" items={items} selectedIndex={nav.selectedIndex} />
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/eval/EvalScreen.tsx b/src/cli/tui/screens/eval/EvalScreen.tsx
new file mode 100644
index 00000000..f5707738
--- /dev/null
+++ b/src/cli/tui/screens/eval/EvalScreen.tsx
@@ -0,0 +1,449 @@
+import { handleListEvalRuns } from '../../../operations/eval';
+import { getResultsPath } from '../../../operations/eval/storage';
+import type { EvalEvaluatorResult, EvalRunResult } from '../../../operations/eval/types';
+import { Panel, Screen } from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation } from '../../hooks';
+import { STATUS_COLORS } from '../../theme';
+import { Box, Text, useInput, useStdout } from 'ink';
+import React, { useEffect, useMemo, useState } from 'react';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Helpers
+// ─────────────────────────────────────────────────────────────────────────────
+
+const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; // indexed by Date#getMonth() (0 = Jan)
+
+function formatShortDate(timestamp: string): string {
+  // Renders local time as e.g. "Mar 5 2:07 PM" (12-hour clock, minutes zero-padded).
+  const date = new Date(timestamp);
+  const hour24 = date.getHours();
+  const meridiem = hour24 >= 12 ? 'PM' : 'AM';
+  const hour12 = hour24 % 12 || 12;
+  const minutes = date.getMinutes().toString().padStart(2, '0');
+  const monthDay = `${MONTHS[date.getMonth()]} ${date.getDate()}`;
+  return `${monthDay} ${hour12}:${minutes} ${meridiem}`;
+}
+
+function formatFullDate(timestamp: string): string {
+  const when = new Date(timestamp); // locale date, a space, then locale time limited to hour and minute
+  return `${when.toLocaleDateString()} ${when.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}`;
+}
+
+function formatScore(score: number): string {
+  return score.toFixed(2); // always two decimals, e.g. 0.5 -> "0.50"
+}
+
+function scoreColor(score: number): string {
+  // Thresholds are inclusive: 0.8 and up is green, 0.5 up to (not including) 0.8 is yellow.
+  const passing = score >= 0.8;
+  return passing ? 'green' : score >= 0.5 ? 'yellow' : 'red';
+}
+
+/** Display form of an evaluator name: a leading "Builtin." is dropped, anything else is left untouched. */
+function shortEvalName(name: string): string {
+  return name.startsWith('Builtin.') ? name.slice('Builtin.'.length) : name;
+}
+
+// Chrome: title(1) + padding(2) + panel border(2) + help text(2) + padding(2)
+const CHROME_LINES = 9;
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Windowing hook — shared by agent list and runs list
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Computes the slice of `items` to render around `selectedIndex`, plus overflow-indicator state.
+ * Each shown indicator ("↑ n more" / "↓ n more") takes one line, so the row budget depends on the
+ * window position and vice versa. A single adjustment pass is not enough: scrolling to keep the
+ * selection visible can make the second indicator appear, so the budget is shrunk until it fits.
+ */
+function useWindowedList<T>(items: T[], selectedIndex: number, availableHeight: number, linesPerItem: number) {
+  return useMemo(() => {
+    const total = items.length;
+    let maxItems = Math.max(1, Math.floor(availableHeight / linesPerItem));
+    let start = 0;
+
+    for (;;) {
+      // Keep the selection on the last visible row once it moves past the window.
+      start = selectedIndex >= maxItems ? selectedIndex - maxItems + 1 : 0;
+      const reservedLines = (start > 0 ? 1 : 0) + (start + maxItems < total ? 1 : 0);
+      const fits = Math.max(1, Math.floor((availableHeight - reservedLines) / linesPerItem));
+      // Stop once the budget no longer shrinks; written as !(a < b) so NaN input cannot loop forever.
+      if (!(fits < maxItems)) break;
+      maxItems = fits;
+    }
+
+    return {
+      visible: items.slice(start, start + maxItems),
+      startIdx: start,
+      showUp: start > 0,
+      showDown: start + maxItems < total,
+      countAbove: start,
+      countBelow: Math.max(0, total - start - maxItems),
+    };
+  }, [items, selectedIndex, availableHeight, linesPerItem]);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Agent picker view
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface AgentGroup {
+  agent: string; // shown as the row label and passed to onSelect
+  runCount: number;
+  lastRun: string; // rendered through formatShortDate, so presumably a Date-parseable timestamp
+}
+/** Scrollable single-select list of agents; selecting a row calls onSelect with that agent's name. */
+function AgentPickerView({
+  groups,
+  onSelect,
+  onExit,
+  availableHeight,
+}: {
+  groups: AgentGroup[];
+  onSelect: (agent: string) => void;
+  onExit: () => void;
+  availableHeight: number;
+}) {
+  const nav = useListNavigation({
+    items: groups,
+    onSelect: item => onSelect(item.agent),
+    onExit,
+    isActive: true,
+  });
+  // linesPerItem is 1: each agent renders as a single <Text> row.
+  const { visible, showUp, showDown, countAbove, countBelow } = useWindowedList(
+    groups,
+    nav.selectedIndex,
+    availableHeight,
+    1
+  );
+
+  return (
+    <Panel fullWidth>
+      <Box flexDirection="column">
+        <Text bold>Select an agent</Text>
+        <Text dimColor>
+          {groups.length} agent{groups.length !== 1 ? 's' : ''} with eval runs
+        </Text>
+        <Box marginTop={1} flexDirection="column">
+          {showUp && <Text dimColor> ↑ {countAbove} more</Text>}
+          {visible.map((g, vIdx) => {
+            const idx = (showUp ? countAbove : 0) + vIdx; // position in the full `groups` list
+            const selected = idx === nav.selectedIndex;
+            return (
+              <Text key={g.agent}>
+                <Text color={selected ? 'cyan' : undefined}>{selected ? '❯' : ' '} </Text>
+                <Text color={selected ? 'cyan' : undefined} bold={selected}>
+                  {g.agent}
+                </Text>
+                <Text dimColor>
+                  {'  '}
+                  {g.runCount} run{g.runCount !== 1 ? 's' : ''}
+                  {'  '}last: {formatShortDate(g.lastRun)}
+                </Text>
+              </Text>
+            );
+          })}
+          {showDown && <Text dimColor> ↓ {countBelow} more</Text>}
+        </Box>
+      </Box>
+    </Panel>
+  );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Runs list view (compact single-line per run)
+// ─────────────────────────────────────────────────────────────────────────────
+/** Scrollable list of one agent's eval runs, one line per run; selecting a row calls onSelect with that run. */
+function RunsListView({
+  agentName,
+  runs,
+  onSelect,
+  onBack,
+  availableHeight,
+}: {
+  agentName: string;
+  runs: EvalRunResult[];
+  onSelect: (run: EvalRunResult) => void;
+  onBack: () => void;
+  availableHeight: number;
+}) {
+  const nav = useListNavigation({
+    items: runs,
+    onSelect: item => onSelect(item),
+    onExit: onBack,
+    isActive: true,
+  });
+
+  // Subtract 2 lines for the header (agent name + separator)
+  const listHeight = Math.max(4, availableHeight - 2);
+  const { visible, showUp, showDown, countAbove, countBelow } = useWindowedList(runs, nav.selectedIndex, listHeight, 1);
+
+  return (
+    <Panel fullWidth>
+      <Box flexDirection="column">
+        <Text>
+          Eval Runs —{' '}
+          <Text bold color="cyan">
+            {agentName}
+          </Text>
+          <Text dimColor>
+            {' '}
+            {runs.length} run{runs.length !== 1 ? 's' : ''}
+          </Text>
+        </Text>
+        <Text dimColor>{'─'.repeat(60)}</Text>
+        {showUp && <Text dimColor> ↑ {countAbove} more</Text>}
+        {visible.map((run, vIdx) => {
+          const idx = (showUp ? countAbove : 0) + vIdx; // position in the full `runs` list
+          const selected = idx === nav.selectedIndex;
+          const scores = run.results.map(r => ({ name: shortEvalName(r.evaluator), score: r.aggregateScore }));
+          // Row layout: date, session count (singular padded with a space to keep columns aligned), then "name score" pairs.
+          return (
+            <Text key={run.timestamp} wrap="truncate-end">
+              <Text color={selected ? 'cyan' : undefined}>{selected ? '❯' : ' '} </Text>
+              <Text dimColor>{formatShortDate(run.timestamp).padEnd(16)}</Text>
+              <Text dimColor>
+                {String(run.sessionCount).padStart(3)} session{run.sessionCount !== 1 ? 's' : ' '}{' '}
+              </Text>
+              {scores.map((s, i) => (
+                <Text key={i}>
+                  {i > 0 && <Text dimColor>, </Text>}
+                  <Text>{s.name} </Text>
+                  <Text color={scoreColor(s.score)}>{formatScore(s.score)}</Text>
+                </Text>
+              ))}
+            </Text>
+          );
+        })}
+        {showDown && <Text dimColor> ↓ {countBelow} more</Text>}
+      </Box>
+    </Panel>
+  );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Run detail view
+// ─────────────────────────────────────────────────────────────────────────────
+
+/** Renders one evaluator's result: aggregate score, optional token usage, then one line per session score. */
+function EvaluatorDetail({ result }: { result: EvalEvaluatorResult }) {
+  const sessionCount = result.sessionScores.length;
+  const errCount = result.sessionScores.filter(s => s.errorMessage).length;
+  // Pluralize the error count the same way the session count is ("1 error", "2 errors").
+  const errorNote = errCount > 0 ? `, ${errCount} error${errCount !== 1 ? 's' : ''}` : '';
+  return (
+    <Box flexDirection="column" marginLeft={2}>
+      <Text>
+        <Text bold>{shortEvalName(result.evaluator)}</Text>
+        {'  '}
+        <Text color={scoreColor(result.aggregateScore)}>Score: {formatScore(result.aggregateScore)}</Text>
+        {'  '}
+        <Text dimColor>({sessionCount} session{sessionCount !== 1 ? 's' : ''}{errorNote})</Text>
+      </Text>
+      {result.tokenUsage && (
+        <Text dimColor>
+          {'  '}Tokens: {result.tokenUsage.inputTokens.toLocaleString()} in /{' '}
+          {result.tokenUsage.outputTokens.toLocaleString()} out
+        </Text>
+      )}
+      {result.sessionScores.map((ss, i) => (
+        <Text key={i} dimColor>
+          {'  '}{ss.sessionId.slice(0, 16)}…{' '}
+          {ss.errorMessage ? (
+            <Text color="red">ERROR: {ss.errorMessage.slice(0, 60)}</Text>
+          ) : (
+            <>
+              <Text color={scoreColor(ss.value)}>{formatScore(ss.value)}</Text>
+              {ss.label && <Text> ({ss.label})</Text>}
+            </>
+          )}
+        </Text>
+      ))}
+    </Box>
+  );
+}
+
+/** Read-only detail of a single eval run: a metadata header followed by one EvaluatorDetail per result. */
+function RunDetailView({ run, onBack, maxHeight }: { run: EvalRunResult; onBack: () => void; maxHeight: number }) {
+  // Esc or the "b" key in either case (so Caps Lock/Shift does not disable the shortcut) goes back.
+  useInput((input, key) => {
+    if (key.escape || input.toLowerCase() === 'b') onBack();
+  });
+
+  return (
+    <Box flexDirection="column" height={maxHeight} overflowY="hidden">
+      <Box flexDirection="column" marginBottom={1}>
+        <Text>
+          <Text bold>Agent:</Text> {run.agent}
+          {'  '}
+          <Text bold>Date:</Text> {formatFullDate(run.timestamp)}
+          {'  '}
+          <Text bold>Lookback:</Text> {run.lookbackDays}d
+        </Text>
+        <Text>
+          <Text bold>Sessions:</Text> {run.sessionCount}
+          {'  '}
+          <Text bold>Evaluators:</Text> {run.evaluators.map(shortEvalName).join(', ')}
+        </Text>
+      </Box>
+      <Text color="gray">{'─'.repeat(60)}</Text>
+      {run.results.map((result, i) => (
+        <EvaluatorDetail key={i} result={result} />
+      ))}
+    </Box>
+  );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Main screen
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface EvalScreenProps {
+  isInteractive: boolean; // declared on the props, but EvalScreen does not destructure or read it
+  onExit: () => void; // invoked to leave the screen
+}
+
+type View = 'agents' | 'runs' | 'detail'; // agent picker, one agent's run list, single-run detail
+
+interface EvalState {
+  phase: 'loading' | 'loaded' | 'error'; // progress of the initial load
+  runs: EvalRunResult[]; // runs returned by the loader; empty while loading and on error
+  error: string | null; // message rendered when phase is 'error'
+}
+
+/** Eval-run browser: loads saved runs once, then drills down from agents to runs to a single run. */
+export function EvalScreen({ onExit }: EvalScreenProps) {
+  // Size the lists to the terminal (24 rows when unknown), keeping at least 6 rows of content.
+  const { stdout } = useStdout();
+  const terminalHeight = stdout?.rows ?? 24;
+  const availableHeight = Math.max(6, terminalHeight - CHROME_LINES);
+
+  const [state, setState] = useState<EvalState>({ phase: 'loading', runs: [], error: null });
+  const [view, setView] = useState<View>('agents');
+  const [selectedAgent, setSelectedAgent] = useState<string | null>(null);
+  const [selectedRun, setSelectedRun] = useState<EvalRunResult | null>(null);
+  const [resultsDir, setResultsDir] = useState<string | null>(null);
+
+  useEffect(() => {
+    const load = async () => {
+      await new Promise(resolve => setTimeout(resolve, 0));
+      try {
+        setResultsDir(getResultsPath());
+      } catch {
+        // ignore — no project context
+      }
+      try {
+        const result = handleListEvalRuns({});
+        if (result.success) setState({ phase: 'loaded', runs: result.runs ?? [], error: null });
+        else setState({ phase: 'error', runs: [], error: result.error ?? 'Unknown error' });
+      } catch (err: unknown) {
+        // A throwing loader must surface as an error instead of leaving the screen on "Loading eval runs...".
+        setState({ phase: 'error', runs: [], error: err instanceof Error ? err.message : String(err) });
+      }
+    };
+    void load();
+  }, []);
+
+  // Group runs by agent
+  const agentGroups: AgentGroup[] = useMemo(() => {
+    const map = new Map<string, { runs: EvalRunResult[] }>();
+    for (const run of state.runs) {
+      const entry = map.get(run.agent);
+      if (entry) {
+        entry.runs.push(run);
+      } else {
+        map.set(run.agent, { runs: [run] });
+      }
+    }
+
+    return Array.from(map.entries())
+      .map(([agent, { runs }]) => ({
+        agent,
+        runCount: runs.length,
+        // NOTE(review): uses the first run seen as the latest — assumes the loader returns newest-first.
+        lastRun: runs[0]!.timestamp,
+      }))
+      .sort((a, b) => new Date(b.lastRun).getTime() - new Date(a.lastRun).getTime());
+  }, [state.runs]);
+
+  // Runs for selected agent
+  const agentRuns = useMemo(
+    () => (selectedAgent ? state.runs.filter(r => r.agent === selectedAgent) : []),
+    [state.runs, selectedAgent]
+  );
+
+  // If only one agent, skip the picker (state sync pattern — no effect needed)
+  if (state.phase === 'loaded' && agentGroups.length === 1 && view === 'agents') {
+    setSelectedAgent(agentGroups[0]!.agent);
+    setView('runs');
+  }
+
+  const helpText =
+    view === 'detail'
+      ? 'Esc/B back to runs'
+      : view === 'runs' && agentGroups.length > 1
+        ? 'Esc back to agents'
+        : state.runs.length > 0
+          ? HELP_TEXT.NAVIGATE_SELECT
+          : HELP_TEXT.EXIT;
+
+  const noRuns = state.phase === 'loaded' && state.runs.length === 0;
+  // With no runs (loading, error, or empty) no list view is mounted and helpText shows EXIT, so Screen must exit.
+  const exitEnabled = state.runs.length === 0 || (view === 'agents' && agentGroups.length > 1);
+
+  return (
+    <Screen title="Eval Runs" onExit={onExit} helpText={helpText} exitEnabled={exitEnabled}>
+      {state.phase === 'loading' && <Text dimColor>Loading eval runs...</Text>}
+
+      {state.phase === 'error' && <Text color={STATUS_COLORS.error}>{state.error}</Text>}
+
+      {noRuns && (
+        <Box flexDirection="column">
+          <Text dimColor>No eval runs found. Run `agentcore run eval` to create one.</Text>
+          {resultsDir && <Text dimColor>Results saved to: {resultsDir}</Text>}
+        </Box>
+      )}
+
+      {state.phase === 'loaded' && view === 'agents' && agentGroups.length > 1 && (
+        <AgentPickerView
+          groups={agentGroups}
+          onSelect={agent => {
+            setSelectedAgent(agent);
+            setView('runs');
+          }}
+          onExit={onExit}
+          availableHeight={availableHeight}
+        />
+      )}
+
+      {state.phase === 'loaded' && view === 'runs' && selectedAgent && (
+        <RunsListView
+          agentName={selectedAgent}
+          runs={agentRuns}
+          onSelect={run => {
+            setSelectedRun(run);
+            setView('detail');
+          }}
+          onBack={() => {
+            if (agentGroups.length > 1) {
+              setView('agents');
+              setSelectedAgent(null);
+            } else {
+              onExit();
+            }
+          }}
+          availableHeight={availableHeight}
+        />
+      )}
+
+      {state.phase === 'loaded' && view === 'detail' && selectedRun && (
+        <Panel fullWidth>
+          <RunDetailView run={selectedRun} onBack={() => setView('runs')} maxHeight={availableHeight} />
+        </Panel>
+      )}
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/eval/index.ts b/src/cli/tui/screens/eval/index.ts
new file mode 100644
index 00000000..67dc1582
--- /dev/null
+++ b/src/cli/tui/screens/eval/index.ts
@@ -0,0 +1,2 @@
+export { EvalScreen } from './EvalScreen';
+export { EvalHubScreen } from './EvalHubScreen';
diff --git a/src/cli/tui/screens/evaluator/AddEvaluatorFlow.tsx b/src/cli/tui/screens/evaluator/AddEvaluatorFlow.tsx
new file mode 100644
index 00000000..a53aacb2
--- /dev/null
+++ b/src/cli/tui/screens/evaluator/AddEvaluatorFlow.tsx
@@ -0,0 +1,76 @@
+import { ErrorPrompt } from '../../components';
+import { useCreateEvaluator, useExistingEvaluatorNames } from '../../hooks/useCreateEvaluator';
+import { AddSuccessScreen } from '../add/AddSuccessScreen';
+import { AddEvaluatorScreen } from './AddEvaluatorScreen';
+import type { AddEvaluatorConfig } from './types';
+import React, { useCallback, useEffect, useState } from 'react';
+
+type FlowState =
+  | { name: 'create-wizard' } // the wizard screen is shown
+  | { name: 'create-success'; evaluatorName: string } // creation succeeded; the name appears in the success message
+  | { name: 'error'; message: string }; // creation failed; the message is shown as the error prompt's detail
+
+interface AddEvaluatorFlowProps {
+  isInteractive?: boolean; // defaults to true; when false, onExit is called as soon as creation succeeds
+  onExit: () => void; // passed to the success and error screens
+  onBack: () => void; // used as the wizard's exit handler and as "add another" on the success screen
+  onDev?: () => void; // forwarded to AddSuccessScreen
+  onDeploy?: () => void; // forwarded to AddSuccessScreen
+}
+
+/** Drives the add-evaluator flow: wizard, then creation, then a success screen or an error prompt with retry. */
+export function AddEvaluatorFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddEvaluatorFlowProps) {
+  const { createEvaluator, reset: resetCreate } = useCreateEvaluator();
+  const { names: existingNames } = useExistingEvaluatorNames();
+  const [flow, setFlow] = useState<FlowState>({ name: 'create-wizard' });
+
+  useEffect(() => {
+    if (!isInteractive && flow.name === 'create-success') {
+      onExit();
+    }
+  }, [isInteractive, flow.name, onExit]);
+
+  const handleCreateComplete = useCallback(
+    (config: AddEvaluatorConfig) => {
+      void createEvaluator(config).then(
+        result => {
+          if (result.ok) setFlow({ name: 'create-success', evaluatorName: result.evaluatorName });
+          else setFlow({ name: 'error', message: result.error });
+        },
+        // Surface an unexpected rejection too; otherwise it is dropped and the wizard shows no feedback.
+        (err: unknown) => setFlow({ name: 'error', message: err instanceof Error ? err.message : String(err) })
+      );
+    },
+    [createEvaluator]
+  );
+
+  if (flow.name === 'create-wizard') {
+    return <AddEvaluatorScreen existingEvaluatorNames={existingNames} onComplete={handleCreateComplete} onExit={onBack} />;
+  }
+
+  if (flow.name === 'create-success') {
+    return (
+      <AddSuccessScreen
+        isInteractive={isInteractive}
+        message={`Added evaluator: ${flow.evaluatorName}`}
+        detail="Evaluator added to project in `agentcore/agentcore.json`. Deploy with `agentcore deploy`."
+        onAddAnother={onBack}
+        onDev={onDev}
+        onDeploy={onDeploy}
+        onExit={onExit}
+      />
+    );
+  }
+
+  return (
+    <ErrorPrompt
+      message="Failed to add evaluator"
+      detail={flow.message}
+      onBack={() => {
+        resetCreate();
+        setFlow({ name: 'create-wizard' });
+      }}
+      onExit={onExit}
+    />
+  );
+}
diff --git a/src/cli/tui/screens/evaluator/AddEvaluatorScreen.tsx b/src/cli/tui/screens/evaluator/AddEvaluatorScreen.tsx
new file mode 100644
index 00000000..d86d1818
--- /dev/null
+++ b/src/cli/tui/screens/evaluator/AddEvaluatorScreen.tsx
@@ -0,0 +1,255 @@
+import type { EvaluationLevel, EvaluatorConfig } from '../../../../schema';
+import { EvaluatorNameSchema, isValidBedrockModelId } from '../../../../schema';
+import type { SelectableItem } from '../../components';
+import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation } from '../../hooks';
+import { generateUniqueName } from '../../utils';
+import type { AddEvaluatorConfig, CustomRatingScaleType } from './types';
+import {
+  CUSTOM_RATING_SCALE_ID,
+  DEFAULT_INSTRUCTIONS,
+  EVALUATION_LEVEL_OPTIONS,
+  EVALUATOR_MODEL_OPTIONS,
+  EVALUATOR_STEP_LABELS,
+  LEVEL_PLACEHOLDERS,
+  RATING_SCALE_PRESETS,
+  RATING_SCALE_TYPE_OPTIONS,
+  parseCustomRatingScale,
+  validateInstructionPlaceholders,
+} from './types';
+import { useAddEvaluatorWizard } from './useAddEvaluatorWizard';
+import { Box, Text } from 'ink';
+import React, { useMemo } from 'react';
+
+interface AddEvaluatorScreenProps {
+  onComplete: (config: AddEvaluatorConfig) => void; // called with the wizard's config when the confirm item is selected
+  onExit: () => void; // passed to Screen and used as the name step's cancel handler
+  existingEvaluatorNames: string[]; // rejected as duplicate names and fed to generateUniqueName for the default
+}
+
+/** Summarizes a rating scale on one line: "value=label" pairs when numerical, plain labels when categorical. */
+function formatRatingScale(ratingScale: EvaluatorConfig['llmAsAJudge']['ratingScale']): string {
+  const numerical = 'numerical' in ratingScale ? ratingScale.numerical : undefined;
+  const categorical = 'categorical' in ratingScale ? ratingScale.categorical : undefined;
+  if (numerical) return numerical.map(entry => `${entry.value}=${entry.label}`).join(', ');
+  if (categorical) return categorical.map(entry => entry.label).join(', ');
+  // Neither variant is populated.
+  return 'Unknown';
+}
+
+export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames }: AddEvaluatorScreenProps) {
+  const wizard = useAddEvaluatorWizard();
+  // Wizard screen: renders the input for wizard.step and reports wizard.config via onComplete on confirm.
+  const levelItems: SelectableItem[] = useMemo(
+    () => EVALUATION_LEVEL_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })),
+    []
+  );
+
+  const ratingScaleItems: SelectableItem[] = useMemo(
+    () => [
+      ...RATING_SCALE_PRESETS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })),
+      { id: CUSTOM_RATING_SCALE_ID, title: 'Custom', description: 'Define your own rating scale' },
+    ],
+    []
+  );
+
+  const ratingScaleTypeItems: SelectableItem[] = useMemo(
+    () => RATING_SCALE_TYPE_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })),
+    []
+  );
+
+  const modelItems: SelectableItem[] = useMemo(
+    () => EVALUATOR_MODEL_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })),
+    []
+  );
+  // One flag per wizard step; each gates its JSX branch below, and list steps also gate their navigation hook.
+  const isNameStep = wizard.step === 'name';
+  const isLevelStep = wizard.step === 'level';
+  const isModelStep = wizard.step === 'model';
+  const isModelCustomStep = wizard.step === 'model-custom';
+  const isInstructionsStep = wizard.step === 'instructions';
+  const isRatingScaleStep = wizard.step === 'ratingScale';
+  const isRatingScaleTypeStep = wizard.step === 'ratingScale-type';
+  const isRatingScaleCustomStep = wizard.step === 'ratingScale-custom';
+  const isConfirmStep = wizard.step === 'confirm';
+  // Every navigation hook is invoked on each render; only the current step passes isActive: true.
+  const levelNav = useListNavigation({
+    items: levelItems,
+    onSelect: item => wizard.setLevel(item.id as EvaluationLevel),
+    onExit: () => wizard.goBack(),
+    isActive: isLevelStep,
+  });
+
+  const modelNav = useListNavigation({
+    items: modelItems,
+    onSelect: item => wizard.selectModel(item.id),
+    onExit: () => wizard.goBack(),
+    isActive: isModelStep,
+  });
+
+  const ratingScaleNav = useListNavigation({
+    items: ratingScaleItems,
+    onSelect: item => {
+      const preset = RATING_SCALE_PRESETS.find(p => p.id === item.id);
+      wizard.selectRatingScale(item.id, preset?.ratingScale);
+    },
+    onExit: () => wizard.goBack(),
+    isActive: isRatingScaleStep,
+  });
+
+  const ratingScaleTypeNav = useListNavigation({
+    items: ratingScaleTypeItems,
+    onSelect: item => wizard.selectCustomRatingScaleType(item.id as CustomRatingScaleType),
+    onExit: () => wizard.goBack(),
+    isActive: isRatingScaleTypeStep,
+  });
+  // Single-item list for the confirm step: selecting it hands the assembled config to onComplete.
+  useListNavigation({
+    items: [{ id: 'confirm', title: 'Confirm' }],
+    onSelect: () => onComplete(wizard.config),
+    onExit: () => wizard.goBack(),
+    isActive: isConfirmStep,
+  });
+  // Help text follows the input kind of the current step: list selection, confirmation, or text input.
+  const helpText =
+    isLevelStep || isRatingScaleStep || isModelStep || isRatingScaleTypeStep
+      ? HELP_TEXT.NAVIGATE_SELECT
+      : isConfirmStep
+        ? HELP_TEXT.CONFIRM_CANCEL
+        : HELP_TEXT.TEXT_INPUT;
+
+  const headerContent = <StepIndicator steps={wizard.steps} currentStep={wizard.step} labels={EVALUATOR_STEP_LABELS} />;
+
+  return (
+    <Screen title="Add Evaluator" onExit={onExit} helpText={helpText} headerContent={headerContent} exitEnabled={false}>
+      <Panel fullWidth>
+        {isNameStep && (
+          <TextInput
+            key="name"
+            prompt="Evaluator name"
+            initialValue={generateUniqueName('MyEvaluator', existingEvaluatorNames)}
+            onSubmit={wizard.setName}
+            onCancel={onExit}
+            schema={EvaluatorNameSchema}
+            customValidation={value => !existingEvaluatorNames.includes(value) || 'Evaluator name already exists'}
+          />
+        )}
+
+        {isLevelStep && (
+          <WizardSelect
+            title="Evaluation level"
+            description="Granularity of evaluation"
+            items={levelItems}
+            selectedIndex={levelNav.selectedIndex}
+          />
+        )}
+
+        {isModelStep && (
+          <WizardSelect
+            title="Select model"
+            description="Choose the LLM for evaluation judging"
+            items={modelItems}
+            selectedIndex={modelNav.selectedIndex}
+          />
+        )}
+
+        {isModelCustomStep && (
+          <TextInput
+            key="model-custom"
+            prompt="Bedrock model ID"
+            initialValue=""
+            onSubmit={wizard.setCustomModel}
+            onCancel={() => wizard.goBack()}
+            customValidation={value =>
+              isValidBedrockModelId(value) ||
+              'Must be a valid Bedrock model ID (e.g. us.anthropic.claude-sonnet-4-5-20250929-v1:0) or model ARN'
+            }
+          />
+        )}
+
+        {isInstructionsStep && (
+          <Box flexDirection="column">
+            <Text>Evaluation instructions</Text>
+            <Text dimColor>
+              Must include at least one: {LEVEL_PLACEHOLDERS[wizard.config.level].map(p => `{${p}}`).join(', ')}
+            </Text>
+            <TextInput
+              key="instructions"
+              prompt=""
+              hideArrow={false}
+              initialValue={DEFAULT_INSTRUCTIONS[wizard.config.level]}
+              onSubmit={wizard.setInstructions}
+              onCancel={() => wizard.goBack()}
+              customValidation={value => validateInstructionPlaceholders(value, wizard.config.level)}
+            />
+          </Box>
+        )}
+
+        {isRatingScaleStep && (
+          <WizardSelect
+            title="Rating scale"
+            description="Choose a preset or define your own"
+            items={ratingScaleItems}
+            selectedIndex={ratingScaleNav.selectedIndex}
+          />
+        )}
+
+        {isRatingScaleTypeStep && (
+          <WizardSelect
+            title="Scale type"
+            description="Choose the type of custom rating scale"
+            items={ratingScaleTypeItems}
+            selectedIndex={ratingScaleTypeNav.selectedIndex}
+          />
+        )}
+
+        {isRatingScaleCustomStep && (
+          <Box flexDirection="column">
+            <Text>Define rating scale entries</Text>
+            <Text dimColor>
+              {wizard.customRatingScaleType === 'numerical'
+                ? 'Format: value:label:definition, ... (e.g. 1:Poor:Fails, 3:Good:Meets, 5:Excellent:Exceeds)'
+                : 'Format: label:definition, ... (e.g. Pass:Meets criteria, Fail:Does not meet)'}
+            </Text>
+            <TextInput
+              key="ratingScale-custom"
+              prompt=""
+              hideArrow={false}
+              initialValue=""
+              onSubmit={value => {
+                const result = parseCustomRatingScale(value, wizard.customRatingScaleType);
+                if (result.success) {
+                  wizard.setCustomRatingScale(result.ratingScale);
+                }
+              }}
+              onCancel={() => wizard.goBack()}
+              customValidation={value => {
+                const result = parseCustomRatingScale(value, wizard.customRatingScaleType);
+                return result.success || result.error;
+              }}
+            />
+          </Box>
+        )}
+
+        {isConfirmStep && (
+          <ConfirmReview
+            fields={[
+              { label: 'Name', value: wizard.config.name },
+              { label: 'Level', value: wizard.config.level },
+              { label: 'Model', value: wizard.config.config.llmAsAJudge.model },
+              {
+                label: 'Instructions',
+                value:
+                  wizard.config.config.llmAsAJudge.instructions.length > 60
+                    ? wizard.config.config.llmAsAJudge.instructions.slice(0, 60) + '...'
+                    : wizard.config.config.llmAsAJudge.instructions,
+              },
+              { label: 'Rating Scale', value: formatRatingScale(wizard.config.config.llmAsAJudge.ratingScale) },
+            ]}
+          />
+        )}
+      </Panel>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/evaluator/__tests__/types.test.ts b/src/cli/tui/screens/evaluator/__tests__/types.test.ts
new file mode 100644
index 00000000..aac6ab0c
--- /dev/null
+++ b/src/cli/tui/screens/evaluator/__tests__/types.test.ts
@@ -0,0 +1,161 @@
+import {
+  DEFAULT_INSTRUCTIONS,
+  DEFAULT_MODEL,
+  LEVEL_PLACEHOLDERS,
+  parseCustomRatingScale,
+  validateInstructionPlaceholders,
+} from '../types.js';
+import { describe, expect, it } from 'vitest';
+
+// Pins the placeholder names listed for each evaluation level.
+describe('LEVEL_PLACEHOLDERS', () => {
+  it('defines placeholders for all three levels', () => {
+    for (const level of ['SESSION', 'TRACE', 'TOOL_CALL']) {
+      expect(LEVEL_PLACEHOLDERS).toHaveProperty(level);
+    }
+  });
+
+  it('SESSION has correct public placeholders', () => {
+    for (const name of ['context', 'available_tools']) expect(LEVEL_PLACEHOLDERS.SESSION).toContain(name);
+    expect(LEVEL_PLACEHOLDERS.SESSION).toHaveLength(2);
+  });
+
+  it('TRACE has correct public placeholders', () => {
+    for (const name of ['context', 'assistant_turn']) expect(LEVEL_PLACEHOLDERS.TRACE).toContain(name);
+    expect(LEVEL_PLACEHOLDERS.TRACE).toHaveLength(2);
+  });
+
+  it('TOOL_CALL has tool-specific placeholders', () => {
+    const toolCallPlaceholders = LEVEL_PLACEHOLDERS.TOOL_CALL;
+    expect(toolCallPlaceholders).toContain('available_tools');
+    expect(toolCallPlaceholders).toContain('context');
+    expect(toolCallPlaceholders).toContain('tool_turn');
+  });
+});
+
+// Each default instruction text must satisfy the placeholder validator for its own level.
+describe('DEFAULT_INSTRUCTIONS', () => {
+  it('each default instruction passes its own level validation', () => {
+    (['SESSION', 'TRACE', 'TOOL_CALL'] as const).forEach(level => {
+      expect(validateInstructionPlaceholders(DEFAULT_INSTRUCTIONS[level], level)).toBe(true);
+    });
+  });
+
+  it('SESSION default uses {context}', () => {
+    expect(DEFAULT_INSTRUCTIONS.SESSION).toContain('{context}');
+  });
+
+  it('TOOL_CALL default uses {tool_turn}', () => {
+    expect(DEFAULT_INSTRUCTIONS.TOOL_CALL).toContain('{tool_turn}');
+  });
+});
+
+describe('DEFAULT_MODEL', () => {
+  it('is a Claude Sonnet model ID', () => {
+    // Only the vendor and model-family fragments are checked, not the full versioned ID.
+    for (const fragment of ['anthropic', 'sonnet']) expect(DEFAULT_MODEL).toContain(fragment);
+  });
+});
+
+// validateInstructionPlaceholders returns `true` when valid and an error-message string otherwise.
+describe('validateInstructionPlaceholders', () => {
+  it('returns true when at least one valid placeholder is present for SESSION', () => {
+    expect(validateInstructionPlaceholders('Check {context} now', 'SESSION')).toBe(true);
+    expect(validateInstructionPlaceholders('See {available_tools}', 'SESSION')).toBe(true);
+  });
+
+  it('returns true when at least one valid placeholder is present for TOOL_CALL', () => {
+    expect(validateInstructionPlaceholders('Turn: {tool_turn}', 'TOOL_CALL')).toBe(true);
+    expect(validateInstructionPlaceholders('Tools: {available_tools}', 'TOOL_CALL')).toBe(true);
+  });
+
+  it('returns error string when no valid placeholders are present', () => {
+    const result = validateInstructionPlaceholders('No placeholders here', 'SESSION');
+    expect(typeof result).toBe('string');
+    expect(result).toContain('must contain at least one placeholder');
+  });
+
+  it('accepts {context} for TOOL_CALL level because both levels share it', () => {
+    const result = validateInstructionPlaceholders('Check {context} now', 'TOOL_CALL');
+    // {context} IS valid for TOOL_CALL, so this should pass
+    expect(result).toBe(true);
+  });
+
+  it('rejects TOOL_CALL-only placeholders for SESSION level', () => {
+    const result = validateInstructionPlaceholders('Turn: {tool_turn}', 'SESSION');
+    expect(typeof result).toBe('string');
+    expect(result).toContain('must contain at least one placeholder');
+  });
+
+  it('does not match partial placeholder names', () => {
+    // {contexts} only starts with a valid name; it must not count as the exact {context} placeholder
+    const result = validateInstructionPlaceholders('Extra: {contexts}', 'SESSION');
+    expect(typeof result).toBe('string');
+  });
+
+  it('handles multiple placeholders — at least one valid is enough', () => {
+    const result = validateInstructionPlaceholders('{unknown_thing} and {context}', 'SESSION');
+    expect(result).toBe(true);
+  });
+
+  it('returns descriptive error listing allowed placeholders', () => {
+    const result = validateInstructionPlaceholders('nothing', 'TOOL_CALL');
+    expect(typeof result).toBe('string');
+    expect(result as string).toContain('{available_tools}');
+    expect(result as string).toContain('{context}');
+    expect(result as string).toContain('{tool_turn}');
+  });
+});
+
+// Covers both scale types, the minimum-entry rule, malformed entries, and colons inside definitions.
+describe('parseCustomRatingScale', () => {
+  // Asserts that parsing `input` fails with an error message containing `fragment`.
+  const expectFailure = (input: string, type: Parameters<typeof parseCustomRatingScale>[1], fragment: string) => {
+    const result = parseCustomRatingScale(input, type);
+    expect(result.success).toBe(false);
+    if (!result.success) expect(result.error).toContain(fragment);
+  };
+
+  it('parses numerical entries', () => {
+    const result = parseCustomRatingScale('1:Poor:Fails, 3:Good:Meets, 5:Excellent:Far exceeds', 'numerical');
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    const entries = result.ratingScale.numerical;
+    expect(entries).toHaveLength(3);
+    expect(entries?.[0]).toEqual({ value: 1, label: 'Poor', definition: 'Fails' });
+    expect(entries?.[2]).toEqual({ value: 5, label: 'Excellent', definition: 'Far exceeds' });
+  });
+
+  it('parses categorical entries', () => {
+    const result = parseCustomRatingScale('Pass:Meets criteria, Fail:Does not meet', 'categorical');
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    const entries = result.ratingScale.categorical;
+    expect(entries).toHaveLength(2);
+    expect(entries?.[0]).toEqual({ label: 'Pass', definition: 'Meets criteria' });
+  });
+
+  it('rejects fewer than 2 entries', () => {
+    expectFailure('1:Poor:Fails', 'numerical', 'At least 2');
+  });
+
+  it('rejects numerical entry with non-number value', () => {
+    expectFailure('abc:Poor:Fails, 2:Good:Nice', 'numerical', 'not a valid number');
+  });
+
+  it('rejects numerical entry with too few parts', () => {
+    expectFailure('1:Poor, 2:Good:Nice', 'numerical', 'Format');
+  });
+
+  it('rejects categorical entry with too few parts', () => {
+    expectFailure('Pass, Fail:Bad', 'categorical', 'Format');
+  });
+
+  it('handles definitions containing colons', () => {
+    const result = parseCustomRatingScale('Pass:Good: meets all criteria, Fail:Bad: fails all', 'categorical');
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    // Text after the label's colon, including further colons, is expected to stay in the definition.
+    expect(result.ratingScale.categorical?.[0]?.definition).toBe('Good: meets all criteria');
+  });
+});
diff --git a/src/cli/tui/screens/evaluator/index.ts b/src/cli/tui/screens/evaluator/index.ts
new file mode 100644
index 00000000..1e85211d
--- /dev/null
+++ b/src/cli/tui/screens/evaluator/index.ts
@@ -0,0 +1,2 @@
+export { AddEvaluatorFlow } from './AddEvaluatorFlow';
+export { AddEvaluatorScreen } from './AddEvaluatorScreen';
diff --git a/src/cli/tui/screens/evaluator/types.ts b/src/cli/tui/screens/evaluator/types.ts
new file mode 100644
index 00000000..70ba09c0
--- /dev/null
+++ b/src/cli/tui/screens/evaluator/types.ts
@@ -0,0 +1,243 @@
+import type { EvaluationLevel, EvaluatorConfig } from '../../../../schema';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Evaluator Flow Types
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Identifier for each screen of the add-evaluator wizard.
+ * `model-custom`, `ratingScale-type` and `ratingScale-custom` are variants of
+ * the `model` / `ratingScale` screens and share their labels in
+ * EVALUATOR_STEP_LABELS.
+ */
+export type AddEvaluatorStep =
+  | 'name'
+  | 'level'
+  | 'model'
+  | 'model-custom'
+  | 'instructions'
+  | 'ratingScale'
+  | 'ratingScale-type'
+  | 'ratingScale-custom'
+  | 'confirm';
+
+/** Values collected by the add-evaluator wizard. */
+export interface AddEvaluatorConfig {
+  /** Evaluator name entered in the `name` step. */
+  name: string;
+  /** Evaluation level chosen in the `level` step (see EVALUATION_LEVEL_OPTIONS). */
+  level: EvaluationLevel;
+  /** Evaluator settings; its shape is declared by the project schema. */
+  config: EvaluatorConfig;
+}
+
+/**
+ * Short display label for every wizard step. Sub-steps deliberately repeat the
+ * label of their parent step (`model-custom` -> 'Model', `ratingScale-*` -> 'Scale').
+ */
+export const EVALUATOR_STEP_LABELS: Record<AddEvaluatorStep, string> = {
+  name: 'Name',
+  level: 'Level',
+  model: 'Model',
+  'model-custom': 'Model',
+  instructions: 'Prompt',
+  ratingScale: 'Scale',
+  'ratingScale-type': 'Scale',
+  'ratingScale-custom': 'Scale',
+  confirm: 'Confirm',
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+// UI Option Constants
+// ─────────────────────────────────────────────────────────────────────────────
+
+/** Selectable evaluation levels; each `id` is one of the keys used by LEVEL_PLACEHOLDERS. */
+export const EVALUATION_LEVEL_OPTIONS = [
+  { id: 'SESSION', title: 'Session', description: 'Evaluate entire conversation sessions' },
+  { id: 'TRACE', title: 'Trace', description: 'Evaluate individual agent traces' },
+  { id: 'TOOL_CALL', title: 'Tool Call', description: 'Evaluate individual tool calls' },
+] as const;
+
+/** Model ID used when no other model has been chosen; matches the first entry of EVALUATOR_MODEL_OPTIONS. */
+export const DEFAULT_MODEL = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
+
+/** Sentinel option id for the "Other" entry of EVALUATOR_MODEL_OPTIONS; it is not a real model ID. */
+export const CUSTOM_MODEL_ID = '__custom__';
+
+/** One row of the model picker. */
+export interface EvaluatorModelOption {
+  /** Model ID, or CUSTOM_MODEL_ID for the free-text entry. */
+  id: string;
+  /** Human-readable model name. */
+  title: string;
+  /** One-line hint shown with the title. */
+  description: string;
+}
+
+/**
+ * Models offered by the model picker, in display order. The last entry is the
+ * CUSTOM_MODEL_ID sentinel for typing an arbitrary model ID or ARN.
+ */
+export const EVALUATOR_MODEL_OPTIONS: EvaluatorModelOption[] = [
+  {
+    // Reference the constant so the "Recommended" row can never drift from the default.
+    id: DEFAULT_MODEL,
+    title: 'Claude Sonnet 4.5',
+    description: 'Recommended — balanced speed and accuracy',
+  },
+  {
+    id: 'global.anthropic.claude-opus-4-5-20251101-v1:0',
+    title: 'Claude Opus 4.5',
+    description: 'Most capable — best for complex evaluations',
+  },
+  {
+    id: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
+    title: 'Claude Haiku 4.5',
+    description: 'Fastest — good for high-volume evaluations',
+  },
+  {
+    id: 'us.amazon.nova-pro-v1:0',
+    title: 'Amazon Nova Pro',
+    description: 'Amazon foundation model — strong reasoning',
+  },
+  {
+    id: 'us.amazon.nova-lite-v1:0',
+    title: 'Amazon Nova Lite',
+    description: 'Amazon foundation model — fast and cost-effective',
+  },
+  {
+    id: CUSTOM_MODEL_ID,
+    title: 'Other',
+    description: 'Enter a custom Bedrock model ID or ARN',
+  },
+];
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Placeholder Constants
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Allowed placeholders per evaluation level. The API requires instructions
+ * to contain at least one placeholder from the evaluator's level.
+ * Names are stored without braces; validateInstructionPlaceholders looks for
+ * them in the form `{name}`.
+ */
+export const LEVEL_PLACEHOLDERS: Record<EvaluationLevel, string[]> = {
+  SESSION: ['context', 'available_tools'],
+  TRACE: ['context', 'assistant_turn'],
+  TOOL_CALL: ['available_tools', 'context', 'tool_turn'],
+};
+
+/**
+ * Default instruction templates per level that include required placeholders.
+ * Each template uses placeholders from LEVEL_PLACEHOLDERS for its level; the
+ * TOOL_CALL template uses `{context}` and `{tool_turn}` but not `{available_tools}`.
+ */
+export const DEFAULT_INSTRUCTIONS: Record<EvaluationLevel, string> = {
+  SESSION:
+    'Evaluate the agent session. Context: {context}. Available tools: {available_tools}. Rate the overall quality of the session.',
+  TRACE:
+    'Evaluate the agent trace. Context: {context}. Assistant turn: {assistant_turn}. Rate the quality of this trace.',
+  TOOL_CALL: 'Evaluate the tool call. Context: {context}. Tool turn: {tool_turn}. Rate the quality of this tool usage.',
+};
+
+/**
+ * Checks that `instructions` mentions at least one `{placeholder}` allowed for `level`.
+ *
+ * @param instructions - Instruction text to inspect.
+ * @param level - Evaluation level whose LEVEL_PLACEHOLDERS entry applies.
+ * @returns `true` when a placeholder is present, otherwise an error message
+ *   listing every accepted placeholder.
+ */
+export function validateInstructionPlaceholders(instructions: string, level: EvaluationLevel): string | true {
+  // Wrap each name in braces once; the same tokens serve the search and the message.
+  const tokens = LEVEL_PLACEHOLDERS[level].map(name => `{${name}}`);
+  for (const token of tokens) {
+    if (instructions.includes(token)) return true;
+  }
+  return `Instructions must contain at least one placeholder: ${tokens.join(', ')}`;
+}
+
+/** A ready-made rating scale offered by the wizard (see RATING_SCALE_PRESETS). */
+export interface RatingScalePreset {
+  /** Stable identifier of the preset. */
+  id: string;
+  /** Name shown in the picker. */
+  title: string;
+  /** One-line summary shown with the title. */
+  description: string;
+  /** The scale stored in the evaluator config when this preset is chosen. */
+  ratingScale: EvaluatorConfig['llmAsAJudge']['ratingScale'];
+}
+
+/** Sentinel id meaning "define my own scale" instead of one of RATING_SCALE_PRESETS. */
+export const CUSTOM_RATING_SCALE_ID = '__custom__';
+
+/** The two entry shapes parseCustomRatingScale understands. */
+export type CustomRatingScaleType = 'numerical' | 'categorical';
+
+/** Picker rows for choosing a CustomRatingScaleType; each `id` is one of its members. */
+export const RATING_SCALE_TYPE_OPTIONS = [
+  { id: 'numerical', title: 'Numerical', description: 'Scored values (e.g. 1–5)' },
+  { id: 'categorical', title: 'Categorical', description: 'Named labels (e.g. Pass/Fail)' },
+] as const;
+
+/**
+ * Parse a custom rating scale from compact, comma-separated text.
+ *
+ * Numerical: "1:Poor:Fails to meet, 2:Fair:Partially meets, 5:Excellent:Far exceeds"
+ * Categorical: "Pass:Meets criteria, Fail:Does not meet"
+ *
+ * Only the leading colon(s) of an entry act as separators, so a definition may
+ * itself contain colons. Commas always separate entries, so a definition
+ * cannot contain one. Blank entries (e.g. a trailing comma) are ignored.
+ *
+ * @param input - Raw text typed by the user.
+ * @param type - Entry shape to expect.
+ * @returns `{ success: true, ratingScale }`, or `{ success: false, error }`
+ *   describing the first problem found.
+ */
+export function parseCustomRatingScale(
+  input: string,
+  type: CustomRatingScaleType
+): { success: true; ratingScale: EvaluatorConfig['llmAsAJudge']['ratingScale'] } | { success: false; error: string } {
+  const entries = input
+    .split(',')
+    .map(e => e.trim())
+    .filter(Boolean);
+
+  if (entries.length < 2) {
+    return { success: false, error: 'At least 2 entries required (comma-separated)' };
+  }
+
+  if (type === 'numerical') {
+    const numerical: { value: number; label: string; definition: string }[] = [];
+    for (const entry of entries) {
+      const firstColon = entry.indexOf(':');
+      const secondColon = firstColon >= 0 ? entry.indexOf(':', firstColon + 1) : -1;
+      if (firstColon < 0 || secondColon < 0) {
+        return { success: false, error: `Invalid entry "${entry}". Format: value:label:definition` };
+      }
+      const rawValue = entry.slice(0, firstColon).trim();
+      // Number('') is 0 and Number('Infinity') is Infinity; neither is a score the user typed.
+      const value = rawValue === '' ? NaN : Number(rawValue);
+      if (!Number.isFinite(value)) {
+        return { success: false, error: `"${rawValue}" is not a valid number in "${entry}"` };
+      }
+      const label = entry.slice(firstColon + 1, secondColon).trim();
+      const definition = entry.slice(secondColon + 1).trim();
+      if (label === '' || definition === '') {
+        return {
+          success: false,
+          error: `Invalid entry "${entry}": label and definition must not be empty. Format: value:label:definition`,
+        };
+      }
+      numerical.push({ value, label, definition });
+    }
+    return { success: true, ratingScale: { numerical } };
+  }
+
+  const categorical: { label: string; definition: string }[] = [];
+  for (const entry of entries) {
+    const firstColon = entry.indexOf(':');
+    if (firstColon < 0) {
+      return { success: false, error: `Invalid entry "${entry}". Format: label:definition` };
+    }
+    const label = entry.slice(0, firstColon).trim();
+    const definition = entry.slice(firstColon + 1).trim();
+    if (label === '' || definition === '') {
+      return {
+        success: false,
+        error: `Invalid entry "${entry}": label and definition must not be empty. Format: label:definition`,
+      };
+    }
+    categorical.push({ label, definition });
+  }
+  return { success: true, ratingScale: { categorical } };
+}
+
+/**
+ * Built-in rating scales, in display order: two numerical scales followed by
+ * two categorical ones. Each preset populates exactly one of `numerical` /
+ * `categorical`.
+ */
+export const RATING_SCALE_PRESETS: RatingScalePreset[] = [
+  {
+    id: '1-5-quality',
+    title: '1–5 Quality (Numerical)',
+    description: 'Five-point quality scale from Poor to Excellent',
+    ratingScale: {
+      numerical: [
+        { value: 1, label: 'Poor', definition: 'Fails to meet expectations' },
+        { value: 2, label: 'Fair', definition: 'Partially meets expectations' },
+        { value: 3, label: 'Good', definition: 'Meets expectations' },
+        { value: 4, label: 'Very Good', definition: 'Exceeds expectations' },
+        { value: 5, label: 'Excellent', definition: 'Far exceeds expectations' },
+      ],
+    },
+  },
+  {
+    id: '1-3-simple',
+    title: '1–3 Simple (Numerical)',
+    description: 'Three-point scale: Low, Medium, High',
+    ratingScale: {
+      numerical: [
+        { value: 1, label: 'Low', definition: 'Below acceptable quality' },
+        { value: 2, label: 'Medium', definition: 'Acceptable quality' },
+        { value: 3, label: 'High', definition: 'Above acceptable quality' },
+      ],
+    },
+  },
+  {
+    id: 'pass-fail',
+    title: 'Pass / Fail (Categorical)',
+    description: 'Binary pass or fail assessment',
+    ratingScale: {
+      categorical: [
+        { label: 'Pass', definition: 'Meets the evaluation criteria' },
+        { label: 'Fail', definition: 'Does not meet the evaluation criteria' },
+      ],
+    },
+  },
+  {
+    id: 'good-neutral-bad',
+    title: 'Good / Neutral / Bad (Categorical)',
+    description: 'Three-tier categorical assessment',
+    ratingScale: {
+      categorical: [
+        { label: 'Good', definition: 'Positive outcome, meets or exceeds criteria' },
+        { label: 'Neutral', definition: 'Acceptable but unremarkable outcome' },
+        { label: 'Bad', definition: 'Negative outcome, fails to meet criteria' },
+      ],
+    },
+  },
+];
diff --git a/src/cli/tui/screens/evaluator/useAddEvaluatorWizard.ts b/src/cli/tui/screens/evaluator/useAddEvaluatorWizard.ts
new file mode 100644
index 00000000..f0bcc33d
--- /dev/null
+++ b/src/cli/tui/screens/evaluator/useAddEvaluatorWizard.ts
@@ -0,0 +1,179 @@
+import type { EvaluationLevel, EvaluatorConfig } from '../../../../schema';
+import type { AddEvaluatorConfig, AddEvaluatorStep, CustomRatingScaleType } from './types';
+import { CUSTOM_MODEL_ID, CUSTOM_RATING_SCALE_ID, DEFAULT_MODEL } from './types';
+import { useCallback, useState } from 'react';
+
+// Main wizard sequence in order; the `model-custom` and `ratingScale-*` sub-steps are intentionally absent.
+const ALL_STEPS: AddEvaluatorStep[] = ['name', 'level', 'model', 'instructions', 'ratingScale', 'confirm'];
+
+/**
+ * Builds a fresh starting config for the wizard: empty name and instructions,
+ * SESSION level, DEFAULT_MODEL, and a two-point numerical scale.
+ * A new object is returned on every call so callers never share state.
+ */
+function getDefaultConfig(): AddEvaluatorConfig {
+  const ratingScale: AddEvaluatorConfig['config']['llmAsAJudge']['ratingScale'] = {
+    numerical: [
+      { value: 1, label: 'Poor', definition: 'Fails to meet expectations' },
+      { value: 5, label: 'Excellent', definition: 'Far exceeds expectations' },
+    ],
+  };
+  const llmAsAJudge = { model: DEFAULT_MODEL, instructions: '', ratingScale };
+
+  return { name: '', level: 'SESSION', config: { llmAsAJudge } };
+}
+
+/**
+ * State and transitions for the add-evaluator wizard.
+ *
+ * The main sequence is ALL_STEPS. `model-custom`, `ratingScale-type` and
+ * `ratingScale-custom` are sub-steps entered from their parent select step;
+ * they are not part of `steps`, so `currentIndex` is -1 while one is showing.
+ *
+ * @returns The accumulated `config`, the current `step`, the chosen
+ *   `customRatingScaleType`, `goBack`, `reset`, and one setter per step. Each
+ *   setter stores its value and moves the wizard forward.
+ */
+export function useAddEvaluatorWizard() {
+  const [config, setConfig] = useState<AddEvaluatorConfig>(getDefaultConfig);
+  const [step, setStep] = useState<AddEvaluatorStep>('name');
+  const [customRatingScaleType, setCustomRatingScaleType] = useState<CustomRatingScaleType>('numerical');
+
+  const currentIndex = ALL_STEPS.indexOf(step);
+
+  const goBack = useCallback(() => {
+    // Sub-steps are not in ALL_STEPS, so each returns to the screen it was opened from.
+    if (step === 'model-custom') {
+      setStep('model');
+      return;
+    }
+    if (step === 'ratingScale-custom') {
+      setStep('ratingScale-type');
+      return;
+    }
+    if (step === 'ratingScale-type') {
+      setStep('ratingScale');
+      return;
+    }
+    const prevStep = ALL_STEPS[currentIndex - 1];
+    if (prevStep) setStep(prevStep);
+  }, [currentIndex, step]);
+
+  // Move to the main-sequence step after `from`; does nothing when `from` is the last step.
+  const advanceFrom = useCallback((from: AddEvaluatorStep) => {
+    const next = ALL_STEPS[ALL_STEPS.indexOf(from) + 1];
+    if (next) setStep(next);
+  }, []);
+
+  // Merge a partial update into `config.llmAsAJudge`, keeping its other fields.
+  const updateJudge = useCallback((patch: Partial<EvaluatorConfig['llmAsAJudge']>) => {
+    setConfig(c => ({ ...c, config: { llmAsAJudge: { ...c.config.llmAsAJudge, ...patch } } }));
+  }, []);
+
+  const setName = useCallback(
+    (name: string) => {
+      setConfig(c => ({ ...c, name }));
+      advanceFrom('name');
+    },
+    [advanceFrom]
+  );
+
+  const setLevel = useCallback(
+    (level: EvaluationLevel) => {
+      setConfig(c => ({ ...c, level }));
+      advanceFrom('level');
+    },
+    [advanceFrom]
+  );
+
+  const selectModel = useCallback(
+    (modelId: string) => {
+      // The sentinel opens the free-text sub-step instead of being stored as a model.
+      if (modelId === CUSTOM_MODEL_ID) {
+        setStep('model-custom');
+        return;
+      }
+      updateJudge({ model: modelId });
+      advanceFrom('model');
+    },
+    [advanceFrom, updateJudge]
+  );
+
+  const setCustomModel = useCallback(
+    (model: string) => {
+      updateJudge({ model });
+      // After custom model input, continue exactly as after a model select.
+      advanceFrom('model');
+    },
+    [advanceFrom, updateJudge]
+  );
+
+  const setInstructions = useCallback(
+    (instructions: string) => {
+      updateJudge({ instructions });
+      advanceFrom('instructions');
+    },
+    [advanceFrom, updateJudge]
+  );
+
+  const selectRatingScale = useCallback(
+    (presetIdOrCustom: string, ratingScale?: EvaluatorConfig['llmAsAJudge']['ratingScale']) => {
+      if (presetIdOrCustom === CUSTOM_RATING_SCALE_ID) {
+        setStep('ratingScale-type');
+        return;
+      }
+      // Without a scale argument the previously stored scale is kept.
+      if (ratingScale) updateJudge({ ratingScale });
+      advanceFrom('ratingScale');
+    },
+    [advanceFrom, updateJudge]
+  );
+
+  const selectCustomRatingScaleType = useCallback((type: CustomRatingScaleType) => {
+    setCustomRatingScaleType(type);
+    setStep('ratingScale-custom');
+  }, []);
+
+  const setCustomRatingScale = useCallback(
+    (ratingScale: EvaluatorConfig['llmAsAJudge']['ratingScale']) => {
+      updateJudge({ ratingScale });
+      advanceFrom('ratingScale');
+    },
+    [advanceFrom, updateJudge]
+  );
+
+  const reset = useCallback(() => {
+    setConfig(getDefaultConfig());
+    // Also restore the sub-step choice so a second run starts from the same defaults as the first.
+    setCustomRatingScaleType('numerical');
+    setStep('name');
+  }, []);
+
+  return {
+    config,
+    step,
+    steps: ALL_STEPS,
+    currentIndex,
+    customRatingScaleType,
+    goBack,
+    setName,
+    setLevel,
+    selectModel,
+    setCustomModel,
+    setInstructions,
+    selectRatingScale,
+    selectCustomRatingScaleType,
+    setCustomRatingScale,
+    reset,
+  };
+}
diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx
new file mode 100644
index 00000000..b79b9eb3
--- /dev/null
+++ b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx
@@ -0,0 +1,143 @@
+import { ConfigIO } from '../../../../lib';
+import { validateAwsCredentials } from '../../../aws/account';
+import { listEvaluators } from '../../../aws/agentcore-control';
+import { detectRegion } from '../../../aws/region';
+import { getErrorMessage } from '../../../errors';
+import { ErrorPrompt } from '../../components';
+import { useCreateOnlineEval, useExistingOnlineEvalNames } from '../../hooks/useCreateOnlineEval';
+import { AddSuccessScreen } from '../add/AddSuccessScreen';
+import { AddOnlineEvalScreen } from './AddOnlineEvalScreen';
+import type { AddOnlineEvalConfig, EvaluatorItem } from './types';
+import React, { useCallback, useEffect, useState } from 'react';
+
+/**
+ * Screens of the add-online-eval flow, discriminated by `name`.
+ * `creds-error` is kept apart from `error` because it is rendered with its own
+ * message and back handler.
+ */
+type FlowState =
+  | { name: 'loading' }
+  | { name: 'create-wizard'; evaluators: EvaluatorItem[]; agentNames: string[] }
+  | { name: 'create-success'; configName: string }
+  | { name: 'creds-error'; message: string }
+  | { name: 'error'; message: string };
+
+/** Props for AddOnlineEvalFlow. */
+interface AddOnlineEvalFlowProps {
+  /** When false, the flow calls `onExit` as soon as creation succeeds. Defaults to true. */
+  isInteractive?: boolean;
+  /** Leave the flow entirely. */
+  onExit: () => void;
+  /** Return to the previous screen; also used for "add another" after success. */
+  onBack: () => void;
+  /** Forwarded to the success screen. */
+  onDev?: () => void;
+  /** Forwarded to the success screen. */
+  onDeploy?: () => void;
+}
+
+/**
+ * Drives the "add online eval config" flow.
+ *
+ * While `loading` it validates AWS credentials, reads the project spec and
+ * region, and lists the account's evaluators. It then shows the wizard, and
+ * finally a success or error screen. Choosing "back" on the generic error
+ * screen resets the create hook and runs the loading sequence again.
+ */
+export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddOnlineEvalFlowProps) {
+  const { createOnlineEval, reset: resetCreate } = useCreateOnlineEval();
+  const { names: existingConfigNames } = useExistingOnlineEvalNames();
+  const [flow, setFlow] = useState<FlowState>({ name: 'loading' });
+
+  // Pre-check AWS credentials, then load the project's agents and the account's evaluators
+  useEffect(() => {
+    if (flow.name !== 'loading') return;
+    let cancelled = false;
+
+    void (async () => {
+      try {
+        await validateAwsCredentials();
+      } catch (err) {
+        if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) });
+        return;
+      }
+
+      try {
+        const [{ region }, projectSpec] = await Promise.all([detectRegion(), new ConfigIO().readProjectSpec()]);
+        if (cancelled) return;
+
+        // The wizard cannot finish without an agent, so stop before the evaluator listing call.
+        const agentNames = projectSpec.agents.map(a => a.name);
+        if (agentNames.length === 0) {
+          setFlow({
+            name: 'error',
+            message: 'No agents found in project. Add an agent first with `agentcore add agent`.',
+          });
+          return;
+        }
+
+        const result = await listEvaluators({ region });
+        if (cancelled) return;
+
+        const items: EvaluatorItem[] = result.evaluators.map(e => ({
+          arn: e.evaluatorArn,
+          name: e.evaluatorName,
+          type: e.evaluatorType,
+          description: e.description,
+        }));
+
+        setFlow({ name: 'create-wizard', evaluators: items, agentNames });
+      } catch (err) {
+        if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [flow.name]);
+
+  // Non-interactive runs have nobody to dismiss the success screen.
+  useEffect(() => {
+    if (!isInteractive && flow.name === 'create-success') {
+      onExit();
+    }
+  }, [isInteractive, flow.name, onExit]);
+
+  const handleCreateComplete = useCallback(
+    (config: AddOnlineEvalConfig) => {
+      void createOnlineEval(config)
+        .then(result => {
+          if (result.ok) {
+            setFlow({ name: 'create-success', configName: result.configName });
+            return;
+          }
+          setFlow({ name: 'error', message: result.error });
+        })
+        // A rejection would otherwise be unhandled and leave the wizard on screen with no feedback.
+        .catch((err: unknown) => setFlow({ name: 'error', message: getErrorMessage(err) }));
+    },
+    [createOnlineEval]
+  );
+
+  if (flow.name === 'loading') {
+    return null;
+  }
+
+  if (flow.name === 'creds-error') {
+    return <ErrorPrompt message="AWS credentials required" detail={flow.message} onBack={onBack} onExit={onExit} />;
+  }
+
+  if (flow.name === 'create-wizard') {
+    return (
+      <AddOnlineEvalScreen
+        existingConfigNames={existingConfigNames}
+        evaluatorItems={flow.evaluators}
+        agentNames={flow.agentNames}
+        onComplete={handleCreateComplete}
+        onExit={onBack}
+      />
+    );
+  }
+
+  if (flow.name === 'create-success') {
+    return (
+      <AddSuccessScreen
+        isInteractive={isInteractive}
+        message={`Added online eval config: ${flow.configName}`}
+        detail="Online eval config added to project in `agentcore/agentcore.json`. Deploy with `agentcore deploy`."
+        onAddAnother={onBack}
+        onDev={onDev}
+        onDeploy={onDeploy}
+        onExit={onExit}
+      />
+    );
+  }
+
+  return (
+    <ErrorPrompt
+      message="Failed to add online eval config"
+      detail={flow.message}
+      onBack={() => {
+        resetCreate();
+        setFlow({ name: 'loading' });
+      }}
+      onExit={onExit}
+    />
+  );
+}
diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx
new file mode 100644
index 00000000..19eb35a4
--- /dev/null
+++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx
@@ -0,0 +1,191 @@
+import { OnlineEvalConfigNameSchema } from '../../../../schema';
+import type { SelectableItem } from '../../components';
+import {
+  ConfirmReview,
+  Panel,
+  Screen,
+  StepIndicator,
+  TextInput,
+  WizardMultiSelect,
+  WizardSelect,
+} from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation, useMultiSelectNavigation } from '../../hooks';
+import { generateUniqueName } from '../../utils';
+import type { AddOnlineEvalConfig, EvaluatorItem } from './types';
+import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types';
+import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard';
+import React, { useMemo } from 'react';
+
+/** Props for AddOnlineEvalScreen. */
+interface AddOnlineEvalScreenProps {
+  /** Called with the finished config when the user confirms the review step. */
+  onComplete: (config: AddOnlineEvalConfig) => void;
+  /** Called when the user leaves the wizard from the first step or the screen itself. */
+  onExit: () => void;
+  /** Names already taken; used for the suggested name and the duplicate check. */
+  existingConfigNames: string[];
+  /** Evaluators the user can pick from. */
+  evaluatorItems: EvaluatorItem[];
+  /** Agents the config may monitor; a single entry is selected automatically. */
+  agentNames: string[];
+}
+
+/**
+ * Parse the sampling-rate text field.
+ *
+ * The whole input must be a number, optionally followed by `%`. Unlike
+ * `parseFloat`, trailing junk is rejected, so "10abc" or "1,5" is reported
+ * instead of being silently read as 10 or 1.
+ *
+ * @returns The rate when it lies in 0.01–100, otherwise the validation message.
+ */
+function parseSamplingRate(text: string): number | string {
+  const cleaned = text.trim().replace(/\s*%$/, '');
+  // Number('') is 0, which would be reported as out of range rather than as not a number.
+  const rate = cleaned === '' ? NaN : Number(cleaned);
+  if (!Number.isFinite(rate)) return 'Must be a number';
+  if (rate < 0.01 || rate > 100) return 'Must be between 0.01 and 100';
+  return rate;
+}
+
+/**
+ * Wizard screen that collects an online eval config step by step: name, agent,
+ * evaluators, sampling rate, enable-on-deploy, then a review. `onComplete`
+ * receives the config when the review is confirmed.
+ */
+export function AddOnlineEvalScreen({
+  onComplete,
+  onExit,
+  existingConfigNames,
+  evaluatorItems: rawEvaluatorItems,
+  agentNames,
+}: AddOnlineEvalScreenProps) {
+  const wizard = useAddOnlineEvalWizard(agentNames.length);
+
+  // Auto-set agent when there's only one
+  const effectiveConfig = useMemo(() => {
+    if (agentNames.length === 1 && !wizard.config.agent) {
+      return { ...wizard.config, agent: agentNames[0]! };
+    }
+    return wizard.config;
+  }, [wizard.config, agentNames]);
+
+  // The ARN is the item id, so it is the value stored in the config's evaluator list.
+  const evaluatorItems: SelectableItem[] = useMemo(() => {
+    return rawEvaluatorItems.map(e => ({
+      id: e.arn,
+      title: e.name,
+      description: e.type === 'Builtin' ? 'Built-in evaluator' : (e.description ?? 'Custom evaluator'),
+    }));
+  }, [rawEvaluatorItems]);
+
+  const agentItems: SelectableItem[] = useMemo(() => {
+    return agentNames.map(name => ({ id: name, title: name }));
+  }, [agentNames]);
+
+  const isNameStep = wizard.step === 'name';
+  const isAgentStep = wizard.step === 'agent';
+  const isEvaluatorsStep = wizard.step === 'evaluators';
+  const isSamplingRateStep = wizard.step === 'samplingRate';
+  const isEnableOnCreateStep = wizard.step === 'enableOnCreate';
+  const isConfirmStep = wizard.step === 'confirm';
+
+  const enableOnCreateItems: SelectableItem[] = useMemo(
+    () => [
+      { id: 'yes', title: 'Yes', description: 'Enable evaluation immediately after deploy' },
+      { id: 'no', title: 'No', description: 'Deploy paused — enable later with `agentcore resume online-eval`' },
+    ],
+    []
+  );
+
+  const agentNav = useListNavigation({
+    items: agentItems,
+    onSelect: item => wizard.setAgent(item.id),
+    onExit: () => wizard.goBack(),
+    isActive: isAgentStep,
+  });
+
+  const evaluatorsNav = useMultiSelectNavigation({
+    items: evaluatorItems,
+    getId: item => item.id,
+    onConfirm: ids => wizard.setEvaluators(ids),
+    onExit: () => wizard.goBack(),
+    isActive: isEvaluatorsStep,
+    requireSelection: true,
+  });
+
+  const enableOnCreateNav = useListNavigation({
+    items: enableOnCreateItems,
+    onSelect: item => wizard.setEnableOnCreate(item.id === 'yes'),
+    onExit: () => wizard.goBack(),
+    isActive: isEnableOnCreateStep,
+  });
+
+  // The review step is a one-item list so Enter confirms and Esc goes back.
+  useListNavigation({
+    items: [{ id: 'confirm', title: 'Confirm' }],
+    onSelect: () => onComplete(effectiveConfig),
+    onExit: () => wizard.goBack(),
+    isActive: isConfirmStep,
+  });
+
+  const helpText = isEvaluatorsStep
+    ? 'Space toggle · Enter confirm · Esc back'
+    : isAgentStep || isEnableOnCreateStep
+      ? HELP_TEXT.NAVIGATE_SELECT
+      : isConfirmStep
+        ? HELP_TEXT.CONFIRM_CANCEL
+        : HELP_TEXT.TEXT_INPUT;
+
+  const headerContent = (
+    <StepIndicator steps={wizard.steps} currentStep={wizard.step} labels={ONLINE_EVAL_STEP_LABELS} />
+  );
+
+  return (
+    <Screen title="Add Online Eval Config" onExit={onExit} helpText={helpText} headerContent={headerContent}>
+      <Panel>
+        {isNameStep && (
+          <TextInput
+            key="name"
+            prompt="Config name"
+            initialValue={generateUniqueName('MyOnlineEval', existingConfigNames)}
+            onSubmit={wizard.setName}
+            onCancel={onExit}
+            schema={OnlineEvalConfigNameSchema}
+            customValidation={value => !existingConfigNames.includes(value) || 'Config name already exists'}
+          />
+        )}
+
+        {isAgentStep && (
+          <WizardSelect
+            title="Select agent to monitor"
+            description="Each online eval config monitors a single agent"
+            items={agentItems}
+            selectedIndex={agentNav.selectedIndex}
+          />
+        )}
+
+        {isEvaluatorsStep && (
+          <WizardMultiSelect
+            title="Select evaluators"
+            description="Choose custom and/or built-in evaluators"
+            items={evaluatorItems}
+            cursorIndex={evaluatorsNav.cursorIndex}
+            selectedIds={evaluatorsNav.selectedIds}
+          />
+        )}
+
+        {isSamplingRateStep && (
+          <TextInput
+            key="samplingRate"
+            prompt="Sampling rate (0.01–100%)"
+            initialValue={String(DEFAULT_SAMPLING_RATE)}
+            onSubmit={value => {
+              // Re-check on submit so an invalid value can never reach the wizard state.
+              const parsed = parseSamplingRate(value);
+              if (typeof parsed === 'number') wizard.setSamplingRate(parsed);
+            }}
+            onCancel={() => wizard.goBack()}
+            customValidation={value => {
+              const parsed = parseSamplingRate(value);
+              return typeof parsed === 'number' ? true : parsed;
+            }}
+          />
+        )}
+
+        {isEnableOnCreateStep && (
+          <WizardSelect
+            title="Enable on deploy?"
+            description="If enabled, evaluation starts automatically after `agentcore deploy`"
+            items={enableOnCreateItems}
+            selectedIndex={enableOnCreateNav.selectedIndex}
+          />
+        )}
+
+        {isConfirmStep && (
+          <ConfirmReview
+            fields={[
+              { label: 'Name', value: effectiveConfig.name },
+              { label: 'Agent', value: effectiveConfig.agent },
+              { label: 'Evaluators', value: effectiveConfig.evaluators.join(', ') },
+              { label: 'Sampling Rate', value: `${effectiveConfig.samplingRate}%` },
+              { label: 'Enable on Deploy', value: effectiveConfig.enableOnCreate ? 'Yes' : 'No' },
+            ]}
+          />
+        )}
+      </Panel>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx b/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx
new file mode 100644
index 00000000..1c7ceb49
--- /dev/null
+++ b/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx
@@ -0,0 +1,259 @@
+import type { GetOnlineEvalConfigResult } from '../../../aws/agentcore-control';
+import { getOnlineEvaluationConfig } from '../../../aws/agentcore-control';
+import { getErrorMessage } from '../../../errors';
+import { handlePauseResume } from '../../../operations/eval';
+import { loadDeployedProjectConfig } from '../../../operations/resolve-agent';
+import { Panel, Screen } from '../../components';
+import { useListNavigation } from '../../hooks';
+import { STATUS_COLORS } from '../../theme';
+import { Box, Text, useInput } from 'ink';
+import React, { useCallback, useEffect, useRef, useState } from 'react';
+
+/** Props for OnlineEvalDashboard. */
+interface OnlineEvalDashboardProps {
+  /** Part of the props contract; the component does not currently read it. */
+  isInteractive: boolean;
+  /** Called when the user leaves the dashboard. */
+  onExit: () => void;
+}
+
+/** One dashboard row: a locally defined online eval config plus what is known about its deployment. */
+interface DashboardConfig {
+  /** Name from the project's `onlineEvalConfigs`. */
+  name: string;
+  /** Deployed config ID, or '' when the config has not been deployed. */
+  configId: string;
+  /** Region used for the live lookup. */
+  region: string;
+  /** Evaluators from the local config. */
+  evaluators: string[];
+  /** Sampling rate from the local config; rendered with a `%` suffix. */
+  samplingRate: number;
+  /** Config status from the live lookup. */
+  liveStatus?: string;
+  /** Execution status from deployed state, replaced by the live value when the lookup succeeds. */
+  executionStatus?: string;
+  /** Failure reason from the live lookup. */
+  failureReason?: string;
+  /** Message of the error thrown by the live lookup, if it failed. */
+  error?: string;
+}
+
+/** Dashboard lifecycle; `toggling` is the window while a pause/resume request is in flight. */
+type Phase = 'loading' | 'loaded' | 'error' | 'toggling';
+
+/** Everything the dashboard renders from. */
+interface DashboardState {
+  phase: Phase;
+  /** Rows to display. */
+  configs: DashboardConfig[];
+  /** Last error message, or null. It can be set while `phase` is 'loaded' (a failed toggle). */
+  error: string | null;
+}
+
+/**
+ * Colour for an execution status: green for 'ENABLED', yellow for 'DISABLED',
+ * gray for anything else, including a missing status. The match is case-sensitive.
+ */
+function executionStatusColor(status?: string): string {
+  if (status === 'ENABLED') return 'green';
+  if (status === 'DISABLED') return 'yellow';
+  return 'gray';
+}
+
+/**
+ * Colour for a config status, matched case-insensitively: green for ACTIVE,
+ * yellow for CREATING or UPDATING, red for FAILED, gray for anything else,
+ * including a missing status.
+ */
+function configStatusColor(status?: string): string {
+  const palette = new Map<string, string>([
+    ['ACTIVE', 'green'],
+    ['CREATING', 'yellow'],
+    ['UPDATING', 'yellow'],
+    ['FAILED', 'red'],
+  ]);
+  // '' is not a key, so a missing status falls through to gray.
+  return palette.get(status?.toUpperCase() ?? '') ?? 'gray';
+}
+
+/**
+ * Builds the dashboard rows: one per online eval config in the local project,
+ * joined with deployed state and, for deployed configs, the live status.
+ *
+ * Only the first target in deployed state is considered. Its region comes from
+ * the matching AWS target, falling back to 'us-east-1'. A failed live lookup
+ * does not reject; the message is stored on that row's `error`.
+ *
+ * @returns The rows in local-config order, or [] when nothing is deployed.
+ */
+async function fetchDashboardConfigs(): Promise<DashboardConfig[]> {
+  const { project, deployedState, awsTargets } = await loadDeployedProjectConfig();
+
+  const [targetName] = Object.keys(deployedState.targets);
+  if (targetName === undefined) return [];
+
+  const deployedOnlineEvals = deployedState.targets[targetName]?.resources?.onlineEvalConfigs ?? {};
+  const region = awsTargets.find(t => t.name === targetName)?.region ?? 'us-east-1';
+
+  // Rows as known from local config plus deployed state, before any network call.
+  const rows = (project.onlineEvalConfigs ?? []).map((local): DashboardConfig => {
+    const deployed = deployedOnlineEvals[local.name];
+    return {
+      name: local.name,
+      configId: deployed?.onlineEvaluationConfigId ?? '',
+      region,
+      evaluators: local.evaluators,
+      samplingRate: local.samplingRate,
+      executionStatus: deployed?.executionStatus,
+    };
+  });
+
+  // Enrich deployed rows with live status; lookups run in parallel and keep row order.
+  return Promise.all(
+    rows.map(async (row): Promise<DashboardConfig> => {
+      if (!row.configId) return row;
+      try {
+        const live: GetOnlineEvalConfigResult = await getOnlineEvaluationConfig({
+          region: row.region,
+          configId: row.configId,
+        });
+        return {
+          ...row,
+          liveStatus: live.status,
+          executionStatus: live.executionStatus,
+          failureReason: live.failureReason,
+        };
+      } catch (err) {
+        return { ...row, error: getErrorMessage(err) };
+      }
+    })
+  );
+}
+
+/**
+ * Lists the project's online eval configs with their deployment and execution
+ * status. Enter pauses or resumes the selected deployed config, and Ctrl+R
+ * reloads the list.
+ *
+ * Any failure while toggling returns the dashboard to the `loaded` phase with
+ * an error banner, so the UI cannot get stuck on "Updating...".
+ */
+export function OnlineEvalDashboard({ onExit }: OnlineEvalDashboardProps) {
+  const [state, setState] = useState<DashboardState>({
+    phase: 'loading',
+    configs: [],
+    error: null,
+  });
+  const [refreshKey, setRefreshKey] = useState(0);
+  const mountedRef = useRef(true);
+
+  const refresh = useCallback(() => {
+    setState(prev => ({ ...prev, phase: 'loading', error: null }));
+    setRefreshKey(k => k + 1);
+  }, []);
+
+  useInput(
+    (input, key) => {
+      if (input === 'r' && key.ctrl && state.phase === 'loaded') {
+        refresh();
+      }
+    },
+    { isActive: state.phase === 'loaded' }
+  );
+
+  useEffect(() => {
+    mountedRef.current = true;
+    // Per-run flag: the shared ref is set back to true by the next run, so it cannot
+    // tell a superseded fetch from the current one.
+    let superseded = false;
+    fetchDashboardConfigs()
+      .then(configs => {
+        if (!superseded) setState({ phase: 'loaded', configs, error: null });
+      })
+      .catch((err: unknown) => {
+        if (!superseded) setState({ phase: 'error', configs: [], error: getErrorMessage(err) });
+      });
+    return () => {
+      superseded = true;
+      mountedRef.current = false;
+    };
+  }, [refreshKey]);
+
+  const nav = useListNavigation({
+    items: state.configs,
+    onSelect: item => {
+      // Undeployed configs have nothing to pause or resume.
+      if (!item.configId) return;
+      const action = item.executionStatus === 'ENABLED' ? 'pause' : 'resume';
+      setState(prev => ({ ...prev, phase: 'toggling' }));
+      void (async () => {
+        try {
+          const result = await handlePauseResume({ name: item.name }, action);
+          if (!mountedRef.current) return;
+          if (!result.success) {
+            setState(prev => ({ ...prev, phase: 'loaded', error: result.error ?? 'Toggle failed' }));
+            return;
+          }
+          const configs = await fetchDashboardConfigs();
+          if (mountedRef.current) setState({ phase: 'loaded', configs, error: null });
+        } catch (err) {
+          // Navigation and exit are disabled while toggling, so always leave that phase.
+          if (mountedRef.current) {
+            setState(prev => ({ ...prev, phase: 'loaded', error: getErrorMessage(err) }));
+          }
+        }
+      })();
+    },
+    onExit: () => onExit(),
+    isActive: state.phase === 'loaded' && state.configs.length > 0,
+  });
+
+  const helpText =
+    state.configs.length > 0
+      ? '↑↓ navigate · Enter toggle pause/resume · Ctrl+R refresh · Esc back'
+      : 'Esc back · Ctrl+C quit';
+
+  return (
+    <Screen title="Online Eval Dashboard" onExit={onExit} helpText={helpText} exitEnabled={state.configs.length === 0}>
+      {(state.phase === 'loading' || state.phase === 'toggling') && (
+        <Text dimColor>{state.phase === 'toggling' ? 'Updating...' : 'Loading online eval configs...'}</Text>
+      )}
+
+      {state.phase === 'error' && <Text color={STATUS_COLORS.error}>{state.error}</Text>}
+
+      {state.error && state.phase === 'loaded' && (
+        <Box marginBottom={1}>
+          <Text color={STATUS_COLORS.error}>{state.error}</Text>
+        </Box>
+      )}
+
+      {state.phase === 'loaded' && state.configs.length === 0 && (
+        <Box flexDirection="column">
+          <Text dimColor>No online eval configs found.</Text>
+          <Text dimColor>Run `agentcore add online-eval` then `agentcore deploy` to get started.</Text>
+        </Box>
+      )}
+
+      {state.phase === 'loaded' && state.configs.length > 0 && (
+        <Panel fullWidth>
+          <Box flexDirection="column">
+            {state.configs.map((config, idx) => {
+              const selected = idx === nav.selectedIndex;
+              const isDeployed = Boolean(config.configId);
+              const toggleLabel = config.executionStatus === 'ENABLED' ? 'Enter to pause' : 'Enter to resume';
+              return (
+                <Box key={config.name} flexDirection="column" marginBottom={idx < state.configs.length - 1 ? 1 : 0}>
+                  <Text wrap="wrap">
+                    <Text color={selected ? 'cyan' : undefined}>{selected ? '❯' : ' '} </Text>
+                    <Text color={selected ? 'cyan' : undefined} bold={selected}>
+                      {config.name}
+                    </Text>
+                    {config.liveStatus && (
+                      <Text color={configStatusColor(config.liveStatus)}> [{config.liveStatus}]</Text>
+                    )}
+                    {config.executionStatus && (
+                      <Text color={executionStatusColor(config.executionStatus)}> {config.executionStatus}</Text>
+                    )}
+                    {!isDeployed && <Text color="yellow"> [Not deployed]</Text>}
+                  </Text>
+                  <Text wrap="wrap">
+                    <Text>{'  '}</Text>
+                    <Text dimColor>
+                      Evaluators: {config.evaluators.join(', ')}
+                      {'  '}
+                      Sampling: {config.samplingRate}%
+                    </Text>
+                  </Text>
+                  {config.failureReason && (
+                    <Text>
+                      <Text>{'  '}</Text>
+                      <Text color="red">Failure: {config.failureReason}</Text>
+                    </Text>
+                  )}
+                  {config.error && (
+                    <Text>
+                      <Text>{'  '}</Text>
+                      <Text color="red">Error: {config.error}</Text>
+                    </Text>
+                  )}
+                  {selected && isDeployed && (
+                    <Text>
+                      <Text>{'  '}</Text>
+                      <Text dimColor>{toggleLabel}</Text>
+                    </Text>
+                  )}
+                  {selected && !isDeployed && (
+                    <Text>
+                      <Text>{'  '}</Text>
+                      <Text dimColor>Run `agentcore deploy` to start this online eval config</Text>
+                    </Text>
+                  )}
+                </Box>
+              );
+            })}
+          </Box>
+        </Panel>
+      )}
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/online-eval/index.ts b/src/cli/tui/screens/online-eval/index.ts
new file mode 100644
index 00000000..a20c949a
--- /dev/null
+++ b/src/cli/tui/screens/online-eval/index.ts
@@ -0,0 +1,3 @@
+export { AddOnlineEvalFlow } from './AddOnlineEvalFlow';
+export { AddOnlineEvalScreen } from './AddOnlineEvalScreen';
+export { OnlineEvalDashboard } from './OnlineEvalDashboard';
diff --git a/src/cli/tui/screens/online-eval/types.ts b/src/cli/tui/screens/online-eval/types.ts
new file mode 100644
index 00000000..943eaee7
--- /dev/null
+++ b/src/cli/tui/screens/online-eval/types.ts
@@ -0,0 +1,40 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// Online Eval Config Flow Types (wizard step ids, the config they fill in, step labels)
+// ─────────────────────────────────────────────────────────────────────────────
+
+export type AddOnlineEvalStep = 'name' | 'agent' | 'evaluators' | 'samplingRate' | 'enableOnCreate' | 'confirm';
+
+export interface AddOnlineEvalConfig {
+  name: string; // collected at the 'name' step
+  agent: string; // collected at the 'agent' step; the wizard default is ''
+  evaluators: string[]; // presumably evaluator ARNs (see EvaluatorItem.arn) — TODO confirm
+  samplingRate: number; // wizard default is DEFAULT_SAMPLING_RATE; presumably a percentage — TODO confirm
+  enableOnCreate: boolean; // wizard default is true
+  description?: string; // optional; no step in AddOnlineEvalStep collects it
+}
+
+export const ONLINE_EVAL_STEP_LABELS: Record<AddOnlineEvalStep, string> = {
+  name: 'Name',
+  agent: 'Agent',
+  evaluators: 'Evaluators',
+  samplingRate: 'Rate', // abbreviated label for the sampling-rate step
+  enableOnCreate: 'Enable', // abbreviated label for the enable-on-create step
+  confirm: 'Confirm',
+}; // the Record<AddOnlineEvalStep, …> type makes the compiler require a label for every step
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Evaluator Items (fetched from API)
+// ─────────────────────────────────────────────────────────────────────────────
+
+export interface EvaluatorItem {
+  /** ARN; this is the value stored in the config to identify the evaluator */
+  arn: string;
+  /** Human-readable name to display */
+  name: string;
+  /** 'Builtin' or 'Custom'; typed as a plain string, so the compiler does not enforce it */
+  type: string;
+  /** Optional description; may be absent */
+  description?: string;
+}
+
+export const DEFAULT_SAMPLING_RATE = 10; // wizard's initial samplingRate; presumably a percentage — TODO confirm
diff --git a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts
new file mode 100644
index 00000000..0032469f
--- /dev/null
+++ b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts
@@ -0,0 +1,105 @@
+import type { AddOnlineEvalConfig, AddOnlineEvalStep } from './types';
+import { DEFAULT_SAMPLING_RATE } from './types';
+import { useCallback, useMemo, useState } from 'react';
+
+function getAllSteps(agentCount: number): AddOnlineEvalStep[] {
+  // With at most one agent there is nothing to choose, so the agent step is left out.
+  const agentStep: AddOnlineEvalStep[] = agentCount <= 1 ? [] : ['agent'];
+  // All remaining steps are always present, in this order.
+  return ['name', ...agentStep, 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm'];
+}
+
+/**
+ * Builds the blank starting config for the wizard.
+ * A new object (with its own evaluators array) is returned on every call.
+ */
+function getDefaultConfig(): AddOnlineEvalConfig {
+  const samplingRate = DEFAULT_SAMPLING_RATE;
+  const blank: AddOnlineEvalConfig = { name: '', agent: '', evaluators: [], samplingRate, enableOnCreate: true };
+  return blank;
+}
+
+/**
+ * Wizard state for adding an online eval config: the config collected so far and
+ * the step currently shown. Each setter stores its value and then moves forward.
+ *
+ * @param agentCount Number of agents to choose from; the 'agent' step is only
+ *   included when this is greater than 1.
+ * @returns The config, the current step and its index, the ordered steps, and
+ *   the callbacks that move the wizard forward, backward, or back to the start.
+ */
+export function useAddOnlineEvalWizard(agentCount: number) {
+  // Memoized so the array identity changes only with agentCount. Rebuilding it on
+  // every render would invalidate every callback below on every render.
+  const allSteps = useMemo(() => getAllSteps(agentCount), [agentCount]);
+  const [config, setConfig] = useState<AddOnlineEvalConfig>(getDefaultConfig);
+  const [step, setStep] = useState<AddOnlineEvalStep>(allSteps[0]!);
+
+  // Position of the current step in allSteps (-1 if it is not in the list).
+  const currentIndex = allSteps.indexOf(step);
+
+  // Step back once; nothing precedes the first step, so there it does nothing.
+  const goBack = useCallback(() => {
+    const prevStep = allSteps[currentIndex - 1];
+    if (prevStep) setStep(prevStep);
+  }, [allSteps, currentIndex, setStep]);
+
+  // The step listed right after `currentStep`, or undefined when it is the last one.
+  const nextStep = useCallback(
+    (currentStep: AddOnlineEvalStep): AddOnlineEvalStep | undefined => {
+      const idx = allSteps.indexOf(currentStep);
+      return allSteps[idx + 1];
+    },
+    [allSteps]
+  );
+
+  // Shared body of the setters: merge `patch` into the config, then move to the
+  // step after `from`. The step is left as is when nothing follows `from`.
+  const commit = useCallback(
+    (from: AddOnlineEvalStep, patch: Partial<AddOnlineEvalConfig>) => {
+      setConfig(c => ({ ...c, ...patch }));
+      const next = nextStep(from);
+      if (next) setStep(next);
+    },
+    [nextStep, setConfig, setStep]
+  );
+
+  const setName = useCallback((name: string) => commit('name', { name }), [commit]);
+
+  const setAgent = useCallback((agent: string) => commit('agent', { agent }), [commit]);
+
+  const setEvaluators = useCallback(
+    (evaluators: string[]) => commit('evaluators', { evaluators }),
+    [commit]
+  );
+
+  const setSamplingRate = useCallback(
+    (samplingRate: number) => commit('samplingRate', { samplingRate }),
+    [commit]
+  );
+
+  const setEnableOnCreate = useCallback(
+    (enableOnCreate: boolean) => commit('enableOnCreate', { enableOnCreate }),
+    [commit]
+  );
+
+  // Throw away everything entered so far and return to the first step.
+  const reset = useCallback(() => {
+    setConfig(getDefaultConfig());
+    setStep(allSteps[0]!);
+  }, [allSteps, setConfig, setStep]);
+
+  return {
+    config,
+    step,
+    steps: allSteps,
+    currentIndex,
+    goBack,
+    setName,
+    setAgent,
+    setEvaluators,
+    setSamplingRate,
+    setEnableOnCreate,
+    reset,
+  };
+}
diff --git a/src/cli/tui/screens/remove/RemoveEvaluatorScreen.tsx b/src/cli/tui/screens/remove/RemoveEvaluatorScreen.tsx
new file mode 100644
index 00000000..a0d5f0fa
--- /dev/null
+++ b/src/cli/tui/screens/remove/RemoveEvaluatorScreen.tsx
@@ -0,0 +1,26 @@
+import type { RemovableEvaluator } from '../../../primitives/EvaluatorPrimitive';
+import { SelectScreen } from '../../components';
+import React from 'react';
+
+interface RemoveEvaluatorScreenProps {
+  evaluators: RemovableEvaluator[]; // one list entry is rendered per evaluator
+  onSelect: (evaluatorName: string) => void; // called with the chosen evaluator's name
+  onExit: () => void; // passed straight through to SelectScreen's onExit
+}
+
+export function RemoveEvaluatorScreen({ evaluators, onSelect, onExit }: RemoveEvaluatorScreenProps) {
+  // Each evaluator becomes one list row; its name doubles as the row id.
+  const items = evaluators.map(({ name }) => {
+    return { id: name, title: name, description: 'Custom Evaluator' };
+  });
+
+  // SelectScreen reports the chosen row; the caller only gets its id (the evaluator name).
+  return (
+    <SelectScreen
+      title="Select Evaluator to Remove"
+      items={items}
+      onSelect={picked => onSelect(picked.id)}
+      onExit={onExit}
+    />
+  );
+}
diff --git a/src/cli/tui/screens/remove/RemoveFlow.tsx b/src/cli/tui/screens/remove/RemoveFlow.tsx
index 066874bb..635f18cb 100644
--- a/src/cli/tui/screens/remove/RemoveFlow.tsx
+++ b/src/cli/tui/screens/remove/RemoveFlow.tsx
@@ -2,24 +2,30 @@ import type { RemovableGatewayTarget, RemovalPreview } from '../../../operations
 import { ErrorPrompt, Panel, Screen } from '../../components';
 import {
   useRemovableAgents,
+  useRemovableEvaluators,
   useRemovableGatewayTargets,
   useRemovableGateways,
   useRemovableIdentities,
   useRemovableMemories,
+  useRemovableOnlineEvalConfigs,
   useRemovalPreview,
   useRemoveAgent,
+  useRemoveEvaluator,
   useRemoveGateway,
   useRemoveGatewayTarget,
   useRemoveIdentity,
   useRemoveMemory,
+  useRemoveOnlineEvalConfig,
 } from '../../hooks/useRemove';
 import { RemoveAgentScreen } from './RemoveAgentScreen';
 import { RemoveAllScreen } from './RemoveAllScreen';
 import { RemoveConfirmScreen } from './RemoveConfirmScreen';
+import { RemoveEvaluatorScreen } from './RemoveEvaluatorScreen';
 import { RemoveGatewayScreen } from './RemoveGatewayScreen';
 import { RemoveGatewayTargetScreen } from './RemoveGatewayTargetScreen';
 import { RemoveIdentityScreen } from './RemoveIdentityScreen';
 import { RemoveMemoryScreen } from './RemoveMemoryScreen';
+import { RemoveOnlineEvalScreen } from './RemoveOnlineEvalScreen';
 import type { RemoveResourceType } from './RemoveScreen';
 import { RemoveScreen } from './RemoveScreen';
 import { RemoveSuccessScreen } from './RemoveSuccessScreen';
@@ -34,17 +40,23 @@ type FlowState =
   | { name: 'select-gateway-target' }
   | { name: 'select-memory' }
   | { name: 'select-identity' }
+  | { name: 'select-evaluator' }
+  | { name: 'select-online-eval' }
   | { name: 'confirm-agent'; agentName: string; preview: RemovalPreview }
   | { name: 'confirm-gateway'; gatewayName: string; preview: RemovalPreview }
   | { name: 'confirm-gateway-target'; tool: RemovableGatewayTarget; preview: RemovalPreview }
   | { name: 'confirm-memory'; memoryName: string; preview: RemovalPreview }
   | { name: 'confirm-identity'; identityName: string; preview: RemovalPreview }
+  | { name: 'confirm-evaluator'; evaluatorName: string; preview: RemovalPreview }
+  | { name: 'confirm-online-eval'; configName: string; preview: RemovalPreview }
   | { name: 'loading'; message: string }
   | { name: 'agent-success'; agentName: string; logFilePath?: string }
   | { name: 'gateway-success'; gatewayName: string; logFilePath?: string }
   | { name: 'tool-success'; toolName: string; logFilePath?: string }
   | { name: 'memory-success'; memoryName: string; logFilePath?: string }
   | { name: 'identity-success'; identityName: string; logFilePath?: string }
+  | { name: 'evaluator-success'; evaluatorName: string; logFilePath?: string }
+  | { name: 'online-eval-success'; configName: string; logFilePath?: string }
   | { name: 'remove-all' }
   | { name: 'error'; message: string };
 
@@ -57,7 +69,7 @@ interface RemoveFlowProps {
   /** Force mode - skip confirmation */
   force?: boolean;
   /** Initial resource type to start at (for CLI subcommands) */
-  initialResourceType?: 'agent' | 'gateway' | 'gateway-target' | 'memory' | 'identity';
+  initialResourceType?: 'agent' | 'gateway' | 'gateway-target' | 'memory' | 'identity' | 'evaluator' | 'online-eval';
   /** Initial resource name to auto-select (for CLI --name flag) */
   initialResourceName?: string;
 }
@@ -83,6 +95,10 @@ export function RemoveFlow({
         return { name: 'select-memory' };
       case 'identity':
         return { name: 'select-identity' };
+      case 'evaluator':
+        return { name: 'select-evaluator' };
+      case 'online-eval':
+        return { name: 'select-online-eval' };
       default:
         return { name: 'select' };
     }
@@ -95,9 +111,22 @@ export function RemoveFlow({
   const { tools: mcpTools, isLoading: isLoadingTools, refresh: refreshTools } = useRemovableGatewayTargets();
   const { memories, isLoading: isLoadingMemories, refresh: refreshMemories } = useRemovableMemories();
   const { identities, isLoading: isLoadingIdentities, refresh: refreshIdentities } = useRemovableIdentities();
+  const { evaluators, isLoading: isLoadingEvaluators, refresh: refreshEvaluators } = useRemovableEvaluators();
+  const {
+    onlineEvalConfigs,
+    isLoading: isLoadingOnlineEvals,
+    refresh: refreshOnlineEvals,
+  } = useRemovableOnlineEvalConfigs();
 
   // Check if any data is still loading
-  const isLoading = isLoadingAgents || isLoadingGateways || isLoadingTools || isLoadingMemories || isLoadingIdentities;
+  const isLoading =
+    isLoadingAgents ||
+    isLoadingGateways ||
+    isLoadingTools ||
+    isLoadingMemories ||
+    isLoadingIdentities ||
+    isLoadingEvaluators ||
+    isLoadingOnlineEvals;
 
   // Preview hook
   const {
@@ -106,6 +135,8 @@ export function RemoveFlow({
     loadGatewayTargetPreview,
     loadMemoryPreview,
     loadIdentityPreview,
+    loadEvaluatorPreview,
+    loadOnlineEvalPreview,
     reset: resetPreview,
   } = useRemovalPreview();
 
@@ -115,6 +146,8 @@ export function RemoveFlow({
   const { remove: removeGatewayTargetOp, reset: resetRemoveGatewayTarget } = useRemoveGatewayTarget();
   const { remove: removeMemoryOp, reset: resetRemoveMemory } = useRemoveMemory();
   const { remove: removeIdentityOp, reset: resetRemoveIdentity } = useRemoveIdentity();
+  const { remove: removeEvaluatorOp, reset: resetRemoveEvaluator } = useRemoveEvaluator();
+  const { remove: removeOnlineEvalOp, reset: resetRemoveOnlineEval } = useRemoveOnlineEvalConfig();
 
   // Track pending result state
   const pendingResultRef = useRef<FlowState | null>(null);
@@ -135,7 +168,15 @@ export function RemoveFlow({
   // In non-interactive mode, exit after success
   useEffect(() => {
     if (!isInteractive) {
-      const successStates = ['agent-success', 'gateway-success', 'tool-success', 'memory-success', 'identity-success'];
+      const successStates = [
+        'agent-success',
+        'gateway-success',
+        'tool-success',
+        'memory-success',
+        'identity-success',
+        'evaluator-success',
+        'online-eval-success',
+      ];
       if (successStates.includes(flow.name)) {
         onExit();
       }
@@ -162,6 +203,12 @@ export function RemoveFlow({
       case 'identity':
         setFlow({ name: 'select-identity' });
         break;
+      case 'evaluator':
+        setFlow({ name: 'select-evaluator' });
+        break;
+      case 'online-eval':
+        setFlow({ name: 'select-online-eval' });
+        break;
       case 'all':
         setFlow({ name: 'remove-all' });
         break;
@@ -281,6 +328,50 @@ export function RemoveFlow({
     [loadIdentityPreview, force, removeIdentityOp]
   );
 
+  // Runs once an evaluator is picked: loads its removal preview, then either shows the
+  // confirmation screen or, in force mode, removes the evaluator right away.
+  const handleSelectEvaluator = useCallback(
+    async (evaluatorName: string) => {
+      const result = await loadEvaluatorPreview(evaluatorName);
+      if (!result.ok) {
+        setFlow({ name: 'error', message: result.error });
+      } else if (!force) {
+        setFlow({ name: 'confirm-evaluator', evaluatorName, preview: result.preview });
+      } else {
+        setFlow({ name: 'loading', message: `Removing evaluator ${evaluatorName}...` });
+        const removeResult = await removeEvaluatorOp(evaluatorName, result.preview);
+        // Carry the log path into the success state, as handleConfirmEvaluator does.
+        const outcome: FlowState = removeResult.success
+          ? { name: 'evaluator-success', evaluatorName, logFilePath: removeResult.logFilePath }
+          : { name: 'error', message: removeResult.error };
+        setFlow(outcome);
+      }
+    },
+    [loadEvaluatorPreview, force, removeEvaluatorOp]
+  );
+
+  // Runs once an online eval config is picked: loads its removal preview, then either
+  // shows the confirmation screen or, in force mode, removes the config right away.
+  const handleSelectOnlineEval = useCallback(
+    async (configName: string) => {
+      const result = await loadOnlineEvalPreview(configName);
+      if (!result.ok) {
+        setFlow({ name: 'error', message: result.error });
+      } else if (!force) {
+        setFlow({ name: 'confirm-online-eval', configName, preview: result.preview });
+      } else {
+        setFlow({ name: 'loading', message: `Removing online eval config ${configName}...` });
+        const removeResult = await removeOnlineEvalOp(configName, result.preview);
+        // Carry the log path into the success state, as handleConfirmOnlineEval does.
+        const outcome: FlowState = removeResult.success
+          ? { name: 'online-eval-success', configName, logFilePath: removeResult.logFilePath }
+          : { name: 'error', message: removeResult.error };
+        setFlow(outcome);
+      }
+    },
+    [loadOnlineEvalPreview, force, removeOnlineEvalOp]
+  );
+
   // Auto-select resource when initialResourceName is provided and data is loaded
   useEffect(() => {
     if (!initialResourceName || isLoading || hasTriggeredInitialSelection.current) {
@@ -305,6 +396,12 @@ export function RemoveFlow({
         case 'identity':
           void handleSelectIdentity(initialResourceName);
           break;
+        case 'evaluator':
+          void handleSelectEvaluator(initialResourceName);
+          break;
+        case 'online-eval':
+          void handleSelectOnlineEval(initialResourceName);
+          break;
       }
     }, 0);
   }, [
@@ -315,6 +412,8 @@ export function RemoveFlow({
     handleSelectGateway,
     handleSelectMemory,
     handleSelectIdentity,
+    handleSelectEvaluator,
+    handleSelectOnlineEval,
   ]);
 
   // Confirm handlers - pass preview for logging
@@ -398,6 +497,38 @@ export function RemoveFlow({
     [removeIdentityOp]
   );
 
+  const handleConfirmEvaluator = useCallback(
+    async (evaluatorName: string, preview: RemovalPreview) => {
+      pendingResultRef.current = null; // drop any outcome parked by an earlier removal
+      setResultReady(false);
+      setFlow({ name: 'loading', message: `Removing evaluator ${evaluatorName}...` });
+      const result = await removeEvaluatorOp(evaluatorName, preview);
+      if (result.success) {
+        pendingResultRef.current = { name: 'evaluator-success', evaluatorName, logFilePath: result.logFilePath };
+      } else {
+        pendingResultRef.current = { name: 'error', message: result.error };
+      }
+      setResultReady(true); // the outcome is parked in the ref, not set as the flow here
+    },
+    [removeEvaluatorOp]
+  );
+
+  const handleConfirmOnlineEval = useCallback(
+    async (configName: string, preview: RemovalPreview) => {
+      pendingResultRef.current = null; // drop any outcome parked by an earlier removal
+      setResultReady(false);
+      setFlow({ name: 'loading', message: `Removing online eval config ${configName}...` });
+      const result = await removeOnlineEvalOp(configName, preview);
+      if (result.success) {
+        pendingResultRef.current = { name: 'online-eval-success', configName, logFilePath: result.logFilePath };
+      } else {
+        pendingResultRef.current = { name: 'error', message: result.error };
+      }
+      setResultReady(true); // the outcome is parked in the ref, not set as the flow here
+    },
+    [removeOnlineEvalOp]
+  );
+
   const resetAll = useCallback(() => {
     resetPreview();
     resetRemoveAgent();
@@ -405,6 +536,8 @@ export function RemoveFlow({
     resetRemoveGatewayTarget();
     resetRemoveMemory();
     resetRemoveIdentity();
+    resetRemoveEvaluator();
+    resetRemoveOnlineEval();
   }, [
     resetPreview,
     resetRemoveAgent,
@@ -412,11 +545,29 @@ export function RemoveFlow({
     resetRemoveGatewayTarget,
     resetRemoveMemory,
     resetRemoveIdentity,
+    resetRemoveEvaluator,
+    resetRemoveOnlineEval,
   ]);
 
   const refreshAll = useCallback(async () => {
-    await Promise.all([refreshAgents(), refreshGateways(), refreshTools(), refreshMemories(), refreshIdentities()]);
-  }, [refreshAgents, refreshGateways, refreshTools, refreshMemories, refreshIdentities]);
+    await Promise.all([
+      refreshAgents(),
+      refreshGateways(),
+      refreshTools(),
+      refreshMemories(),
+      refreshIdentities(),
+      refreshEvaluators(),
+      refreshOnlineEvals(),
+    ]);
+  }, [
+    refreshAgents,
+    refreshGateways,
+    refreshTools,
+    refreshMemories,
+    refreshIdentities,
+    refreshEvaluators,
+    refreshOnlineEvals,
+  ]);
 
   // Select screen - wait for data to load to avoid arrow position issues
   if (flow.name === 'select') {
@@ -432,6 +583,8 @@ export function RemoveFlow({
         mcpToolCount={mcpTools.length}
         memoryCount={memories.length}
         identityCount={identities.length}
+        evaluatorCount={evaluators.length}
+        onlineEvalCount={onlineEvalConfigs.length}
       />
     );
   }
@@ -514,6 +667,32 @@ export function RemoveFlow({
     );
   }
 
+  if (flow.name === 'select-evaluator') {
+    if (initialResourceName && isLoading) {
+      return null;
+    }
+    return (
+      <RemoveEvaluatorScreen
+        evaluators={evaluators}
+        onSelect={(name: string) => void handleSelectEvaluator(name)}
+        onExit={() => setFlow({ name: 'select' })}
+      />
+    );
+  }
+
+  if (flow.name === 'select-online-eval') {
+    if (initialResourceName && isLoading) {
+      return null;
+    }
+    return (
+      <RemoveOnlineEvalScreen
+        configs={onlineEvalConfigs}
+        onSelect={(name: string) => void handleSelectOnlineEval(name)}
+        onExit={() => setFlow({ name: 'select' })}
+      />
+    );
+  }
+
   // Confirmation screens
   if (flow.name === 'confirm-agent') {
     return (
@@ -570,6 +749,28 @@ export function RemoveFlow({
     );
   }
 
+  if (flow.name === 'confirm-evaluator') {
+    return (
+      <RemoveConfirmScreen
+        title={`Remove Evaluator: ${flow.evaluatorName}`}
+        preview={flow.preview}
+        onConfirm={() => void handleConfirmEvaluator(flow.evaluatorName, flow.preview)}
+        onCancel={() => setFlow({ name: 'select-evaluator' })}
+      />
+    );
+  }
+
+  if (flow.name === 'confirm-online-eval') {
+    return (
+      <RemoveConfirmScreen
+        title={`Remove Online Eval Config: ${flow.configName}`}
+        preview={flow.preview}
+        onConfirm={() => void handleConfirmOnlineEval(flow.configName, flow.preview)}
+        onCancel={() => setFlow({ name: 'select-online-eval' })}
+      />
+    );
+  }
+
   // Success screens
   if (flow.name === 'agent-success') {
     return (
@@ -651,6 +852,38 @@ export function RemoveFlow({
     );
   }
 
+  if (flow.name === 'evaluator-success') {
+    return (
+      <RemoveSuccessScreen
+        isInteractive={isInteractive}
+        message={`Removed evaluator: ${flow.evaluatorName}`}
+        detail="Evaluator removed from agentcore.json. Deploy with `agentcore deploy` to apply changes."
+        logFilePath={flow.logFilePath}
+        onRemoveAnother={() => {
+          resetAll();
+          void refreshAll().then(() => setFlow({ name: 'select' }));
+        }}
+        onExit={onExit}
+      />
+    );
+  }
+
+  if (flow.name === 'online-eval-success') {
+    return (
+      <RemoveSuccessScreen
+        isInteractive={isInteractive}
+        message={`Removed online eval config: ${flow.configName}`}
+        detail="Online eval config removed from agentcore.json. Deploy with `agentcore deploy` to apply changes."
+        logFilePath={flow.logFilePath}
+        onRemoveAnother={() => {
+          resetAll();
+          void refreshAll().then(() => setFlow({ name: 'select' }));
+        }}
+        onExit={onExit}
+      />
+    );
+  }
+
   // Remove all screen
   if (flow.name === 'remove-all') {
     return <RemoveAllScreen isInteractive={isInteractive} onExit={onExit} onNavigate={onNavigate} />;
diff --git a/src/cli/tui/screens/remove/RemoveOnlineEvalScreen.tsx b/src/cli/tui/screens/remove/RemoveOnlineEvalScreen.tsx
new file mode 100644
index 00000000..faab02f4
--- /dev/null
+++ b/src/cli/tui/screens/remove/RemoveOnlineEvalScreen.tsx
@@ -0,0 +1,26 @@
+import type { RemovableOnlineEvalConfig } from '../../../primitives/OnlineEvalConfigPrimitive';
+import { SelectScreen } from '../../components';
+import React from 'react';
+
+interface RemoveOnlineEvalScreenProps {
+  configs: RemovableOnlineEvalConfig[]; // one list entry is rendered per config
+  onSelect: (configName: string) => void; // called with the chosen config's name
+  onExit: () => void; // passed straight through to SelectScreen's onExit
+}
+
+export function RemoveOnlineEvalScreen({ configs, onSelect, onExit }: RemoveOnlineEvalScreenProps) {
+  // Each config becomes one list row; its name doubles as the row id.
+  const items = configs.map(({ name }) => {
+    return { id: name, title: name, description: 'Online Eval Config' };
+  });
+
+  // SelectScreen reports the chosen row; the caller only gets its id (the config name).
+  return (
+    <SelectScreen
+      title="Select Online Eval Config to Remove"
+      items={items}
+      onSelect={picked => onSelect(picked.id)}
+      onExit={onExit}
+    />
+  );
+}
diff --git a/src/cli/tui/screens/remove/RemoveScreen.tsx b/src/cli/tui/screens/remove/RemoveScreen.tsx
index bcb7307c..59441d0e 100644
--- a/src/cli/tui/screens/remove/RemoveScreen.tsx
+++ b/src/cli/tui/screens/remove/RemoveScreen.tsx
@@ -6,6 +6,8 @@ const REMOVE_RESOURCES = [
   { id: 'agent', title: 'Agent', description: 'Remove an agent from the project' },
   { id: 'memory', title: 'Memory', description: 'Remove a memory provider' },
   { id: 'identity', title: 'Identity', description: 'Remove an identity provider' },
+  { id: 'evaluator', title: 'Evaluator', description: 'Remove a custom evaluator' },
+  { id: 'online-eval', title: 'Online Eval Config', description: 'Remove an online eval config' },
   { id: 'gateway', title: 'Gateway', description: 'Remove a gateway' },
   { id: 'gateway-target', title: 'Gateway Target', description: 'Remove a gateway target' },
   { id: 'all', title: 'All', description: 'Reset entire agentcore project' },
@@ -26,6 +28,10 @@ interface RemoveScreenProps {
   memoryCount: number;
   /** Number of identities available for removal */
   identityCount: number;
+  /** Number of evaluators available for removal */
+  evaluatorCount: number;
+  /** Number of online eval configs available for removal */
+  onlineEvalCount: number;
 }
 
 export function RemoveScreen({
@@ -36,6 +42,8 @@ export function RemoveScreen({
   mcpToolCount,
   memoryCount,
   identityCount,
+  evaluatorCount,
+  onlineEvalCount,
 }: RemoveScreenProps) {
   const items: SelectableItem[] = useMemo(() => {
     return REMOVE_RESOURCES.map(r => {
@@ -73,6 +81,18 @@ export function RemoveScreen({
             description = 'No identities to remove';
           }
           break;
+        case 'evaluator':
+          if (evaluatorCount === 0) {
+            disabled = true;
+            description = 'No evaluators to remove';
+          }
+          break;
+        case 'online-eval':
+          if (onlineEvalCount === 0) {
+            disabled = true;
+            description = 'No online eval configs to remove';
+          }
+          break;
         case 'all':
           // 'all' is always available
           break;
@@ -80,7 +100,7 @@ export function RemoveScreen({
 
       return { ...r, disabled, description };
     });
-  }, [agentCount, gatewayCount, mcpToolCount, memoryCount, identityCount]);
+  }, [agentCount, gatewayCount, mcpToolCount, memoryCount, identityCount, evaluatorCount, onlineEvalCount]);
 
   const isDisabled = (item: SelectableItem) => item.disabled ?? false;
 
diff --git a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx
index e1e32e05..4d52e68c 100644
--- a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx
+++ b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx
@@ -17,6 +17,8 @@ describe('RemoveScreen', () => {
         mcpToolCount={1}
         memoryCount={1}
         identityCount={1}
+        evaluatorCount={1}
+        onlineEvalCount={1}
       />
     );
 
@@ -39,6 +41,8 @@ describe('RemoveScreen', () => {
         mcpToolCount={0}
         memoryCount={0}
         identityCount={0}
+        evaluatorCount={0}
+        onlineEvalCount={0}
       />
     );
 
diff --git a/src/cli/tui/screens/remove/index.ts b/src/cli/tui/screens/remove/index.ts
index 71d78c30..4a470fff 100644
--- a/src/cli/tui/screens/remove/index.ts
+++ b/src/cli/tui/screens/remove/index.ts
@@ -1,10 +1,12 @@
 export { RemoveAgentScreen } from './RemoveAgentScreen';
 export { RemoveAllScreen } from './RemoveAllScreen';
 export { RemoveConfirmScreen } from './RemoveConfirmScreen';
+export { RemoveEvaluatorScreen } from './RemoveEvaluatorScreen';
 export { RemoveFlow } from './RemoveFlow';
 export { RemoveGatewayScreen } from './RemoveGatewayScreen';
 export { RemoveIdentityScreen } from './RemoveIdentityScreen';
 export { RemoveGatewayTargetScreen } from './RemoveGatewayTargetScreen';
 export { RemoveMemoryScreen } from './RemoveMemoryScreen';
+export { RemoveOnlineEvalScreen } from './RemoveOnlineEvalScreen';
 export { RemoveScreen, type RemoveResourceType } from './RemoveScreen';
 export { RemoveSuccessScreen } from './RemoveSuccessScreen';
diff --git a/src/cli/tui/screens/remove/useRemoveFlow.ts b/src/cli/tui/screens/remove/useRemoveFlow.ts
index 2c8fea13..114fab96 100644
--- a/src/cli/tui/screens/remove/useRemoveFlow.ts
+++ b/src/cli/tui/screens/remove/useRemoveFlow.ts
@@ -34,6 +34,8 @@ function createDefaultProjectSpec(projectName: string): AgentCoreProjectSpec {
     agents: [],
     memories: [],
     credentials: [],
+    evaluators: [],
+    onlineEvalConfigs: [],
   };
 }
 
diff --git a/src/cli/tui/screens/run-eval/RunEvalFlow.tsx b/src/cli/tui/screens/run-eval/RunEvalFlow.tsx
new file mode 100644
index 00000000..6d4c4d57
--- /dev/null
+++ b/src/cli/tui/screens/run-eval/RunEvalFlow.tsx
@@ -0,0 +1,294 @@
+import { validateAwsCredentials } from '../../../aws/account';
+import { listEvaluators } from '../../../aws/agentcore-control';
+import { detectRegion } from '../../../aws/region';
+import { getErrorMessage } from '../../../errors';
+import { handleRunEval } from '../../../operations/eval';
+import type { RunEvalResult } from '../../../operations/eval/run-eval';
+import type { EvalRunResult } from '../../../operations/eval/types';
+import { loadDeployedProjectConfig } from '../../../operations/resolve-agent';
+import { ErrorPrompt, GradientText, Panel, Screen } from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation } from '../../hooks';
+import { STATUS_COLORS } from '../../theme';
+import type { EvaluatorItem } from '../online-eval/types';
+import { RunEvalScreen } from './RunEvalScreen';
+import type { AgentItem, RunEvalConfig, RunEvalFlowData } from './types';
+import { Box, Text } from 'ink';
+import React, { useCallback, useEffect, useState } from 'react';
+
+type FlowState =
+  | { name: 'loading' }
+  | { name: 'wizard'; data: RunEvalFlowData }
+  | { name: 'running'; config: RunEvalConfig }
+  | { name: 'results'; result: RunEvalResult; run: EvalRunResult }
+  | { name: 'creds-error'; message: string }
+  | { name: 'error'; message: string };
+
+interface RunEvalFlowProps {
+  onExit: () => void;
+  onViewRuns?: () => void;
+}
+
+/** Colour name for an aggregate score: green from 0.8, yellow from 0.5, otherwise red. */
+function scoreColor(score: number): string {
+  const belowGreen = score >= 0.5 ? 'yellow' : 'red';
+  return score >= 0.8 ? 'green' : belowGreen;
+}
+
+function shortEvalName(name: string): string {
+  return name.startsWith('Builtin.') ? name.slice('Builtin.'.length) : name; // display name sans prefix
+}
+
+export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
+  const [flow, setFlow] = useState<FlowState>({ name: 'loading' });
+
+  useEffect(() => {
+    if (flow.name !== 'loading') return;
+    let cancelled = false;
+
+    void (async () => {
+      try {
+        await validateAwsCredentials();
+      } catch (err) {
+        if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) });
+        return;
+      }
+
+      try {
+        const { region } = await detectRegion();
+        const [evalResult, context] = await Promise.all([listEvaluators({ region }), loadDeployedProjectConfig()]);
+
+        if (cancelled) return;
+
+        const evaluators: EvaluatorItem[] = evalResult.evaluators.map(e => ({
+          arn: e.evaluatorArn,
+          name: e.evaluatorName,
+          type: e.evaluatorType,
+          description: e.description,
+        }));
+
+        // Cross-reference project agents with deployed state to only show deployed agents
+        const deployedAgentNames = new Set<string>();
+        for (const target of Object.values(context.deployedState.targets)) {
+          const agentStates = target.resources?.agents;
+          if (agentStates) {
+            for (const name of Object.keys(agentStates)) {
+              deployedAgentNames.add(name);
+            }
+          }
+        }
+
+        const agents: AgentItem[] = context.project.agents
+          .filter(a => deployedAgentNames.has(a.name))
+          .map(a => ({
+            name: a.name,
+            build: a.build,
+          }));
+
+        if (agents.length === 0) {
+          if (!cancelled) {
+            setFlow({
+              name: 'error',
+              message:
+                context.project.agents.length === 0
+                  ? 'No agents found in project. Run `agentcore add agent` first.'
+                  : 'No deployed agents found. Run `agentcore deploy` first.',
+            });
+          }
+          return;
+        }
+
+        if (evaluators.length === 0) {
+          if (!cancelled) {
+            setFlow({
+              name: 'error',
+              message: 'No evaluators found in your account. Create an evaluator first.',
+            });
+          }
+          return;
+        }
+
+        setFlow({ name: 'wizard', data: { agents, evaluators } });
+      } catch (err) {
+        if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [flow.name]);
+
+  const handleRunComplete = useCallback((config: RunEvalConfig) => {
+    setFlow({ name: 'running', config });
+  }, []);
+
+  // Execute the eval when we enter 'running' state
+  useEffect(() => {
+    if (flow.name !== 'running') return;
+    let cancelled = false;
+
+    const { config } = flow;
+
+    void (async () => {
+      try {
+        const result = await handleRunEval({
+          agent: config.agent,
+          evaluator: [],
+          evaluatorArn: config.evaluators,
+          days: config.days,
+        });
+
+        if (cancelled) return;
+
+        if (!result.success || !result.run) {
+          setFlow({ name: 'error', message: result.error ?? 'Evaluation failed' });
+          return;
+        }
+
+        setFlow({ name: 'results', result, run: result.run });
+      } catch (err) {
+        if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps
+
+  if (flow.name === 'loading') {
+    return (
+      <Screen title="Run On-demand Evaluation" onExit={onExit}>
+        <GradientText text="Loading agents and evaluators..." />
+      </Screen>
+    );
+  }
+
+  if (flow.name === 'creds-error') {
+    return <ErrorPrompt message="AWS credentials required" detail={flow.message} onBack={onExit} onExit={onExit} />;
+  }
+
+  if (flow.name === 'wizard') {
+    return (
+      <RunEvalScreen
+        agents={flow.data.agents}
+        evaluatorItems={flow.data.evaluators}
+        onComplete={handleRunComplete}
+        onExit={onExit}
+      />
+    );
+  }
+
+  if (flow.name === 'running') {
+    return (
+      <Screen title="Run On-demand Evaluation" onExit={onExit}>
+        <GradientText text="Running evaluation... this may take a few minutes" />
+      </Screen>
+    );
+  }
+
+  if (flow.name === 'results') {
+    return (
+      <ResultsView
+        run={flow.run}
+        filePath={flow.result.filePath}
+        onRunAnother={() => setFlow({ name: 'loading' })}
+        onViewRuns={onViewRuns}
+        onExit={onExit}
+      />
+    );
+  }
+
+  return (
+    <ErrorPrompt
+      message="Evaluation failed"
+      detail={flow.message}
+      onBack={() => setFlow({ name: 'loading' })}
+      onExit={onExit}
+    />
+  );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Results view
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface ResultsViewProps {
+  run: EvalRunResult;
+  filePath?: string;
+  onRunAnother: () => void;
+  onViewRuns?: () => void;
+  onExit: () => void;
+}
+
+function ResultsView({ run, filePath, onRunAnother, onViewRuns, onExit }: ResultsViewProps) {
+  const actions = [
+    { id: 'another', title: 'Run another evaluation' },
+    ...(onViewRuns ? [{ id: 'view-runs', title: 'View eval runs' }] : []),
+    { id: 'back', title: 'Back' },
+  ];
+
+  const nav = useListNavigation({
+    items: actions,
+    onSelect: item => {
+      if (item.id === 'another') onRunAnother();
+      else if (item.id === 'view-runs') onViewRuns?.();
+      else onExit();
+    },
+    onExit,
+    isActive: true,
+  });
+
+  return (
+    <Screen title="Evaluation Complete" onExit={onExit} helpText={HELP_TEXT.NAVIGATE_SELECT} exitEnabled={false}>
+      <Panel fullWidth>
+        <Box flexDirection="column">
+          <Text color="green">✓ Evaluation complete</Text>
+          <Text>
+            <Text bold>Agent:</Text> {run.agent}
+            {'  '}
+            <Text bold>Sessions:</Text> {run.sessionCount}
+            {'  '}
+            <Text bold>Lookback:</Text> {run.lookbackDays}d
+          </Text>
+
+          <Box marginTop={1} flexDirection="column">
+            {run.results.map((r, i) => {
+              const errCount = r.sessionScores.filter(s => s.errorMessage).length;
+              return (
+                <Text key={i}>
+                  {'  '}
+                  <Text bold>{shortEvalName(r.evaluator)}</Text>
+                  {'  '}
+                  <Text color={scoreColor(r.aggregateScore)}>{r.aggregateScore.toFixed(2)}</Text>
+                  {errCount > 0 && <Text color={STATUS_COLORS.error}> ({errCount} errors)</Text>}
+                </Text>
+              );
+            })}
+          </Box>
+
+          {filePath && (
+            <Box marginTop={1}>
+              <Text dimColor>Results saved to: {filePath}</Text>
+            </Box>
+          )}
+
+          <Box marginTop={1} flexDirection="column">
+            {actions.map((action, idx) => {
+              const selected = idx === nav.selectedIndex;
+              return (
+                <Text key={action.id}>
+                  <Text color={selected ? 'cyan' : undefined}>{selected ? '❯' : ' '} </Text>
+                  <Text color={selected ? 'cyan' : undefined} bold={selected}>
+                    {action.title}
+                  </Text>
+                </Text>
+              );
+            })}
+          </Box>
+        </Box>
+      </Panel>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/run-eval/RunEvalScreen.tsx b/src/cli/tui/screens/run-eval/RunEvalScreen.tsx
new file mode 100644
index 00000000..fde8cb55
--- /dev/null
+++ b/src/cli/tui/screens/run-eval/RunEvalScreen.tsx
@@ -0,0 +1,142 @@
+import type { SelectableItem } from '../../components';
+import {
+  ConfirmReview,
+  Panel,
+  Screen,
+  StepIndicator,
+  TextInput,
+  WizardMultiSelect,
+  WizardSelect,
+} from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation, useMultiSelectNavigation } from '../../hooks';
+import type { EvaluatorItem } from '../online-eval/types';
+import type { AgentItem, RunEvalConfig } from './types';
+import { DEFAULT_LOOKBACK_DAYS, RUN_EVAL_STEP_LABELS } from './types';
+import { useRunEvalWizard } from './useRunEvalWizard';
+import React, { useMemo } from 'react';
+
+interface RunEvalScreenProps {
+  agents: AgentItem[];
+  evaluatorItems: EvaluatorItem[];
+  onComplete: (config: RunEvalConfig) => void;
+  onExit: () => void;
+}
+
+export function RunEvalScreen({ agents, evaluatorItems: rawEvaluatorItems, onComplete, onExit }: RunEvalScreenProps) {
+  const wizard = useRunEvalWizard(agents.length);
+
+  // Auto-select agent if only one
+  const singleAgent = agents.length === 1 ? agents[0]!.name : null;
+  if (singleAgent && !wizard.config.agent) {
+    wizard.setAgent(singleAgent);
+  }
+
+  const agentItems: SelectableItem[] = useMemo(
+    () => agents.map(a => ({ id: a.name, title: a.name, description: a.build })),
+    [agents]
+  );
+
+  const evaluatorItems: SelectableItem[] = useMemo(
+    () =>
+      rawEvaluatorItems.map(e => ({
+        id: e.arn,
+        title: e.name,
+        description: e.type === 'Builtin' ? 'Built-in evaluator' : (e.description ?? 'Custom evaluator'),
+      })),
+    [rawEvaluatorItems]
+  );
+
+  const isAgentStep = wizard.step === 'agent';
+  const isEvaluatorsStep = wizard.step === 'evaluators';
+  const isDaysStep = wizard.step === 'days';
+  const isConfirmStep = wizard.step === 'confirm';
+
+  const agentNav = useListNavigation({
+    items: agentItems,
+    onSelect: item => wizard.setAgent(item.id),
+    onExit,
+    isActive: isAgentStep,
+  });
+
+  const evaluatorsNav = useMultiSelectNavigation({
+    items: evaluatorItems,
+    getId: item => item.id,
+    onConfirm: ids => wizard.setEvaluators(ids),
+    onExit: () => (agents.length <= 1 ? onExit() : wizard.goBack()),
+    isActive: isEvaluatorsStep,
+    requireSelection: true,
+  });
+
+  useListNavigation({
+    items: [{ id: 'confirm', title: 'Confirm' }],
+    onSelect: () => onComplete(wizard.config),
+    onExit: () => wizard.goBack(),
+    isActive: isConfirmStep,
+  });
+
+  const helpText = isAgentStep
+    ? HELP_TEXT.NAVIGATE_SELECT
+    : isEvaluatorsStep
+      ? 'Space toggle · Enter confirm · Esc back'
+      : isConfirmStep
+        ? HELP_TEXT.CONFIRM_CANCEL
+        : HELP_TEXT.TEXT_INPUT;
+
+  const headerContent = <StepIndicator steps={wizard.steps} currentStep={wizard.step} labels={RUN_EVAL_STEP_LABELS} />;
+
+  return (
+    <Screen title="Run On-demand Evaluation" onExit={onExit} helpText={helpText} headerContent={headerContent}>
+      <Panel>
+        {isAgentStep && (
+          <WizardSelect
+            title="Select agent to evaluate"
+            description="Choose a project agent"
+            items={agentItems}
+            selectedIndex={agentNav.selectedIndex}
+          />
+        )}
+
+        {isEvaluatorsStep && (
+          <WizardMultiSelect
+            title="Select evaluators"
+            description="Choose evaluators to run against agent traces"
+            items={evaluatorItems}
+            cursorIndex={evaluatorsNav.cursorIndex}
+            selectedIds={evaluatorsNav.selectedIds}
+          />
+        )}
+
+        {isDaysStep && (
+          <TextInput
+            key="days"
+            prompt="Lookback window (days)"
+            initialValue={String(DEFAULT_LOOKBACK_DAYS)}
+            onSubmit={value => {
+              const days = parseInt(value, 10);
+              if (isNaN(days) || days < 1 || days > 90) return;
+              wizard.setDays(days);
+            }}
+            onCancel={() => wizard.goBack()}
+            customValidation={value => {
+              const days = parseInt(value, 10);
+              if (isNaN(days)) return 'Must be a number';
+              if (days < 1 || days > 90) return 'Must be between 1 and 90';
+              return true;
+            }}
+          />
+        )}
+
+        {isConfirmStep && (
+          <ConfirmReview
+            fields={[
+              { label: 'Agent', value: wizard.config.agent },
+              { label: 'Evaluators', value: wizard.config.evaluators.join(', ') },
+              { label: 'Lookback', value: `${wizard.config.days} day${wizard.config.days !== 1 ? 's' : ''}` },
+            ]}
+          />
+        )}
+      </Panel>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/run-eval/RunScreen.tsx b/src/cli/tui/screens/run-eval/RunScreen.tsx
new file mode 100644
index 00000000..63637bdb
--- /dev/null
+++ b/src/cli/tui/screens/run-eval/RunScreen.tsx
@@ -0,0 +1,32 @@
+import { Screen, WizardSelect } from '../../components';
+import type { SelectableItem } from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation } from '../../hooks';
+import React, { useMemo } from 'react';
+
+interface RunScreenProps {
+  onRunEval: () => void;
+  onExit: () => void;
+}
+
+/** Menu screen titled "Run"; it currently lists a single operation, the on-demand evaluation. */
+export function RunScreen({ onRunEval, onExit }: RunScreenProps) {
+  const operations = useMemo<SelectableItem[]>(() => {
+    const description = 'Evaluate agent traces with selected evaluators';
+    return [{ id: 'run-eval', title: 'On-demand Evaluation', description }];
+  }, []);
+
+  // Only one entry exists, so any selection starts the evaluation flow.
+  const { selectedIndex } = useListNavigation({
+    isActive: true,
+    items: operations,
+    onExit,
+    onSelect: () => onRunEval(),
+  });
+
+  return (
+    <Screen title="Run" onExit={onExit} helpText={HELP_TEXT.NAVIGATE_SELECT} exitEnabled={false}>
+      <WizardSelect title="Choose an operation" items={operations} selectedIndex={selectedIndex} />
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/run-eval/index.ts b/src/cli/tui/screens/run-eval/index.ts
new file mode 100644
index 00000000..d76e0e08
--- /dev/null
+++ b/src/cli/tui/screens/run-eval/index.ts
@@ -0,0 +1,3 @@
+export { RunEvalFlow } from './RunEvalFlow';
+export { RunEvalScreen } from './RunEvalScreen';
+export { RunScreen } from './RunScreen';
diff --git a/src/cli/tui/screens/run-eval/types.ts b/src/cli/tui/screens/run-eval/types.ts
new file mode 100644
index 00000000..24c134b6
--- /dev/null
+++ b/src/cli/tui/screens/run-eval/types.ts
@@ -0,0 +1,28 @@
+import type { EvaluatorItem } from '../online-eval/types';
+
+export type RunEvalStep = 'agent' | 'evaluators' | 'days' | 'confirm';
+
+export interface RunEvalConfig {
+  agent: string;
+  evaluators: string[];
+  days: number;
+}
+
+export const RUN_EVAL_STEP_LABELS: Record<RunEvalStep, string> = {
+  agent: 'Agent',
+  evaluators: 'Evaluators',
+  days: 'Lookback',
+  confirm: 'Confirm',
+};
+
+export const DEFAULT_LOOKBACK_DAYS = 7;
+
+export interface AgentItem {
+  name: string;
+  build: string;
+}
+
+export interface RunEvalFlowData {
+  agents: AgentItem[];
+  evaluators: EvaluatorItem[];
+}
diff --git a/src/cli/tui/screens/run-eval/useRunEvalWizard.ts b/src/cli/tui/screens/run-eval/useRunEvalWizard.ts
new file mode 100644
index 00000000..f842cc0c
--- /dev/null
+++ b/src/cli/tui/screens/run-eval/useRunEvalWizard.ts
@@ -0,0 +1,83 @@
+import type { RunEvalConfig, RunEvalStep } from './types';
+import { DEFAULT_LOOKBACK_DAYS } from './types';
+import { useCallback, useMemo, useState } from 'react';
+
+/** Ordered wizard steps; the agent picker is included only when there is more than one agent. */
+function getAllSteps(agentCount: number): RunEvalStep[] {
+  const afterAgent: RunEvalStep[] = ['evaluators', 'days', 'confirm'];
+  const needsAgentStep = !(agentCount <= 1);
+  return needsAgentStep ? ['agent', ...afterAgent] : afterAgent;
+}
+
+/**
+ * Fresh wizard state: no agent chosen, no evaluators picked, default lookback window.
+ * Built per call, so each caller gets its own object and `evaluators` array.
+ */
+function getDefaultConfig(): RunEvalConfig {
+  return { agent: '', evaluators: [], days: DEFAULT_LOOKBACK_DAYS };
+}
+
+/**
+ * State for the run-eval wizard: the step currently shown and the config being
+ * assembled. Each setter stores its value and moves on to the following step.
+ *
+ * The initial step is captured on first render only; if `agentCount` changes
+ * later, `steps` is recomputed but the current step is not moved.
+ *
+ * @param agentCount Number of agents offered to the user; with one or fewer the
+ *   `agent` step is left out of `steps` and the wizard opens on `evaluators`.
+ * @returns `config`, `step`, the ordered `steps` and `currentIndex` within them,
+ *   plus `goBack`, the value setters (`setAgent`, `setEvaluators`, `setDays`)
+ *   and `reset`.
+ */
+export function useRunEvalWizard(agentCount: number) {
+  // Memoised so the callbacks below keep a stable identity between renders; a
+  // fresh array on every render would invalidate all of their dependency lists.
+  const allSteps = useMemo(() => getAllSteps(agentCount), [agentCount]);
+  const [config, setConfig] = useState<RunEvalConfig>(getDefaultConfig);
+  const [step, setStep] = useState<RunEvalStep>(allSteps[0]!);
+
+  const currentIndex = allSteps.indexOf(step);
+
+  // Steps back one position; a no-op on the first step.
+  const goBack = useCallback(() => {
+    const prevStep = allSteps[currentIndex - 1];
+    if (prevStep) setStep(prevStep);
+  }, [allSteps, currentIndex]);
+
+  // Merges `patch` into the config, then shows the step that follows `from`.
+  // If `from` is not one of this wizard's steps (`agent` when it was skipped),
+  // the step is left untouched: indexOf's -1 would otherwise point at step 0.
+  const commit = useCallback(
+    (patch: Partial<RunEvalConfig>, from: RunEvalStep) => {
+      setConfig(c => ({ ...c, ...patch }));
+      const idx = allSteps.indexOf(from);
+      const next = idx === -1 ? undefined : allSteps[idx + 1];
+      if (next) setStep(next);
+    },
+    [allSteps]
+  );
+
+  // Value setters: store the answer for one step and advance past it.
+  const setAgent = useCallback((agent: string) => commit({ agent }, 'agent'), [commit]);
+  const setEvaluators = useCallback((evaluators: string[]) => commit({ evaluators }, 'evaluators'), [commit]);
+  const setDays = useCallback((days: number) => commit({ days }, 'days'), [commit]);
+
+  // Returns to the first step with an empty config.
+  const reset = useCallback(() => {
+    setConfig(getDefaultConfig());
+    setStep(allSteps[0]!);
+  }, [allSteps]);
+
+  return {
+    config,
+    step,
+    steps: allSteps,
+    currentIndex,
+    goBack,
+    setAgent,
+    setEvaluators,
+    setDays,
+    reset,
+  };
+}
diff --git a/src/cli/tui/utils/commands.ts b/src/cli/tui/utils/commands.ts
index 918d5afb..7e6a2784 100644
--- a/src/cli/tui/utils/commands.ts
+++ b/src/cli/tui/utils/commands.ts
@@ -11,7 +11,7 @@ export interface CommandMeta {
 /**
  * Commands hidden from TUI help but still available via CLI.
  */
-const HIDDEN_FROM_TUI = ['help', 'update', 'package', 'logs', 'traces'] as const;
+const HIDDEN_FROM_TUI = ['help', 'update', 'package', 'logs', 'traces', 'pause', 'resume', 'stop'] as const;
 
 /**
  * Commands hidden from TUI when inside an existing project.
diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts
index fda34160..de6f137a 100644
--- a/src/schema/schemas/agentcore-project.ts
+++ b/src/schema/schemas/agentcore-project.ts
@@ -8,13 +8,20 @@
  */
 import { isReservedProjectName } from '../constants';
 import { AgentEnvSpecSchema } from './agent-env';
+import { EvaluationLevelSchema, EvaluatorConfigSchema, EvaluatorNameSchema } from './primitives/evaluator';
 import { DEFAULT_STRATEGY_NAMESPACES, MemoryStrategySchema, MemoryStrategyTypeSchema } from './primitives/memory';
+import { OnlineEvalConfigSchema } from './primitives/online-eval-config';
 import { uniqueBy } from './zod-util';
 import { z } from 'zod';
 
 // Re-export for convenience
 export { DEFAULT_STRATEGY_NAMESPACES, MemoryStrategySchema, MemoryStrategyTypeSchema };
+export { EvaluationLevelSchema };
 export type { MemoryStrategy, MemoryStrategyType } from './primitives/memory';
+export type { OnlineEvalConfig } from './primitives/online-eval-config';
+export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './primitives/online-eval-config';
+export type { EvaluationLevel, EvaluatorConfig, LlmAsAJudgeConfig, RatingScale } from './primitives/evaluator';
+export { BedrockModelIdSchema, isValidBedrockModelId, EvaluatorNameSchema } from './primitives/evaluator';
 
 // ============================================================================
 // Project Name Schema
@@ -112,42 +119,109 @@ export const CredentialSchema = z.discriminatedUnion('type', [ApiKeyCredentialSc
 export type Credential = z.infer<typeof CredentialSchema>;
 
 // ============================================================================
-// Project Schema (Top Level)
+// Evaluator Schema
 // ============================================================================
 
-export const AgentCoreProjectSpecSchema = z.object({
-  name: ProjectNameSchema,
-  version: z.number().int(),
+export const EvaluatorTypeSchema = z.literal('CustomEvaluator');
+export type EvaluatorType = z.infer<typeof EvaluatorTypeSchema>;
 
-  agents: z
-    .array(AgentEnvSpecSchema)
-    .default([])
-    .superRefine(
-      uniqueBy(
-        agent => agent.name,
-        name => `Duplicate agent name: ${name}`
-      )
-    ),
+export const EvaluatorSchema = z.object({
+  type: EvaluatorTypeSchema,
+  name: EvaluatorNameSchema,
+  level: EvaluationLevelSchema,
+  description: z.string().optional(),
+  config: EvaluatorConfigSchema,
+});
 
-  memories: z
-    .array(MemorySchema)
-    .default([])
-    .superRefine(
-      uniqueBy(
-        memory => memory.name,
-        name => `Duplicate memory name: ${name}`
-      )
-    ),
+export type Evaluator = z.infer<typeof EvaluatorSchema>;
 
-  credentials: z
-    .array(CredentialSchema)
-    .default([])
-    .superRefine(
-      uniqueBy(
-        credential => credential.name,
-        name => `Duplicate credential name: ${name}`
-      )
-    ),
-});
+// ============================================================================
+// Project Schema (Top Level)
+// ============================================================================
+
+const BUILTIN_EVALUATOR_PREFIX = 'Builtin.';
+const ARN_PREFIX = 'arn:';
+
+export const AgentCoreProjectSpecSchema = z
+  .object({
+    name: ProjectNameSchema,
+    version: z.number().int(),
+
+    agents: z
+      .array(AgentEnvSpecSchema)
+      .default([])
+      .superRefine(
+        uniqueBy(
+          agent => agent.name,
+          name => `Duplicate agent name: ${name}`
+        )
+      ),
+
+    memories: z
+      .array(MemorySchema)
+      .default([])
+      .superRefine(
+        uniqueBy(
+          memory => memory.name,
+          name => `Duplicate memory name: ${name}`
+        )
+      ),
+
+    credentials: z
+      .array(CredentialSchema)
+      .default([])
+      .superRefine(
+        uniqueBy(
+          credential => credential.name,
+          name => `Duplicate credential name: ${name}`
+        )
+      ),
+
+    evaluators: z
+      .array(EvaluatorSchema)
+      .default([])
+      .superRefine(
+        uniqueBy(
+          evaluator => evaluator.name,
+          name => `Duplicate evaluator name: ${name}`
+        )
+      ),
+
+    onlineEvalConfigs: z
+      .array(OnlineEvalConfigSchema)
+      .default([])
+      .superRefine(
+        uniqueBy(
+          config => config.name,
+          name => `Duplicate online eval config name: ${name}`
+        )
+      ),
+  })
+  .superRefine((spec, ctx) => {
+    // Cross-field checks: each online eval config must reference a declared agent and declared
+    // evaluators. Issues carry a `path` so they point at the offending entry, not the spec root.
+    const agentNames = new Set(spec.agents.map(a => a.name));
+    const evaluatorNames = new Set(spec.evaluators.map(e => e.name));
+    const report = (path: (string | number)[], message: string) =>
+      ctx.addIssue({ code: z.ZodIssueCode.custom, path, message });
+
+    spec.onlineEvalConfigs.forEach((config, configIndex) => {
+      if (!agentNames.has(config.agent)) {
+        report(
+          ['onlineEvalConfigs', configIndex, 'agent'],
+          `Online eval config "${config.name}" references unknown agent "${config.agent}"`
+        );
+      }
+      config.evaluators.forEach((evalName, evalIndex) => {
+        // Skip built-in evaluators and ARN references (externally managed)
+        if (evalName.startsWith(BUILTIN_EVALUATOR_PREFIX) || evalName.startsWith(ARN_PREFIX)) return;
+        if (evaluatorNames.has(evalName)) return;
+        report(
+          ['onlineEvalConfigs', configIndex, 'evaluators', evalIndex],
+          `Online eval config "${config.name}" references unknown evaluator "${evalName}"`
+        );
+      });
+    });
+  });
 
 export type AgentCoreProjectSpec = z.infer<typeof AgentCoreProjectSpecSchema>;
diff --git a/src/schema/schemas/deployed-state.ts b/src/schema/schemas/deployed-state.ts
index 9741e69d..d8a57185 100644
--- a/src/schema/schemas/deployed-state.ts
+++ b/src/schema/schemas/deployed-state.ts
@@ -119,6 +119,29 @@ export const CredentialDeployedStateSchema = z.object({
 
 export type CredentialDeployedState = z.infer<typeof CredentialDeployedStateSchema>;
 
+// ============================================================================
+// Evaluator Deployed State
+// ============================================================================
+
+export const EvaluatorDeployedStateSchema = z.object({
+  evaluatorId: z.string().min(1),
+  evaluatorArn: z.string().min(1),
+});
+
+export type EvaluatorDeployedState = z.infer<typeof EvaluatorDeployedStateSchema>;
+
+// ============================================================================
+// Online Eval Config Deployed State
+// ============================================================================
+
+export const OnlineEvalDeployedStateSchema = z.object({
+  onlineEvaluationConfigId: z.string().min(1),
+  onlineEvaluationConfigArn: z.string().min(1),
+  executionStatus: z.enum(['ENABLED', 'DISABLED']).optional(),
+});
+
+export type OnlineEvalDeployedState = z.infer<typeof OnlineEvalDeployedStateSchema>;
+
 // ============================================================================
 // Deployed Resource State
 // ============================================================================
@@ -129,6 +152,8 @@ export const DeployedResourceStateSchema = z.object({
   mcp: McpDeployedStateSchema.optional(),
   externallyManaged: ExternallyManagedStateSchema.optional(),
   credentials: z.record(z.string(), CredentialDeployedStateSchema).optional(),
+  evaluators: z.record(z.string(), EvaluatorDeployedStateSchema).optional(),
+  onlineEvalConfigs: z.record(z.string(), OnlineEvalDeployedStateSchema).optional(),
   stackName: z.string().optional(),
   identityKmsKeyArn: z.string().optional(),
 });
diff --git a/src/schema/schemas/primitives/__tests__/evaluator.test.ts b/src/schema/schemas/primitives/__tests__/evaluator.test.ts
new file mode 100644
index 00000000..9147c5cf
--- /dev/null
+++ b/src/schema/schemas/primitives/__tests__/evaluator.test.ts
@@ -0,0 +1,158 @@
+import {
+  CategoricalRatingSchema,
+  EvaluationLevelSchema,
+  EvaluatorConfigSchema,
+  EvaluatorNameSchema,
+  NumericalRatingSchema,
+  RatingScaleSchema,
+} from '../evaluator';
+import { describe, expect, it } from 'vitest';
+
+describe('EvaluationLevelSchema', () => {
+  it.each(['SESSION', 'TRACE', 'TOOL_CALL'])('accepts %s', level => {
+    expect(EvaluationLevelSchema.safeParse(level).success).toBe(true);
+  });
+
+  it.each(['session', 'INVALID', '', 'SPAN'])('rejects %s', level => {
+    expect(EvaluationLevelSchema.safeParse(level).success).toBe(false);
+  });
+});
+
+describe('EvaluatorNameSchema', () => {
+  it('accepts valid names', () => {
+    expect(EvaluatorNameSchema.safeParse('MyEval').success).toBe(true);
+    expect(EvaluatorNameSchema.safeParse('eval_1').success).toBe(true);
+    expect(EvaluatorNameSchema.safeParse('A').success).toBe(true);
+  });
+
+  it('rejects empty string', () => {
+    expect(EvaluatorNameSchema.safeParse('').success).toBe(false);
+  });
+
+  it('rejects names starting with a number', () => {
+    expect(EvaluatorNameSchema.safeParse('1eval').success).toBe(false);
+  });
+
+  it('rejects names starting with underscore', () => {
+    expect(EvaluatorNameSchema.safeParse('_eval').success).toBe(false);
+  });
+
+  it('rejects names with special characters', () => {
+    expect(EvaluatorNameSchema.safeParse('my-eval').success).toBe(false);
+    expect(EvaluatorNameSchema.safeParse('my eval').success).toBe(false);
+    expect(EvaluatorNameSchema.safeParse('my.eval').success).toBe(false);
+  });
+
+  it('rejects names longer than 48 characters', () => {
+    const longName = 'A' + 'a'.repeat(48);
+    expect(longName.length).toBe(49);
+    expect(EvaluatorNameSchema.safeParse(longName).success).toBe(false);
+  });
+
+  it('accepts names exactly 48 characters', () => {
+    const name = 'A' + 'a'.repeat(47);
+    expect(name.length).toBe(48);
+    expect(EvaluatorNameSchema.safeParse(name).success).toBe(true);
+  });
+});
+
+describe('NumericalRatingSchema', () => {
+  it('accepts valid numerical rating', () => {
+    const result = NumericalRatingSchema.safeParse({ value: 1, label: 'Poor', definition: 'Fails expectations' });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects non-integer value', () => {
+    const result = NumericalRatingSchema.safeParse({ value: 1.5, label: 'Ok', definition: 'Decent' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects empty label', () => {
+    const result = NumericalRatingSchema.safeParse({ value: 1, label: '', definition: 'Test' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects empty definition', () => {
+    const result = NumericalRatingSchema.safeParse({ value: 1, label: 'Test', definition: '' });
+    expect(result.success).toBe(false);
+  });
+});
+
+describe('CategoricalRatingSchema', () => {
+  it('accepts valid categorical rating', () => {
+    const result = CategoricalRatingSchema.safeParse({ label: 'Pass', definition: 'Meets criteria' });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects empty label', () => {
+    expect(CategoricalRatingSchema.safeParse({ label: '', definition: 'Test' }).success).toBe(false);
+  });
+});
+
+describe('RatingScaleSchema', () => {
+  it('accepts numerical-only scale', () => {
+    const result = RatingScaleSchema.safeParse({
+      numerical: [
+        { value: 1, label: 'Bad', definition: 'Poor' },
+        { value: 2, label: 'Good', definition: 'Nice' },
+      ],
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts categorical-only scale', () => {
+    const result = RatingScaleSchema.safeParse({
+      categorical: [
+        { label: 'Pass', definition: 'Good' },
+        { label: 'Fail', definition: 'Bad' },
+      ],
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects scale with both numerical and categorical', () => {
+    const result = RatingScaleSchema.safeParse({
+      numerical: [{ value: 1, label: 'Bad', definition: 'Poor' }],
+      categorical: [{ label: 'Pass', definition: 'Good' }],
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects scale with neither numerical nor categorical', () => {
+    const result = RatingScaleSchema.safeParse({});
+    expect(result.success).toBe(false);
+  });
+});
+
+describe('EvaluatorConfigSchema', () => {
+  const validConfig = {
+    llmAsAJudge: {
+      model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
+      instructions: 'Evaluate the quality. Context: {context}',
+      ratingScale: {
+        numerical: [
+          { value: 1, label: 'Poor', definition: 'Fails' },
+          { value: 5, label: 'Excellent', definition: 'Perfect' },
+        ],
+      },
+    },
+  };
+
+  it('accepts valid evaluator config', () => {
+    expect(EvaluatorConfigSchema.safeParse(validConfig).success).toBe(true);
+  });
+
+  it('rejects missing model', () => {
+    const config = { llmAsAJudge: { ...validConfig.llmAsAJudge, model: '' } };
+    expect(EvaluatorConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('rejects missing instructions', () => {
+    const config = { llmAsAJudge: { ...validConfig.llmAsAJudge, instructions: '' } };
+    expect(EvaluatorConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('rejects missing llmAsAJudge key', () => {
+    expect(EvaluatorConfigSchema.safeParse({}).success).toBe(false);
+  });
+});
diff --git a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts
new file mode 100644
index 00000000..1234bd4b
--- /dev/null
+++ b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts
@@ -0,0 +1,107 @@
+import { OnlineEvalConfigNameSchema, OnlineEvalConfigSchema } from '../online-eval-config';
+import { describe, expect, it } from 'vitest';
+
+describe('OnlineEvalConfigNameSchema', () => { // name format and length rules
+  it('accepts valid names', () => {
+    expect(OnlineEvalConfigNameSchema.safeParse('MyConfig').success).toBe(true);
+    expect(OnlineEvalConfigNameSchema.safeParse('config_1').success).toBe(true);
+  });
+
+  it('rejects empty string', () => {
+    expect(OnlineEvalConfigNameSchema.safeParse('').success).toBe(false);
+  });
+
+  it('rejects names starting with a number', () => {
+    expect(OnlineEvalConfigNameSchema.safeParse('1config').success).toBe(false);
+  });
+
+  it('rejects names with hyphens', () => {
+    expect(OnlineEvalConfigNameSchema.safeParse('my-config').success).toBe(false);
+  });
+
+  it('rejects names longer than 48 characters', () => {
+    const longName = 'A' + 'a'.repeat(48); // 49 characters, one over the limit
+    expect(OnlineEvalConfigNameSchema.safeParse(longName).success).toBe(false);
+  });
+});
+
+describe('OnlineEvalConfigSchema', () => {
+  const validConfig = { // baseline valid config reused by the tests below
+    type: 'OnlineEvaluationConfig' as const,
+    name: 'TestConfig',
+    agent: 'MyAgent',
+    evaluators: ['Builtin.GoalSuccessRate'],
+    samplingRate: 10, // a percentage; the schema allows 0.01 to 100
+  };
+
+  it('accepts valid config', () => {
+    expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true);
+  });
+
+  it('accepts multiple evaluators', () => {
+    const config = { ...validConfig, evaluators: ['Builtin.X', 'CustomEval'] };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true);
+  });
+
+  it('accepts evaluator ARNs', () => {
+    const config = {
+      ...validConfig,
+      evaluators: ['arn:aws:bedrock:us-east-1:123456:evaluator/MyEval-abc'],
+    };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true);
+  });
+
+  it('rejects wrong type literal', () => {
+    const config = { ...validConfig, type: 'WrongType' };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('rejects empty evaluators array', () => {
+    const config = { ...validConfig, evaluators: [] };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('rejects sampling rate below 0.01', () => {
+    const config = { ...validConfig, samplingRate: 0.001 };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('rejects sampling rate above 100', () => {
+    const config = { ...validConfig, samplingRate: 101 };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('accepts minimum sampling rate of 0.01', () => {
+    const config = { ...validConfig, samplingRate: 0.01 }; // inclusive lower bound
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true);
+  });
+
+  it('accepts maximum sampling rate of 100', () => {
+    const config = { ...validConfig, samplingRate: 100 }; // inclusive upper bound
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true);
+  });
+
+  it('rejects empty string in evaluators array', () => {
+    const config = { ...validConfig, evaluators: [''] };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('accepts optional description field', () => {
+    const config = { ...validConfig, description: 'My eval config description' };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true);
+  });
+
+  it('rejects description longer than 200 characters', () => {
+    const config = { ...validConfig, description: 'x'.repeat(201) }; // one over the limit
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false);
+  });
+
+  it('accepts optional enableOnCreate field', () => {
+    const config = { ...validConfig, enableOnCreate: false };
+    expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true);
+  });
+
+  it('accepts config without description and enableOnCreate', () => {
+    expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); // validConfig omits both
+  });
+});
diff --git a/src/schema/schemas/primitives/evaluator.ts b/src/schema/schemas/primitives/evaluator.ts
new file mode 100644
index 00000000..ced23b53
--- /dev/null
+++ b/src/schema/schemas/primitives/evaluator.ts
@@ -0,0 +1,84 @@
+import { z } from 'zod';
+
+// ============================================================================
+// Evaluator Types
+// ============================================================================
+
+export const EvaluationLevelSchema = z.enum(['SESSION', 'TRACE', 'TOOL_CALL']);
+export type EvaluationLevel = z.infer<typeof EvaluationLevelSchema>; // 'SESSION' | 'TRACE' | 'TOOL_CALL'
+
+export const EvaluatorNameSchema = z
+  .string()
+  .min(1, 'Name is required') // gives empty input its own message
+  .max(48) // same cap the regex enforces: 1 letter + up to 47 more characters
+  .regex(
+    /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/,
+    'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)'
+  );
+
+// ============================================================================
+// Rating Scale
+// ============================================================================
+
+export const NumericalRatingSchema = z.object({
+  value: z.number().int(), // whole numbers only
+  label: z.string().min(1), // non-empty
+  definition: z.string().min(1), // non-empty
+});
+
+export type NumericalRating = z.infer<typeof NumericalRatingSchema>;
+
+export const CategoricalRatingSchema = z.object({
+  label: z.string().min(1), // non-empty
+  definition: z.string().min(1), // non-empty
+});
+
+export type CategoricalRating = z.infer<typeof CategoricalRatingSchema>;
+
+// Rating scale carrying exactly one of `numerical` / `categorical`.
+// Only presence is compared: an empty array still counts as present.
+export const RatingScaleSchema = z
+  .object({
+    numerical: z.array(NumericalRatingSchema).optional(),
+    categorical: z.array(CategoricalRatingSchema).optional(),
+  })
+  .refine(
+    // XOR: pass when one key is set and the other is absent.
+    ({ numerical, categorical }) => !numerical !== !categorical,
+    { message: 'Rating scale must have either numerical or categorical, not both' }
+  );
+
+/** Inferred TypeScript shape of a parsed rating scale. */
+export type RatingScale = z.infer<typeof RatingScaleSchema>;
+
+// ============================================================================
+// LLM-as-a-Judge Config
+// ============================================================================
+
+// eslint-disable-next-line security/detect-unsafe-regex -- anchored pattern, no backtracking risk
+const BEDROCK_MODEL_ID_PATTERN = /^[a-z][a-z0-9-]*\.[a-zA-Z0-9._-]+(:[0-9]+)?$/;
+const BEDROCK_ARN_PATTERN = /^arn:aws[a-z-]*:bedrock:[a-z0-9-]+:(\d{12}|):(inference-profile|foundation-model)\/.+$/;
+/** True when `value` matches the model-ID or ARN pattern; the ARN account ID may be empty (foundation models). */
+export function isValidBedrockModelId(value: string): boolean {
+  return BEDROCK_MODEL_ID_PATTERN.test(value) || BEDROCK_ARN_PATTERN.test(value);
+}
+
+export const BedrockModelIdSchema = z.string().min(1, 'Model ID is required'); // non-empty only; format is not checked here
+
+export const LlmAsAJudgeConfigSchema = z.object({
+  model: BedrockModelIdSchema, // any non-empty string passes
+  instructions: z.string().min(1, 'Evaluation instructions are required'),
+  ratingScale: RatingScaleSchema, // exactly one of numerical / categorical
+});
+
+export type LlmAsAJudgeConfig = z.infer<typeof LlmAsAJudgeConfigSchema>;
+
+// ============================================================================
+// Evaluator Config
+// ============================================================================
+
+export const EvaluatorConfigSchema = z.object({
+  llmAsAJudge: LlmAsAJudgeConfigSchema, // required; the only key this schema declares
+});
+
+export type EvaluatorConfig = z.infer<typeof EvaluatorConfigSchema>;
diff --git a/src/schema/schemas/primitives/index.ts b/src/schema/schemas/primitives/index.ts
index e7f572e8..1d0fb665 100644
--- a/src/schema/schemas/primitives/index.ts
+++ b/src/schema/schemas/primitives/index.ts
@@ -5,3 +5,26 @@ export {
   MemoryStrategySchema,
   MemoryStrategyTypeSchema,
 } from './memory';
+
+export type {
+  EvaluationLevel,
+  EvaluatorConfig,
+  LlmAsAJudgeConfig,
+  RatingScale,
+  NumericalRating,
+  CategoricalRating,
+} from './evaluator';
+export {
+  BedrockModelIdSchema,
+  isValidBedrockModelId,
+  EvaluationLevelSchema,
+  EvaluatorConfigSchema,
+  EvaluatorNameSchema,
+  LlmAsAJudgeConfigSchema,
+  RatingScaleSchema,
+  NumericalRatingSchema,
+  CategoricalRatingSchema,
+} from './evaluator';
+
+export type { OnlineEvalConfig } from './online-eval-config';
+export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './online-eval-config';
diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts
new file mode 100644
index 00000000..4c87b27c
--- /dev/null
+++ b/src/schema/schemas/primitives/online-eval-config.ts
@@ -0,0 +1,31 @@
+import { z } from 'zod';
+
+// ============================================================================
+// Online Eval Config Types
+// ============================================================================
+
+export const OnlineEvalConfigNameSchema = z
+  .string()
+  .min(1, 'Name is required') // gives empty input its own message
+  .max(48) // same cap the regex enforces: 1 letter + up to 47 more characters
+  .regex(
+    /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/,
+    'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)'
+  );
+
+export const OnlineEvalConfigSchema = z.object({
+  type: z.literal('OnlineEvaluationConfig'), // must be exactly this literal
+  name: OnlineEvalConfigNameSchema,
+  /** Agent name to monitor (must match a project agent; not cross-checked by this schema) */
+  agent: z.string().min(1, 'Agent name is required'),
+  /** Evaluator names (custom), Builtin.* IDs, or evaluator ARNs; only non-emptiness is checked here */
+  evaluators: z.array(z.string().min(1)).min(1, 'At least one evaluator is required'),
+  /** Sampling rate as a percentage (0.01 to 100, both inclusive) */
+  samplingRate: z.number().min(0.01).max(100),
+  /** Optional description for the online eval config (at most 200 characters) */
+  description: z.string().max(200).optional(),
+  /** Whether to enable execution on create (documented default: true; this schema applies no default) */
+  enableOnCreate: z.boolean().optional(),
+});
+
+export type OnlineEvalConfig = z.infer<typeof OnlineEvalConfigSchema>;