From 1f70c4659abe63e19cf618b7e5437c833a6ec7d5 Mon Sep 17 00:00:00 2001
From: Drew Stone
Date: Sun, 1 Mar 2026 00:00:37 -0700
Subject: [PATCH] feat: add MultiActorSession for coordinated multi-user browser testing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable multi-user browser tests where different roles (admin, partner,
user) interact with the same application with isolated auth state.

Each actor gets its own BrowserContext (separate cookies/localStorage)
while sharing the same Browser instance. Consumers orchestrate with
standard async/await for sequential flows and Promise.all() for parallel.

- MultiActorSession.create() — builds isolated contexts per actor
- Actor.run() — delegates to AgentRunner, accumulates results
- session.parallel() — concurrent execution with named result Map
- session.close() — idempotent cleanup with AggregateError on failures
- Partial creation failure cleanup (setup hook throws → all contexts closed)
- Config merging: shared agentConfig + per-actor overrides
- onTurn callback prefixed with actor name for unified logging
- Raw page/context/driver access for hybrid scripted+agent flows
---
 src/index.ts              |   4 +
 src/multi-actor.ts        | 253 +++++++++++++++++++
 tests/multi-actor.test.ts | 519 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 776 insertions(+)
 create mode 100644 src/multi-actor.ts
 create mode 100644 tests/multi-actor.test.ts

diff --git a/src/index.ts b/src/index.ts
index 4860d57..f2e2393 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -93,6 +93,10 @@ export type { DriverConfig } from './config.js';
 // JUnit reporter
 export { generateJUnitXml } from './reporters/junit.js';
 
+// Multi-actor sessions (coordinated multi-user browser testing)
+export { MultiActorSession, Actor } from './multi-actor.js';
+export type { ActorConfig, MultiActorSessionConfig } from './multi-actor.js';
+
 // Artifact pipeline
 export type {
   Artifact,
diff --git a/src/multi-actor.ts b/src/multi-actor.ts
new file mode 100644
index 0000000..2112f56
--- /dev/null
+++ b/src/multi-actor.ts
@@ -0,0 +1,291 @@
+/**
+ * Multi-Actor Session — coordinated multi-user browser testing
+ *
+ * Each actor gets an isolated BrowserContext (separate cookies, localStorage,
+ * auth state) while sharing the same Browser instance. Consumers orchestrate
+ * with standard async/await for sequential flows and Promise.all() for parallel.
+ *
+ * ```typescript
+ * const session = await MultiActorSession.create(browser, {
+ *   actors: {
+ *     admin: { storageState: '.auth/admin.json' },
+ *     partner: { storageState: '.auth/partner.json' },
+ *     user1: {},
+ *   },
+ *   agentConfig: { model: 'gpt-4o', vision: true },
+ * });
+ *
+ * await session.actor('admin').run({ goal: 'Create quest', startUrl: '/admin' });
+ * await session.actor('partner').run({ goal: 'Approve quest' });
+ *
+ * await session.parallel(
+ *   ['user1', { goal: 'Start quest' }],
+ *   ['admin', { goal: 'Monitor dashboard' }],
+ * );
+ *
+ * await session.close();
+ * ```
+ */
+
+import type { Browser, BrowserContext, BrowserContextOptions, Page } from 'playwright';
+import { PlaywrightDriver } from './drivers/playwright.js';
+import type { PlaywrightDriverOptions } from './drivers/playwright.js';
+import { AgentRunner } from './runner.js';
+import type { RunnerOptions } from './runner.js';
+import type { Scenario, AgentConfig, AgentResult, Turn } from './types.js';
+import type { ProjectStore } from './memory/project-store.js';
+
+// ── Types ──
+
+export interface ActorConfig {
+  /** Playwright storage state for pre-authenticated sessions */
+  storageState?: string | BrowserContextOptions['storageState'];
+  /** Setup hook called after context+page creation (e.g., manual login) */
+  setup?: (page: Page) => Promise<void>;
+  /** Agent config overrides for this actor (merged on top of shared config) */
+  agentConfig?: Partial<AgentConfig>;
+  /** Playwright context options (viewport, locale, etc.) */
+  contextOptions?: BrowserContextOptions;
+  /** Playwright driver options (timeout, screenshots) */
+  driverOptions?: PlaywrightDriverOptions;
+}
+
+export interface MultiActorSessionConfig {
+  /** Named actors keyed by role/identity */
+  actors: Record<string, ActorConfig>;
+  /** Shared agent config applied to all actors (per-actor overrides win) */
+  agentConfig?: AgentConfig;
+  /** Turn callback receives actor name + turn for cross-actor logging */
+  onTurn?: (actorName: string, turn: Turn) => void;
+  /** Project memory store shared across actors */
+  projectStore?: ProjectStore;
+}
+
+// ── Actor ──
+
+/**
+ * One named participant in a session. Bundles the actor's BrowserContext,
+ * Page, PlaywrightDriver and AgentRunner, and records every run result.
+ */
+export class Actor {
+  private readonly _results: AgentResult[] = [];
+
+  constructor(
+    readonly name: string,
+    private readonly _context: BrowserContext,
+    private readonly _page: Page,
+    private readonly _driver: PlaywrightDriver,
+    private readonly _runner: AgentRunner,
+  ) {}
+
+  /** Run a scenario with this actor's agent. Results accumulate across calls. */
+  async run(scenario: Scenario): Promise<AgentResult> {
+    const result = await this._runner.run(scenario);
+    this._results.push(result);
+    return result;
+  }
+
+  /** Raw Playwright page for direct operations / assertions */
+  get page(): Page {
+    return this._page;
+  }
+
+  /** BrowserContext for cookie/storage inspection */
+  get context(): BrowserContext {
+    return this._context;
+  }
+
+  /** PlaywrightDriver for low-level driver access */
+  get driver(): PlaywrightDriver {
+    return this._driver;
+  }
+
+  /** All results from this actor's runs */
+  get results(): readonly AgentResult[] {
+    return this._results;
+  }
+
+  /** Most recent result, or undefined if no runs yet */
+  get lastResult(): AgentResult | undefined {
+    return this._results[this._results.length - 1];
+  }
+}
+
+// ── MultiActorSession ──
+
+/**
+ * A group of named actors sharing one Browser. Build with `create()`, address
+ * actors with `actor()` or `parallel()`, and release contexts with `close()`.
+ */
+export class MultiActorSession {
+  private readonly _actors: Map<string, Actor>;
+  private _closed = false;
+
+  private constructor(actors: Map<string, Actor>) {
+    this._actors = actors;
+  }
+
+  /**
+   * Create a session with isolated browser contexts per actor.
+   *
+   * 1. Creates BrowserContext + Page + PlaywrightDriver + AgentRunner per actor
+   * 2. Calls setup() hooks if provided
+   * 3. Returns session ready for orchestration
+   *
+   * @throws Rethrows the first failure from context/page creation or a setup
+   *   hook, after closing every context created so far.
+   */
+  static async create(
+    browser: Browser,
+    config: MultiActorSessionConfig,
+  ): Promise<MultiActorSession> {
+    const actors = new Map<string, Actor>();
+    const createdContexts: BrowserContext[] = [];
+    // Captured once so the per-actor wrapper below needs no non-null assertion
+    const sessionOnTurn = config.onTurn;
+
+    try {
+      for (const [name, actorCfg] of Object.entries(config.actors)) {
+        // Merge shared + per-actor agent config
+        const mergedAgentConfig: AgentConfig = {
+          ...config.agentConfig,
+          ...actorCfg.agentConfig,
+        };
+
+        // Build context options with storageState
+        const contextOptions: BrowserContextOptions = {
+          ...actorCfg.contextOptions,
+        };
+        if (actorCfg.storageState) {
+          contextOptions.storageState = actorCfg.storageState;
+        }
+
+        const context = await browser.newContext(contextOptions);
+        createdContexts.push(context);
+        const page = await context.newPage();
+        const driver = new PlaywrightDriver(page, actorCfg.driverOptions);
+
+        // Wire onTurn to prefix with actor name
+        const onTurn = sessionOnTurn
+          ? (turn: Turn) => sessionOnTurn(name, turn)
+          : undefined;
+
+        const runnerOpts: RunnerOptions = {
+          driver,
+          config: mergedAgentConfig,
+          onTurn,
+          projectStore: config.projectStore,
+        };
+
+        const runner = new AgentRunner(runnerOpts);
+
+        // Run actor setup hook (e.g., manual login flow)
+        if (actorCfg.setup) {
+          await actorCfg.setup(page);
+        }
+
+        actors.set(name, new Actor(name, context, page, driver, runner));
+      }
+    } catch (err) {
+      // Clean up already-created contexts on partial failure
+      for (const ctx of createdContexts) {
+        await ctx.close().catch(() => {});
+      }
+      throw err;
+    }
+
+    return new MultiActorSession(actors);
+  }
+
+  /** Get an actor by name. Throws with available names on miss. */
+  actor(name: string): Actor {
+    const a = this._actors.get(name);
+    if (!a) {
+      const available = [...this._actors.keys()].join(', ') || '(none)';
+      throw new Error(
+        `Actor "${name}" not found. Available actors: ${available}`,
+      );
+    }
+    return a;
+  }
+
+  /**
+   * Run multiple actors in parallel. Returns a Map of actor name → result.
+   *
+   * Actor names are validated before any scenario starts, so an unknown or
+   * repeated name rejects without launching a partial batch. A name may appear
+   * only once per call: each actor drives a single page and the returned Map
+   * holds one result per name.
+   *
+   * If any actor fails (throws), all results collected so far are still
+   * available via each actor's `.results` array.
+   *
+   * @throws Error if a name is unknown or listed more than once
+   */
+  async parallel(
+    ...tasks: [actorName: string, scenario: Scenario][]
+  ): Promise<Map<string, AgentResult>> {
+    // Resolve every actor up front so a bad name fails before any run begins
+    const seen = new Set<string>();
+    const resolved = tasks.map(([name, scenario]) => {
+      const actor = this.actor(name);
+      if (seen.has(name)) {
+        throw new Error(`Actor "${name}" is listed more than once in parallel()`);
+      }
+      seen.add(name);
+      return { name, actor, scenario };
+    });
+
+    const entries = await Promise.all(
+      resolved.map(async ({ name, actor, scenario }) => {
+        const result = await actor.run(scenario);
+        return [name, result] as const;
+      }),
+    );
+    return new Map(entries);
+  }
+
+  /**
+   * Close all browser contexts. Pages close with their contexts.
+   *
+   * Idempotent: calls after the first return immediately. Every context is
+   * attempted even if an earlier one fails to close.
+   *
+   * @throws AggregateError collecting each context's close failure
+   */
+  async close(): Promise<void> {
+    if (this._closed) return;
+    this._closed = true;
+
+    const errors: Error[] = [];
+    for (const actor of this._actors.values()) {
+      try {
+        await actor.context.close();
+      } catch (err) {
+        errors.push(err instanceof Error ? err : new Error(String(err)));
+      }
+    }
+    if (errors.length > 0) {
+      throw new AggregateError(errors, 'Failed to close some actor contexts');
+    }
+  }
+
+  /** All results across all actors, keyed by actor name */
+  get results(): Map<string, readonly AgentResult[]> {
+    const map = new Map<string, readonly AgentResult[]>();
+    for (const [name, actor] of this._actors) {
+      map.set(name, actor.results);
+    }
+    return map;
+  }
+
+  /** All actors for iteration */
+  get allActors(): Actor[] {
+    return [...this._actors.values()];
+  }
+
+  /** Actor names */
+  get actorNames(): string[] {
+    return [...this._actors.keys()];
+  }
+}
diff --git a/tests/multi-actor.test.ts b/tests/multi-actor.test.ts
new file mode 100644
index 0000000..c1bece1
--- /dev/null
+++ b/tests/multi-actor.test.ts
@@ -0,0 +1,519 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { MultiActorSession, Actor } from '../src/multi-actor.js';
+import type { MultiActorSessionConfig } from '../src/multi-actor.js';
+import type { AgentResult, Turn, Scenario } from '../src/types.js';
+
+// ── Mocks ──
+
+/** Minimal mock that satisfies the Browser interface for context creation */
+function mockBrowser() {
+  const contexts: ReturnType<typeof mockContext>[] = [];
+
+  return {
+    contexts,
+    newContext: vi.fn(async (_opts?: Record<string, unknown>) => {
+      const ctx = mockContext();
+      contexts.push(ctx);
+      return ctx;
+    }),
+  };
+}
+
+function mockContext() {
+  const pages: ReturnType<typeof mockPage>[] = [];
+  return {
+    pages,
+    newPage: vi.fn(async () => {
+      const p = mockPage();
+      pages.push(p);
+      return p;
+    }),
+    close: vi.fn(async () => {}),
+    storageState: vi.fn(),
+  };
+}
+
+function mockPage() {
+  return {
+    url: vi.fn(() => 'http://localhost'),
+    title: vi.fn(async () => 'Test'),
+    goto: vi.fn(async () => null),
+    waitForLoadState: vi.fn(async () => {}),
+    waitForTimeout: vi.fn(async () => {}),
+    screenshot: vi.fn(async () => Buffer.from('fake')),
+    evaluate: vi.fn(async () => null),
+    mouse: { wheel: vi.fn(async () => {}) },
+    locator: vi.fn(() => ({
+      click: vi.fn(async () => {}),
+      fill: vi.fn(async () => {}),
+    })),
+  };
+}
+
+function successResult(overrides?: Partial<AgentResult>): AgentResult {
+  return {
+    success: true,
+    result: 'done',
+    turns: [],
+    totalMs: 100,
+    ...overrides,
+  };
+}
+
+// We need to mock AgentRunner since it requires real LLM calls.
+// Mock the module so create() builds runners that return controlled results.
+const mockRunFn = vi.fn<(scenario: Scenario) => Promise<AgentResult>>();
+
+vi.mock('../src/runner.js', () => {
+  // Vitest v4 requires `function` keyword for mock constructors
+  const AgentRunner = vi.fn(function (this: { run: typeof mockRunFn }) {
+    this.run = mockRunFn;
+  });
+  return { AgentRunner };
+});
+
+// Mock PlaywrightDriver since it requires a real Page
+vi.mock('../src/drivers/playwright.js', () => {
+  const PlaywrightDriver = vi.fn(function () {
+    // empty — driver methods aren't called directly in these tests
+  });
+  return { PlaywrightDriver };
+});
+
+// ── Tests ──
+
+describe('MultiActorSession', () => {
+  let browser: ReturnType<typeof mockBrowser>;
+
+  beforeEach(() => {
+    browser = mockBrowser();
+    mockRunFn.mockReset();
+    mockRunFn.mockResolvedValue(successResult());
+  });
+
+  describe('create', () => {
+    it('creates contexts and pages for each actor', async () => {
+      const config: MultiActorSessionConfig = {
+        actors: {
+          admin: {},
+          partner: {},
+          user1: {},
+        },
+      };
+
+      const session = await MultiActorSession.create(browser as never, config);
+
+      expect(browser.newContext).toHaveBeenCalledTimes(3);
+      expect(browser.contexts).toHaveLength(3);
+      expect(session.actorNames).toEqual(['admin', 'partner', 'user1']);
+      expect(session.allActors).toHaveLength(3);
+
+      await session.close();
+    });
+
+    it('passes storageState to context options', async () => {
+      const config: MultiActorSessionConfig = {
+        actors: {
+          admin: { storageState: '.auth/admin.json' },
+        },
+      };
+
+      await MultiActorSession.create(browser as never, config);
+
+      expect(browser.newContext).toHaveBeenCalledWith(
+        expect.objectContaining({ storageState: '.auth/admin.json' }),
+      );
+    });
+
+    it('passes contextOptions through to browser.newContext', async () => {
+      const config: MultiActorSessionConfig = {
+        actors: {
+          admin: {
+            contextOptions: { viewport: { width: 1920, height: 1080 }, locale: 'en-US' },
+          },
+        },
+      };
+
+      await MultiActorSession.create(browser as never, config);
+
+      expect(browser.newContext).toHaveBeenCalledWith(
+        expect.objectContaining({
+          viewport: { width: 1920, height: 1080 },
+          locale: 'en-US',
+        }),
+      );
+    });
+
+    it('calls setup hooks during creation', async () => {
+      const setupFn = vi.fn(async () => {});
+
+      const config: MultiActorSessionConfig = {
+        actors: {
+          admin: { setup: setupFn },
+        },
+      };
+
+      await MultiActorSession.create(browser as never, config);
+
+      expect(setupFn).toHaveBeenCalledTimes(1);
+      // Setup receives the page
+      expect(setupFn).toHaveBeenCalledWith(expect.objectContaining({ url: expect.any(Function) }));
+    });
+
+    it('merges shared agentConfig with per-actor overrides', async () => {
+      const { AgentRunner } = await import('../src/runner.js');
+      vi.mocked(AgentRunner).mockClear();
+
+      const config: MultiActorSessionConfig = {
+        agentConfig: { model: 'gpt-4o', vision: true, debug: false },
+        actors: {
+          admin: { agentConfig: { model: 'claude-sonnet-4-20250514', debug: true } },
+          user: {},
+        },
+      };
+
+      await MultiActorSession.create(browser as never, config);
+
+      // AgentRunner is called twice (once per actor)
+      const calls = vi.mocked(AgentRunner).mock.calls;
+      expect(calls).toHaveLength(2);
+
+      // admin: per-actor model + debug override shared config
+      expect(calls[0][0].config).toEqual({
+        model: 'claude-sonnet-4-20250514',
+        vision: true,
+        debug: true,
+      });
+
+      // user: inherits shared config as-is
+      expect(calls[1][0].config).toEqual({
+        model: 'gpt-4o',
+        vision: true,
+        debug: false,
+      });
+    });
+
+    it('passes driverOptions to PlaywrightDriver', async () => {
+      const { PlaywrightDriver } = await import('../src/drivers/playwright.js');
+      vi.mocked(PlaywrightDriver).mockClear();
+
+      const driverOpts = { timeout: 5000, captureScreenshots: false };
+
+      await MultiActorSession.create(browser as never, {
+        actors: { admin: { driverOptions: driverOpts } },
+      });
+
+      expect(vi.mocked(PlaywrightDriver)).toHaveBeenCalledWith(
+        expect.anything(), // page
+        driverOpts,
+      );
+    });
+
+    it('passes projectStore to all AgentRunners', async () => {
+      const { AgentRunner } = await import('../src/runner.js');
+      vi.mocked(AgentRunner).mockClear();
+
+      const fakeStore = { getKnowledgePath: vi.fn(), getSelectorCachePath: vi.fn() };
+
+      await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, user: {} },
+        projectStore: fakeStore as never,
+      });
+
+      const calls = vi.mocked(AgentRunner).mock.calls;
+      expect(calls).toHaveLength(2);
+      expect(calls[0][0].projectStore).toBe(fakeStore);
+      expect(calls[1][0].projectStore).toBe(fakeStore);
+    });
+
+    it('cleans up contexts if a setup hook throws', async () => {
+      const failingSetup = vi.fn(async () => { throw new Error('login failed'); });
+
+      await expect(
+        MultiActorSession.create(browser as never, {
+          actors: {
+            admin: {}, // created successfully
+            partner: { setup: failingSetup }, // throws during setup
+          },
+        }),
+      ).rejects.toThrowError('login failed');
+
+      // Both contexts should be closed (cleanup)
+      for (const ctx of browser.contexts) {
+        expect(ctx.close).toHaveBeenCalledTimes(1);
+      }
+    });
+  });
+
+  describe('actor()', () => {
+    it('returns the correct actor by name', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, partner: {} },
+      });
+
+      const admin = session.actor('admin');
+      expect(admin).toBeInstanceOf(Actor);
+      expect(admin.name).toBe('admin');
+
+      await session.close();
+    });
+
+    it('throws descriptive error on invalid actor name', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, partner: {} },
+      });
+
+      expect(() => session.actor('unknown')).toThrowError(
+        'Actor "unknown" not found. Available actors: admin, partner',
+      );
+
+      await session.close();
+    });
+  });
+
+  describe('Actor.run()', () => {
+    it('delegates to AgentRunner and accumulates results', async () => {
+      const result1 = successResult({ result: 'first' });
+      const result2 = successResult({ result: 'second' });
+      mockRunFn.mockResolvedValueOnce(result1).mockResolvedValueOnce(result2);
+
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+      });
+
+      const admin = session.actor('admin');
+
+      const r1 = await admin.run({ goal: 'Do first thing' });
+      expect(r1).toBe(result1);
+      expect(admin.results).toHaveLength(1);
+      expect(admin.lastResult).toBe(result1);
+
+      const r2 = await admin.run({ goal: 'Do second thing' });
+      expect(r2).toBe(result2);
+      expect(admin.results).toHaveLength(2);
+      expect(admin.lastResult).toBe(result2);
+
+      await session.close();
+    });
+
+    it('passes scenario through to runner', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+      });
+
+      const scenario: Scenario = { goal: 'Create quest', startUrl: '/admin', maxTurns: 10 };
+      await session.actor('admin').run(scenario);
+
+      expect(mockRunFn).toHaveBeenCalledWith(scenario);
+
+      await session.close();
+    });
+  });
+
+  describe('parallel()', () => {
+    it('runs multiple actors concurrently and returns Map of results', async () => {
+      const adminResult = successResult({ result: 'admin done' });
+      const userResult = successResult({ result: 'user done' });
+
+      // Track call order to verify concurrency
+      let callCount = 0;
+      mockRunFn.mockImplementation(async () => {
+        callCount++;
+        const thisCall = callCount;
+        // Simulate async work — both should start before either finishes
+        await new Promise((r) => setTimeout(r, 10));
+        return thisCall <= 1 ? adminResult : userResult;
+      });
+
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, user1: {} },
+      });
+
+      const results = await session.parallel(
+        ['admin', { goal: 'Monitor' }],
+        ['user1', { goal: 'Browse' }],
+      );
+
+      expect(results).toBeInstanceOf(Map);
+      expect(results.size).toBe(2);
+      expect(results.get('admin')).toBe(adminResult);
+      expect(results.get('user1')).toBe(userResult);
+
+      // Results also accumulated on actors
+      expect(session.actor('admin').results).toHaveLength(1);
+      expect(session.actor('user1').results).toHaveLength(1);
+
+      await session.close();
+    });
+
+    it('throws on invalid actor name', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+      });
+
+      await expect(
+        session.parallel(['nonexistent', { goal: 'Fail' }]),
+      ).rejects.toThrowError(/Actor "nonexistent" not found/);
+
+      await session.close();
+    });
+  });
+
+  describe('onTurn callback', () => {
+    it('receives actor name and turn data', async () => {
+      const turnCallback = vi.fn();
+
+      // We need to capture the onTurn that was passed to AgentRunner
+      // and invoke it to verify the wrapping behavior
+      const { AgentRunner } = await import('../src/runner.js');
+      let capturedOnTurn: ((turn: Turn) => void) | undefined;
+
+      vi.mocked(AgentRunner).mockImplementation(function (this: { run: typeof mockRunFn }, opts: { onTurn?: (turn: Turn) => void }) {
+        capturedOnTurn = opts.onTurn;
+        this.run = mockRunFn;
+      } as never);
+
+      await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+        onTurn: turnCallback,
+      });
+
+      // Simulate a turn callback from the runner
+      const fakeTurn: Turn = {
+        turn: 1,
+        state: { url: 'http://localhost', title: 'Test', snapshot: '' },
+        action: { action: 'click', selector: '@abc' },
+        durationMs: 50,
+      };
+
+      expect(capturedOnTurn).toBeDefined();
+      capturedOnTurn!(fakeTurn);
+
+      expect(turnCallback).toHaveBeenCalledWith('admin', fakeTurn);
+    });
+  });
+
+  describe('close()', () => {
+    it('closes all browser contexts', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, partner: {}, user1: {} },
+      });
+
+      await session.close();
+
+      for (const ctx of browser.contexts) {
+        expect(ctx.close).toHaveBeenCalledTimes(1);
+      }
+    });
+
+    it('is idempotent — second close is a no-op', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+      });
+
+      await session.close();
+      await session.close();
+
+      expect(browser.contexts[0].close).toHaveBeenCalledTimes(1);
+    });
+
+    it('throws AggregateError if contexts fail to close', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, partner: {} },
+      });
+
+      browser.contexts[0].close.mockRejectedValue(new Error('context 0 stuck'));
+      browser.contexts[1].close.mockRejectedValue(new Error('context 1 stuck'));
+
+      await expect(session.close()).rejects.toThrowError(/Failed to close some actor contexts/);
+    });
+  });
+
+  describe('results', () => {
+    it('aggregates results across all actors', async () => {
+      const r1 = successResult({ result: 'admin-1' });
+      const r2 = successResult({ result: 'partner-1' });
+      mockRunFn.mockResolvedValueOnce(r1).mockResolvedValueOnce(r2);
+
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, partner: {} },
+      });
+
+      await session.actor('admin').run({ goal: 'A' });
+      await session.actor('partner').run({ goal: 'B' });
+
+      const results = session.results;
+      expect(results.get('admin')).toEqual([r1]);
+      expect(results.get('partner')).toEqual([r2]);
+
+      await session.close();
+    });
+  });
+
+  describe('Actor page/context/driver access', () => {
+    it('exposes raw page, context, and driver for direct operations', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+      });
+
+      const admin = session.actor('admin');
+
+      expect(admin.page).toBeDefined();
+      expect(admin.page.url).toBeDefined();
+      expect(admin.context).toBeDefined();
+      expect(admin.context.close).toBeDefined();
+      expect(admin.driver).toBeDefined();
+
+      await session.close();
+    });
+  });
+
+  describe('parallel() partial failure', () => {
+    it('rejects but still accumulates results on actors that succeeded', async () => {
+      const adminResult = successResult({ result: 'admin done' });
+
+      let resolveAdmin: () => void;
+      const adminPromise = new Promise<void>((r) => { resolveAdmin = r; });
+
+      mockRunFn.mockImplementation(async (scenario) => {
+        if (scenario.goal === 'Monitor') {
+          // Admin finishes first
+          resolveAdmin!();
+          return adminResult;
+        }
+        // User throws after admin finishes
+        await adminPromise;
+        throw new Error('Agent failed');
+      });
+
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {}, user1: {} },
+      });
+
+      await expect(
+        session.parallel(
+          ['admin', { goal: 'Monitor' }],
+          ['user1', { goal: 'Browse' }],
+        ),
+      ).rejects.toThrowError('Agent failed');
+
+      // Admin's result is still accessible
+      expect(session.actor('admin').results).toHaveLength(1);
+      expect(session.actor('admin').lastResult).toBe(adminResult);
+
+      await session.close();
+    });
+  });
+
+  describe('lastResult', () => {
+    it('returns undefined when no runs have occurred', async () => {
+      const session = await MultiActorSession.create(browser as never, {
+        actors: { admin: {} },
+      });
+
+      expect(session.actor('admin').lastResult).toBeUndefined();
+
+      await session.close();
+    });
+  });
+});