diff --git a/README.md b/README.md index 90b7a10..857ed4d 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ The Browserbase MCP server accepts the following command-line flags: | `--browserWidth ` | Browser viewport width (default: 1024) | | `--browserHeight ` | Browser viewport height (default: 768) | | `--modelName ` | The model to use for Stagehand (default: google/gemini-2.5-flash-lite) | -| `--modelApiKey ` | API key for the custom model provider (required when using custom models) | +| `--modelApiKey ` | Optional API key for a custom model provider | | `--experimental` | Enable experimental features (default: false) | These flags can be passed directly to the CLI or configured in your MCP configuration file. @@ -180,7 +180,7 @@ These flags can be passed directly to the CLI or configured in your MCP configur Stagehand defaults to using Google's Gemini 2.5 Flash Lite model, but you can configure it to use other models like GPT-4o, Claude, or other providers. -**Important**: When using any custom model (non-default), you must provide your own API key for that model provider using the `--modelApiKey` flag. +When `--modelApiKey` is omitted, Browserbase routes supported provider/model names through the Browserbase model gateway using your Browserbase API key. Provide `--modelApiKey` only when you want to bring your own provider key. ```json { @@ -190,9 +190,7 @@ Stagehand defaults to using Google's Gemini 2.5 Flash Lite model, but you can co "args": [ "@browserbasehq/mcp", "--modelName", - "anthropic/claude-sonnet-4.5", - "--modelApiKey", - "your-anthropic-api-key" + "anthropic/claude-sonnet-4.5" ], "env": { "BROWSERBASE_API_KEY": "", diff --git a/config.d.ts b/config.d.ts index e1db151..9118890 100644 --- a/config.d.ts +++ b/config.d.ts @@ -96,12 +96,12 @@ export type Config = { * The Model that Stagehand uses * Available models: OpenAI, Claude, Gemini, Cerebras, Groq, and other providers * - * @default "gemini-2.0-flash" + * @default "google/gemini-2.5-flash-lite" */ modelName?: z.infer; /** - * API key for the custom model provider - * Required when using a model other than the default gemini-2.0-flash + * Optional API key for a custom model provider. + * When omitted, Browserbase routes supported provider/model names through the model gateway. */ modelApiKey?: string; /** diff --git a/src/config.test.ts b/src/config.test.ts index cf91bcd..41a37a2 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -1,6 +1,10 @@ import { describe, expect, it } from "vitest"; -import { configFromCLIOptions, normalizeVerifiedConfig } from "./config.js"; +import { + configFromCLIOptions, + normalizeVerifiedConfig, + resolveConfig, +} from "./config.js"; import { configSchema } from "./index.js"; describe("verified config compatibility", () => { @@ -31,3 +35,55 @@ describe("verified config compatibility", () => { expect(config.advancedStealth).toBe(true); }); }); + +describe("model gateway config", () => { + it("accepts a provider/model modelName without modelApiKey", () => { + const config = configSchema.parse({ + browserbaseApiKey: "test-key", + browserbaseProjectId: "test-project", + modelName: "openai/gpt-4.1", + }); + + expect(config.modelName).toBe("openai/gpt-4.1"); + expect(config.modelApiKey).toBeUndefined(); + }); + + it("keeps modelApiKey undefined when no provider key is configured", async () => { + const originalGeminiApiKey = process.env.GEMINI_API_KEY; + const originalGoogleApiKey = process.env.GOOGLE_API_KEY; + const originalBrowserbaseApiKey = process.env.BROWSERBASE_API_KEY; + const originalBrowserbaseProjectId = process.env.BROWSERBASE_PROJECT_ID; + + delete process.env.GEMINI_API_KEY; + delete process.env.GOOGLE_API_KEY; + process.env.BROWSERBASE_API_KEY = "test-browserbase-key"; + process.env.BROWSERBASE_PROJECT_ID = "test-project"; + + try { + const config = await resolveConfig({ + modelName: "openai/gpt-4.1", + }); + + expect(config.modelName).toBe("openai/gpt-4.1"); + expect(config.modelApiKey).toBeUndefined(); + } finally { + if (originalGeminiApiKey === undefined) delete process.env.GEMINI_API_KEY; + else process.env.GEMINI_API_KEY = originalGeminiApiKey; + + if (originalGoogleApiKey === undefined) delete process.env.GOOGLE_API_KEY; + else process.env.GOOGLE_API_KEY = originalGoogleApiKey; + + if (originalBrowserbaseApiKey === undefined) { + delete process.env.BROWSERBASE_API_KEY; + } else { + process.env.BROWSERBASE_API_KEY = originalBrowserbaseApiKey; + } + + if (originalBrowserbaseProjectId === undefined) { + delete process.env.BROWSERBASE_PROJECT_ID; + } else { + process.env.BROWSERBASE_PROJECT_ID = originalBrowserbaseProjectId; + } + } + }); +}); diff --git a/src/config.ts b/src/config.ts index fc2b87c..6b012c5 100644 --- a/src/config.ts +++ b/src/config.ts @@ -65,13 +65,6 @@ export async function resolveConfig(cliOptions: CLIOptions): Promise { mergedConfig.browserbaseProjectId = "dummy-browserbase-project-id"; } - if (!mergedConfig.modelApiKey) { - console.warn( - "Warning: MODEL_API_KEY environment variable not set. Using dummy value.", - ); - mergedConfig.modelApiKey = "dummy-api-key"; - } - return mergedConfig; } diff --git a/src/index.ts b/src/index.ts index 38c0086..7fc6a38 100644 --- a/src/index.ts +++ b/src/index.ts @@ -20,102 +20,78 @@ import { } from "@modelcontextprotocol/sdk/types.js"; // Configuration schema - matches existing Config interface -export const configSchema = z - .object({ - browserbaseApiKey: z.string().describe("The Browserbase API Key to use"), - browserbaseProjectId: z - .string() - .describe("The Browserbase Project ID to use"), - proxies: z - .boolean() - .optional() - .describe("Whether or not to use Browserbase proxies"), - verified: z - .boolean() - .optional() - .describe( - "Use Browserbase Verified Identity. Only available to Browserbase Scale Plan users", - ), - advancedStealth: z - .boolean() - .optional() - .describe("Deprecated alias for verified"), - keepAlive: z - .boolean() - .optional() - .describe("Whether or not to keep the Browserbase session alive"), - context: z - .object({ - contextId: z - .string() - .optional() - .describe("The ID of the context to use"), - persist: z - .boolean() - .optional() - .describe("Whether or not to persist the context"), - }) - .optional(), - viewPort: z - .object({ - browserWidth: z - .number() - .optional() - .describe("The width of the browser"), - browserHeight: z - .number() - .optional() - .describe("The height of the browser"), - }) - .optional(), - server: z - .object({ - port: z - .number() - .optional() - .describe("The port to listen on for SHTTP or MCP transport"), - host: z - .string() - .optional() - .describe( - "The host to bind the server to. Default is localhost. Use 0.0.0.0 to bind to all interfaces", - ), - }) - .optional(), - modelName: z - .string() - .optional() - .describe( - "The model to use for Stagehand (default: google/gemini-2.5-flash-lite)", - ), - modelApiKey: z - .string() - .optional() - .describe( - "API key for the custom model provider. Required when using a model other than the default google/gemini-2.5-flash-lite", - ), - experimental: z - .boolean() - .optional() - .describe("Enable experimental Stagehand features"), - }) - .refine( - (data) => { - // If a non-default model is explicitly specified, API key is required - if (data.modelName && data.modelName !== "google/gemini-2.5-flash-lite") { - return ( - data.modelApiKey !== undefined && - typeof data.modelApiKey === "string" && - data.modelApiKey.length > 0 - ); - } - return true; - }, - { - message: "modelApiKey is required when specifying a custom model", - path: ["modelApiKey"], - }, - ); +export const configSchema = z.object({ + browserbaseApiKey: z.string().describe("The Browserbase API Key to use"), + browserbaseProjectId: z + .string() + .describe("The Browserbase Project ID to use"), + proxies: z + .boolean() + .optional() + .describe("Whether or not to use Browserbase proxies"), + verified: z + .boolean() + .optional() + .describe( + "Use Browserbase Verified Identity. Only available to Browserbase Scale Plan users", + ), + advancedStealth: z + .boolean() + .optional() + .describe("Deprecated alias for verified"), + keepAlive: z + .boolean() + .optional() + .describe("Whether or not to keep the Browserbase session alive"), + context: z + .object({ + contextId: z.string().optional().describe("The ID of the context to use"), + persist: z + .boolean() + .optional() + .describe("Whether or not to persist the context"), + }) + .optional(), + viewPort: z + .object({ + browserWidth: z.number().optional().describe("The width of the browser"), + browserHeight: z + .number() + .optional() + .describe("The height of the browser"), + }) + .optional(), + server: z + .object({ + port: z + .number() + .optional() + .describe("The port to listen on for SHTTP or MCP transport"), + host: z + .string() + .optional() + .describe( + "The host to bind the server to. Default is localhost. Use 0.0.0.0 to bind to all interfaces", + ), + }) + .optional(), + modelName: z + .string() + .optional() + .describe( + "The model to use for Stagehand (default: google/gemini-2.5-flash-lite)", + ), + modelApiKey: z + .string() + .optional() + .describe( + "Optional API key for a custom model provider. When omitted, Browserbase routes supported provider/model names through the model gateway.", + ), + experimental: z + .boolean() + .optional() + .describe("Enable experimental Stagehand features"), +}); // Default function for creating MCP server instance export default function ({ config }: { config: z.infer }) { diff --git a/src/program.ts b/src/program.ts index 58795f4..c3eac38 100644 --- a/src/program.ts +++ b/src/program.ts @@ -55,11 +55,11 @@ program .option("--browserHeight ", "Browser height to use for the browser.") .option( "--modelName ", - "The model to use for Stagehand (default: gemini-2.0-flash)", + "The model to use for Stagehand (default: google/gemini-2.5-flash-lite)", ) .option( "--modelApiKey ", - "API key for the custom model provider (required when using custom models)", + "Optional API key for a custom model provider. When omitted, Browserbase routes supported provider/model names through the model gateway.", ) .option("--keepAlive", "Enable Browserbase Keep Alive Session") .option("--experimental", "Enable experimental features") @@ -74,7 +74,7 @@ program if (options.port) startHttpTransport(+options.port, options.host, serverList); - else await startStdioTransport(serverList, config); + else await startStdioTransport(serverList); }); function setupExitWatchdog(serverList: ServerList) { diff --git a/src/sessionManager.test.ts b/src/sessionManager.test.ts new file mode 100644 index 0000000..1900baf --- /dev/null +++ b/src/sessionManager.test.ts @@ -0,0 +1,82 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import type { Config } from "../config.d.ts"; + +const stagehandMock = vi.hoisted(() => { + return { + constructor: vi.fn(), + init: vi.fn().mockResolvedValue(undefined), + }; +}); + +vi.mock("@browserbasehq/stagehand", () => { + return { + Stagehand: stagehandMock.constructor.mockImplementation(function (options) { + return { + init: stagehandMock.init, + context: { + pages: () => [{ id: "test-page" }], + }, + browserbaseSessionId: "bb-session-id", + connectURL: () => "wss://connect.example.test", + close: vi.fn().mockResolvedValue(undefined), + __options: options, + }; + }), + }; +}); + +describe("createStagehandInstance model gateway behavior", () => { + beforeEach(() => { + stagehandMock.constructor.mockClear(); + stagehandMock.init.mockClear(); + delete process.env.GEMINI_API_KEY; + delete process.env.GOOGLE_API_KEY; + }); + + it("uses Browserbase model gateway when modelName is set and modelApiKey is omitted", async () => { + const { createStagehandInstance } = await import("./sessionManager.js"); + const config: Config = { + browserbaseApiKey: "bb-test-key", + browserbaseProjectId: "test-project", + modelName: "openai/gpt-4.1", + }; + + await createStagehandInstance(config, {}, "test-session-id"); + + expect(stagehandMock.constructor).toHaveBeenCalledOnce(); + expect(stagehandMock.constructor).toHaveBeenCalledWith( + expect.objectContaining({ + env: "BROWSERBASE", + apiKey: "bb-test-key", + projectId: "test-project", + model: { + modelName: "gateway/openai/gpt-4.1", + apiKey: "bb-test-key", + }, + }), + ); + }); + + it("prefers an explicit modelApiKey over Browserbase model gateway", async () => { + const { createStagehandInstance } = await import("./sessionManager.js"); + const config: Config = { + browserbaseApiKey: "bb-test-key", + browserbaseProjectId: "test-project", + modelName: "openai/gpt-4.1", + modelApiKey: "sk-provider-key", + }; + + await createStagehandInstance(config, {}, "test-session-id"); + + expect(stagehandMock.constructor).toHaveBeenCalledOnce(); + expect(stagehandMock.constructor).toHaveBeenCalledWith( + expect.objectContaining({ + model: { + modelName: "openai/gpt-4.1", + apiKey: "sk-provider-key", + }, + }), + ); + }); +}); diff --git a/src/sessionManager.ts b/src/sessionManager.ts index 3b1f538..9feb9c2 100644 --- a/src/sessionManager.ts +++ b/src/sessionManager.ts @@ -4,6 +4,9 @@ import type { Config } from "../config.d.ts"; import type { BrowserSession, CreateSessionParams } from "./types/types.js"; import { randomUUID } from "crypto"; +const DEFAULT_MODEL_NAME = "google/gemini-2.5-flash-lite"; +const GATEWAY_MODEL_PREFIX = "gateway/"; + /** * Create a configured Stagehand instance * This is used internally by SessionManager to initialize browser sessions @@ -21,22 +24,28 @@ export const createStagehandInstance = async ( throw new Error("Browserbase API Key and Project ID are required"); } - const modelName = params.modelName || config.modelName || "gemini-2.0-flash"; + const modelName = params.modelName || config.modelName || DEFAULT_MODEL_NAME; const modelApiKey = config.modelApiKey || process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY; + const model = modelApiKey + ? { + apiKey: modelApiKey, + modelName, + } + : { + apiKey, + modelName: modelName.startsWith(GATEWAY_MODEL_PREFIX) + ? modelName + : `${GATEWAY_MODEL_PREFIX}${modelName}`, + }; const stagehand = new Stagehand({ env: "BROWSERBASE", apiKey, projectId, - model: modelApiKey - ? { - apiKey: modelApiKey, - modelName: modelName, - } - : modelName, + model, ...(params.browserbaseSessionID && { browserbaseSessionID: params.browserbaseSessionID, }), diff --git a/src/transport.ts b/src/transport.ts index 4885bf9..a20086f 100644 --- a/src/transport.ts +++ b/src/transport.ts @@ -5,27 +5,8 @@ import crypto from "node:crypto"; import { ServerList } from "./server.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; -import type { Config } from "../config.d.ts"; - -export async function startStdioTransport( - serverList: ServerList, - config?: Config, -) { - // Check if we're using the default model without an API key - if (config) { - const modelName = config.modelName || "gemini-2.0-flash"; - const hasModelApiKey = - config.modelApiKey || - process.env.GEMINI_API_KEY || - process.env.GOOGLE_API_KEY; - - if (modelName.includes("gemini") && !hasModelApiKey) { - console.error( - `Need to set GEMINI_API_KEY or GOOGLE_API_KEY in your environment variables`, - ); - } - } +export async function startStdioTransport(serverList: ServerList) { const server = await serverList.create(); await server.connect(new StdioServerTransport()); }