braintrustdata · Abhijeet Prasad (AbhiPrasad) · Mar 3, 2026 · Mar 4, 2026
diff --git a/.github/workflows/bun-test.yaml b/.github/workflows/bun-test.yaml
@@ -0,0 +1,47 @@
+# Runs the Bun wrapper unit tests and the Bun example integration tests.
+name: bun-test
+
+on:
+  pull_request:
+    paths:
+      - "js/**"
+      - ".github/workflows/bun-test.yaml"
+      - "pnpm-lock.yaml"
+  push:
+    branches: [main]
+
+jobs:
+  bun-test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: oven-sh/setup-bun@v2
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Build
+        run: pnpm run build
+
+      # Unit tests (bun test)
+      - name: Run unit tests
+        run: cd js && bun test src/wrappers/bun-test/
+
+      # Integration tests (bun test) - need bun runtime + API keys.
+      # Secrets are not passed to workflows triggered from forked pull
+      # requests, so skip this step there rather than fail on missing keys.
+      - name: Run bun integration tests
+        if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
+        env:
+          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: cd js/examples/bun-test && bun test
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -2,7 +2,7 @@
 
 JavaScript client for Braintrust, plus wrapper libraries for OpenAI, Anthropic, and other AI providers.
 
-This repo uses `pnpm` as it's package manager.
+This repo uses `pnpm` as its package manager and [mise](https://mise.jdx.dev/) to manage tool versions.
 
 ## Structure
 
@@ -23,7 +23,13 @@ sdk/
 
 ## Setup
 
+This repo uses [mise](https://mise.jdx.dev/) to manage tool versions (e.g. `pnpm`). The root `mise.toml` pins versions and runs `pnpm install` automatically on `mise install`.
+
 ```bash
+mise install      # Install tools and dependencies (recommended)
+# or manually:
 pnpm install      # Install dependencies
 pnpm run build    # Build all packages
 ```
+
+mise also auto-loads a `.env` file if present — see `.env.example` to configure API keys.
diff --git a/js/Makefile b/js/Makefile
@@ -17,6 +17,7 @@ help:
 	@echo "  make test-ai-sdk-v6     - Run AI SDK v6 wrapper tests"
 	@echo "  make test-claude-agent-sdk - Run Claude Agent SDK wrapper tests"
 	@echo "  make test-vitest       - Run Vitest wrapper tests"
+	@echo "  make test-bun-test     - Run Bun test runner wrapper tests"
 	@echo "  make test-api-compat    - Run API compatibility tests"
 	@echo "  make bench              - Run queue performance benchmarks"
 	@echo "  make test-latest        - Run core + latest versions of wrappers"
@@ -28,7 +29,7 @@ help:
 	@echo ""
 	@echo "See smoke/README.md for details on smoke test infrastructure"
 
-.PHONY: help bench build clean test test-core test-openai test-anthropic test-google-genai test-ai-sdk test-ai-sdk-v5 test-ai-sdk-v6 test-claude-agent-sdk test-vitest test-latest install-optional-deps publish-beta-local test-smoke
+.PHONY: help bench build clean test test-core test-openai test-anthropic test-google-genai test-ai-sdk test-ai-sdk-v5 test-ai-sdk-v6 test-claude-agent-sdk test-vitest test-bun-test test-latest install-optional-deps publish-beta-local test-smoke
 
 # -------------------------------------------------------------------------------------------------	#
 # Anthropic testing
@@ -82,6 +83,13 @@ test-claude-agent-sdk:
 test-vitest:
 	cd src/wrappers/vitest && pnpm install && pnpm test
 
+# -------------------------------------------------------------------------------------------------
+# Bun test runner testing
+# -------------------------------------------------------------------------------------------------
+
+test-bun-test:
+	bun test src/wrappers/bun-test/
+
 # -------------------------------------------------------------------------------------------------
 # OpenAI testing
 # -------------------------------------------------------------------------------------------------

diff --git a/js/examples/bun-test/bun-test-example.test.ts b/js/examples/bun-test/bun-test-example.test.ts
@@ -0,0 +1,244 @@
+/**
+ * Bun Test Runner + Braintrust Example
+ *
+ * Demonstrates using initBunTestSuite to track test results as
+ * Braintrust experiments using the Bun test runner.
+ *
+ * Run with: bun test
+ * Requires: BRAINTRUST_API_KEY and OPENAI_API_KEY environment variables
+ */
+
+import { test, describe, afterAll } from "bun:test";
+import { configureNode } from "../../src/node";
+import { initBunTestSuite } from "../../src/wrappers/bun-test/index";
+import { _exportsForTestingOnly, login, currentSpan } from "../../src/logger";
+import { wrapOpenAI } from "../../src/wrappers/oai";
+import OpenAI from "openai";
+
+configureNode();
+
+_exportsForTestingOnly.setInitialTestState();
+await login({ apiKey: process.env.BRAINTRUST_API_KEY });
+
+if (!process.env.OPENAI_API_KEY) {
+  throw new Error(
+    "OPENAI_API_KEY environment variable must be set to run examples/bun-test/bun-test-example.test.ts",
+  );
+}
+
+const openai = wrapOpenAI(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));
+
+// ============================================================
+// Basic Usage — scorers, data expansion, logging
+// ============================================================
+
+describe("Translation Evaluation", () => {
+  const suite = initBunTestSuite({
+    projectName: "example-bun-test",
+    afterAll,
+    test,
+  });
+
+  // --- Single test with input/expected and a scorer ---
+
+  suite.test(
+    "basic translation test",
+    {
+      input: { text: "Hello", targetLang: "Spanish" },
+      expected: "Hola",
+      metadata: { difficulty: "easy" },
+      tags: ["translation", "spanish"],
+      scorers: [
+        ({ output, expected }) => ({
+          name: "exact_match",
+          score:
+            String(output).toLowerCase().trim() ===
+            String(expected).toLowerCase().trim()
+              ? 1
+              : 0,
+        }),
+      ],
+    },
+    async ({ input }) => {
+      const { text, targetLang } = input as {
+        text: string;
+        targetLang: string;
+      };
+      const response = await openai.chat.completions.create({
+        model: "gpt-3.5-turbo",
+        messages: [
+          {
+            role: "user",
+            content: `Translate "${text}" to ${targetLang}. Respond with ONLY the translation.`,
+          },
+        ],
+        temperature: 0,
+      });
+      return response.choices[0]?.message?.content?.trim() || "";
+    },
+  );
+
+  // --- Data expansion with a loop ---
+
+  const translationCases = [
+    {
+      input: { text: "Good morning", targetLang: "Spanish" },
+      expected: "Buenos días",
+    },
+    {
+      input: { text: "Thank you very much", targetLang: "Spanish" },
+      expected: "Muchas gracias",
+    },
+    {
+      input: { text: "Goodbye", targetLang: "French" },
+      expected: "Au revoir",
+    },
+  ];
+
+  for (const [i, record] of translationCases.entries()) {
+    suite.test(
+      `translation [${i}]: "${record.input.text}" → ${record.input.targetLang}`,
+      {
+        ...record,
+        scorers: [
+          // word_overlap: share of expected words present in the output;
+          // splits on whitespace runs so newlines/double spaces still match.
+          ({ output, expected }) => {
+            const words = (v: unknown) =>
+              String(v).toLowerCase().trim().split(/\s+/);
+            const seen = new Set(words(output));
+            const expectedWords = words(expected);
+            const matches = expectedWords.filter((w) => seen.has(w)).length;
+            return {
+              name: "word_overlap",
+              score: matches / expectedWords.length,
+              metadata: { matches, total: expectedWords.length },
+            };
+          },
+        ],
+      },
+      async ({ input }) => {
+        const { text, targetLang } = input as {
+          text: string;
+          targetLang: string;
+        };
+        const response = await openai.chat.completions.create({
+          model: "gpt-3.5-turbo",
+          messages: [
+            {
+              role: "user",
+              content: `Translate "${text}" to ${targetLang}. Respond with ONLY the translation.`,
+            },
+          ],
+          temperature: 0,
+        });
+        return response.choices[0]?.message?.content?.trim() || "";
+      },
+    );
+  }
+
+  // --- currentSpan() for custom logging ---
+
+  suite.test(
+    "translation with extra logging",
+    {
+      input: { text: "How are you?", targetLang: "Spanish" },
+      expected: "¿Cómo estás?",
+    },
+    async ({ input }) => {
+      const { text, targetLang } = input as {
+        text: string;
+        targetLang: string;
+      };
+      const response = await openai.chat.completions.create({
+        model: "gpt-3.5-turbo",
+        messages: [
+          {
+            role: "user",
+            content: `Translate "${text}" to ${targetLang}. Respond with ONLY the translation.`,
+          },
+        ],
+        temperature: 0,
+      });
+
+      const result = response.choices[0]?.message?.content?.trim() || "";
+
+      currentSpan().log({
+        output: { tokens: response.usage, model: response.model },
+        scores: { human_quality: 0.95 },
+        metadata: { evaluator: "example" },
+      });
+
+      return result;
+    },
+  );
+});
+
+// ============================================================
+// Multiple Scorers
+// ============================================================
+
+describe("Multiple Scorers", () => {
+  const suite = initBunTestSuite({
+    projectName: "example-bun-test",
+    afterAll,
+    test,
+  });
+
+  suite.test(
+    "translation with multiple custom scorers",
+    {
+      input: { text: "Hello world", targetLang: "Spanish" },
+      expected: "Hola mundo",
+      scorers: [
+        ({ output, expected }) => ({
+          name: "exact_match",
+          score:
+            String(output).toLowerCase().trim() ===
+            String(expected).toLowerCase().trim()
+              ? 1
+              : 0,
+        }),
+        // word_overlap: share of expected words present in the output.
+        // Both sides are lowercased, trimmed, and split on whitespace runs
+        // so newlines or doubled spaces in the output still match.
+        ({ output, expected }) => {
+          const words = (v: unknown) =>
+            String(v).toLowerCase().trim().split(/\s+/);
+          const outputWords = new Set(words(output));
+          const expectedWords = words(expected);
+          const matches = expectedWords.filter((w) =>
+            outputWords.has(w),
+          ).length;
+          return {
+            name: "word_overlap",
+            score: matches / expectedWords.length,
+            metadata: { matches, total: expectedWords.length },
+          };
+        },
+        ({ output }) => ({
+          name: "conciseness",
+          score: String(output).length < 20 ? 1 : 0.7,
+          metadata: { length: String(output).length },
+        }),
+      ],
+    },
+    async ({ input }) => {
+      const { text, targetLang } = input as {
+        text: string;
+        targetLang: string;
+      };
+      const response = await openai.chat.completions.create({
+        model: "gpt-3.5-turbo",
+        messages: [
+          {
+            role: "user",
+            content: `Translate "${text}" to ${targetLang}. Respond with ONLY the translation.`,
+          },
+        ],
+        temperature: 0,
+      });
+      return response.choices[0]?.message?.content?.trim() || "";
+    },
+  );
+});
diff --git a/js/package.json b/js/package.json
@@ -140,6 +140,7 @@
     "@openai/agents": "^0.0.14",
     "@types/argparse": "^2.0.14",
     "@types/async": "^3.2.24",
+    "@types/bun": "^1.3.10",
     "@types/cli-progress": "^3.11.5",
     "@types/cors": "^2.8.17",
     "@types/express": "^5.0.0",
@@ -160,8 +161,6 @@
     "jiti": "^2.6.1",
     "openapi-zod-client": "^1.18.3",
     "rollup": "^4.28.1",
-    "vite": "^5.4.14",
-    "webpack": "^5.97.1",
     "tar": "^7.5.2",
     "tinybench": "^4.0.1",
     "ts-jest": "^29.1.4",
@@ -170,8 +169,10 @@
     "typedoc": "^0.25.13",
     "typedoc-plugin-markdown": "^3.17.1",
     "typescript": "5.4.4",
+    "vite": "^5.4.14",
     "vite-tsconfig-paths": "^4.3.2",
     "vitest": "^2.1.9",
+    "webpack": "^5.97.1",
     "zod": "^3.25.34"
   },
   "dependencies": {

diff --git a/js/src/exports.ts b/js/src/exports.ts
@@ -175,6 +175,7 @@ export { wrapClaudeAgentSDK } from "./wrappers/claude-agent-sdk/claude-agent-sdk
 export { wrapGoogleGenAI } from "./wrappers/google-genai";
 export { wrapVitest } from "./wrappers/vitest";
 export { initNodeTestSuite } from "./wrappers/node-test";
+export { initBunTestSuite } from "./wrappers/bun-test";
 
 export * as graph from "./graph-framework";