Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/bun-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Runs the Bun test-runner wrapper's unit tests and the Bun example
# (integration) tests for pull requests touching the JS SDK and for
# pushes to main.
name: bun-test

on:
  pull_request:
    paths:
      - "js/**"
      - ".github/workflows/bun-test.yaml"
      - "pnpm-lock.yaml"
  push:
    branches: [main]

# The job only checks out the repository and runs tests, so the workflow
# token does not need anything beyond read access.
permissions:
  contents: read

jobs:
  bun-test:
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 22

      - uses: pnpm/action-setup@v4

      - uses: oven-sh/setup-bun@v2

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Build
        run: pnpm run build

      # Unit tests (bun test)
      - name: Run unit tests
        run: cd js && bun test src/wrappers/bun-test/

      # Integration tests (bun test) - need bun runtime + API keys.
      # Repository secrets are not exposed to pull requests opened from
      # forks, so the step is limited to pushes and same-repository PRs;
      # otherwise every fork PR would fail on the missing keys.
      - name: Run bun integration tests
        if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository
        env:
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: cd js/examples/bun-test && bun test
8 changes: 7 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

JavaScript client for Braintrust, plus wrapper libraries for OpenAI, Anthropic, and other AI providers.

This repo uses `pnpm` as it's package manager.
This repo uses `pnpm` as its package manager and [mise](https://mise.jdx.dev/) to manage tool versions.

## Structure

Expand All @@ -23,7 +23,13 @@ sdk/

## Setup

This repo uses [mise](https://mise.jdx.dev/) to manage tool versions (e.g. `pnpm`). The root `mise.toml` pins versions and runs `pnpm install` automatically on `mise install`.

```bash
mise install # Install tools and dependencies (recommended)
# or manually:
pnpm install # Install dependencies
pnpm run build # Build all packages
```

mise also auto-loads a `.env` file if present — see `.env.example` to configure API keys.
10 changes: 9 additions & 1 deletion js/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ help:
@echo " make test-ai-sdk-v6 - Run AI SDK v6 wrapper tests"
@echo " make test-claude-agent-sdk - Run Claude Agent SDK wrapper tests"
@echo " make test-vitest - Run Vitest wrapper tests"
@echo " make test-bun-test - Run Bun test runner wrapper tests"
@echo " make test-api-compat - Run API compatibility tests"
@echo " make bench - Run queue performance benchmarks"
@echo " make test-latest - Run core + latest versions of wrappers"
Expand All @@ -28,7 +29,7 @@ help:
@echo ""
@echo "See smoke/README.md for details on smoke test infrastructure"

.PHONY: help bench build clean test test-core test-openai test-anthropic test-google-genai test-ai-sdk test-ai-sdk-v5 test-ai-sdk-v6 test-claude-agent-sdk test-vitest test-latest install-optional-deps publish-beta-local test-smoke
.PHONY: help bench build clean test test-core test-openai test-anthropic test-google-genai test-ai-sdk test-ai-sdk-v5 test-ai-sdk-v6 test-claude-agent-sdk test-vitest test-bun-test test-latest install-optional-deps publish-beta-local test-smoke

# ------------------------------------------------------------------------------------------------- #
# Anthropic testing
Expand Down Expand Up @@ -82,6 +83,13 @@ test-claude-agent-sdk:
test-vitest:
cd src/wrappers/vitest && pnpm install && pnpm test

# -------------------------------------------------------------------------------------------------
# Bun test runner testing
# -------------------------------------------------------------------------------------------------

# Invokes `bun test` on src/wrappers/bun-test/ relative to this Makefile's
# directory. Unlike test-vitest, it runs no `pnpm install` first.
test-bun-test:
bun test src/wrappers/bun-test/

# -------------------------------------------------------------------------------------------------
# OpenAI testing
# -------------------------------------------------------------------------------------------------
Expand Down
244 changes: 244 additions & 0 deletions js/examples/bun-test/bun-test-example.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/**
* Bun Test Runner + Braintrust Example
*
* Demonstrates using initBunTestSuite to track test results as
* Braintrust experiments using the Bun test runner.
*
* Run with: bun test
* Requires: BRAINTRUST_API_KEY and OPENAI_API_KEY environment variables
*/

import { test, describe, afterAll } from "bun:test";
import { configureNode } from "../../src/node";
import { initBunTestSuite } from "../../src/wrappers/bun-test/index";
import { _exportsForTestingOnly, login, currentSpan } from "../../src/logger";
import { wrapOpenAI } from "../../src/wrappers/oai";
import OpenAI from "openai";

configureNode();

_exportsForTestingOnly.setInitialTestState();

// Both credentials are documented as required in the file header. Check
// them up front so a missing key produces a clear error before `login`
// is attempted.
const requiredEnvVars = ["BRAINTRUST_API_KEY", "OPENAI_API_KEY"];
for (const name of requiredEnvVars) {
  if (!process.env[name]) {
    throw new Error(
      `${name} environment variable must be set to run examples/bun-test/bun-test-example.test.ts`,
    );
  }
}

await login({ apiKey: process.env.BRAINTRUST_API_KEY });

// OpenAI client passed through `wrapOpenAI`; shared by every test in this file.
const openai = wrapOpenAI(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));

// ============================================================
// Basic Usage — scorers, data expansion, logging
// ============================================================

describe("Translation Evaluation", () => {
  const suite = initBunTestSuite({
    projectName: "example-bun-test",
    afterAll,
    test,
  });

  /** Shape of the `input` record supplied by every test in this suite. */
  type TranslationInput = { text: string; targetLang: string };

  /** Lower-cases and trims a value so comparisons ignore case and outer whitespace. */
  const normalize = (value: unknown): string =>
    String(value).toLowerCase().trim();

  /**
   * Splits a normalized value into words on any run of whitespace, so tabs,
   * newlines, or repeated spaces do not produce empty or fused "words".
   */
  const wordsOf = (value: unknown): string[] => normalize(value).split(/\s+/);

  /** Sends the translation prompt for `input` and resolves with the raw chat completion. */
  const requestTranslation = async (input: unknown) => {
    const { text, targetLang } = input as TranslationInput;
    return openai.chat.completions.create({
      model: "gpt-3.5-turbo",
      messages: [
        {
          role: "user",
          content: `Translate "${text}" to ${targetLang}. Respond with ONLY the translation.`,
        },
      ],
      temperature: 0,
    });
  };

  /** Returns the trimmed text of the first choice, or "" when the response has none. */
  const translationText = (
    response: Awaited<ReturnType<typeof requestTranslation>>,
  ): string => response.choices[0]?.message?.content?.trim() ?? "";

  // --- Single test with input/expected and a scorer ---

  suite.test(
    "basic translation test",
    {
      input: { text: "Hello", targetLang: "Spanish" },
      expected: "Hola",
      metadata: { difficulty: "easy" },
      tags: ["translation", "spanish"],
      scorers: [
        ({ output, expected }) => ({
          name: "exact_match",
          score: normalize(output) === normalize(expected) ? 1 : 0,
        }),
      ],
    },
    async ({ input }) => translationText(await requestTranslation(input)),
  );

  // --- Data expansion with a loop ---

  const translationCases = [
    {
      input: { text: "Good morning", targetLang: "Spanish" },
      expected: "Buenos días",
    },
    {
      input: { text: "Thank you very much", targetLang: "Spanish" },
      expected: "Muchas gracias",
    },
    {
      input: { text: "Goodbye", targetLang: "French" },
      expected: "Au revoir",
    },
  ];

  for (const [i, record] of translationCases.entries()) {
    suite.test(
      `translation [${i}]: "${record.input.text}" → ${record.input.targetLang}`,
      {
        ...record,
        scorers: [
          // Fraction of the expected words that also appear in the output.
          ({ output, expected }) => {
            const outputWords = new Set(wordsOf(output));
            const expectedWords = wordsOf(expected);
            const matches = expectedWords.filter((w) =>
              outputWords.has(w),
            ).length;
            return {
              name: "word_overlap",
              score: matches / expectedWords.length,
              metadata: { matches, total: expectedWords.length },
            };
          },
        ],
      },
      async ({ input }) => translationText(await requestTranslation(input)),
    );
  }

  // --- currentSpan() for custom logging ---

  suite.test(
    "translation with extra logging",
    {
      input: { text: "How are you?", targetLang: "Spanish" },
      expected: "¿Cómo estás?",
    },
    async ({ input }) => {
      const response = await requestTranslation(input);
      const result = translationText(response);

      // NOTE(review): this logs an `output` field on the current span while
      // the test also returns `result` — confirm how the wrapper reconciles
      // the two before relying on either value.
      currentSpan().log({
        output: { tokens: response.usage, model: response.model },
        scores: { human_quality: 0.95 },
        metadata: { evaluator: "example" },
      });

      return result;
    },
  );
});

// ============================================================
// Multiple Scorers
// ============================================================

describe("Multiple Scorers", () => {
  const suite = initBunTestSuite({
    projectName: "example-bun-test",
    afterAll,
    test,
  });

  /** Lower-cases and trims a value so comparisons ignore case and outer whitespace. */
  const normalize = (value: unknown): string =>
    String(value).toLowerCase().trim();

  /**
   * Splits a normalized value into words on any run of whitespace, so tabs,
   * newlines, or repeated spaces do not produce empty or fused "words".
   */
  const wordsOf = (value: unknown): string[] => normalize(value).split(/\s+/);

  suite.test(
    "translation with multiple custom scorers",
    {
      input: { text: "Hello world", targetLang: "Spanish" },
      expected: "Hola mundo",
      scorers: [
        // 1 when output and expected match ignoring case/outer whitespace, else 0.
        ({ output, expected }) => ({
          name: "exact_match",
          score: normalize(output) === normalize(expected) ? 1 : 0,
        }),
        // Fraction of the expected words that also appear in the output.
        ({ output, expected }) => {
          const outputWords = new Set(wordsOf(output));
          const expectedWords = wordsOf(expected);
          const matches = expectedWords.filter((w) =>
            outputWords.has(w),
          ).length;
          return {
            name: "word_overlap",
            score: matches / expectedWords.length,
            metadata: { matches, total: expectedWords.length },
          };
        },
        // Full score for outputs shorter than 20 characters, 0.7 otherwise.
        ({ output }) => {
          const length = String(output).length;
          return {
            name: "conciseness",
            score: length < 20 ? 1 : 0.7,
            metadata: { length },
          };
        },
      ],
    },
    async ({ input }) => {
      const { text, targetLang } = input as {
        text: string;
        targetLang: string;
      };
      const response = await openai.chat.completions.create({
        model: "gpt-3.5-turbo",
        messages: [
          {
            role: "user",
            content: `Translate "${text}" to ${targetLang}. Respond with ONLY the translation.`,
          },
        ],
        temperature: 0,
      });
      // Empty string when the response carries no first-choice content.
      return response.choices[0]?.message?.content?.trim() ?? "";
    },
  );
});
5 changes: 3 additions & 2 deletions js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
"@openai/agents": "^0.0.14",
"@types/argparse": "^2.0.14",
"@types/async": "^3.2.24",
"@types/bun": "^1.3.10",
"@types/cli-progress": "^3.11.5",
"@types/cors": "^2.8.17",
"@types/express": "^5.0.0",
Expand All @@ -160,8 +161,6 @@
"jiti": "^2.6.1",
"openapi-zod-client": "^1.18.3",
"rollup": "^4.28.1",
"vite": "^5.4.14",
"webpack": "^5.97.1",
"tar": "^7.5.2",
"tinybench": "^4.0.1",
"ts-jest": "^29.1.4",
Expand All @@ -170,8 +169,10 @@
"typedoc": "^0.25.13",
"typedoc-plugin-markdown": "^3.17.1",
"typescript": "5.4.4",
"vite": "^5.4.14",
"vite-tsconfig-paths": "^4.3.2",
"vitest": "^2.1.9",
"webpack": "^5.97.1",
"zod": "^3.25.34"
},
"dependencies": {
Expand Down
1 change: 1 addition & 0 deletions js/src/exports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ export { wrapClaudeAgentSDK } from "./wrappers/claude-agent-sdk/claude-agent-sdk
export { wrapGoogleGenAI } from "./wrappers/google-genai";
export { wrapVitest } from "./wrappers/vitest";
export { initNodeTestSuite } from "./wrappers/node-test";
export { initBunTestSuite } from "./wrappers/bun-test";

export * as graph from "./graph-framework";

Expand Down
Loading
Loading