braintrustdata
diff --git a/‎btx/build.gradle‎
Lines changed: 64 additions & 0 deletions b/‎btx/build.gradle‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎btx/health-check-btx-server‎
Lines changed: 118 additions & 0 deletions b/‎btx/health-check-btx-server‎
Lines changed: 118 additions & 0 deletions
diff --git a/‎btx/spec/README.md‎
Lines changed: 9 additions & 0 deletions b/‎btx/spec/README.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎btx/spec/llm_span/README.md‎
Lines changed: 3 additions & 0 deletions b/‎btx/spec/llm_span/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎btx/spec/llm_span/anthropic/messages.yaml‎
Lines changed: 34 additions & 0 deletions b/‎btx/spec/llm_span/anthropic/messages.yaml‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎btx/spec/llm_span/google/attachments.yaml‎
Lines changed: 44 additions & 0 deletions b/‎btx/spec/llm_span/google/attachments.yaml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎btx/spec/llm_span/google/generate_content.yaml‎
Lines changed: 34 additions & 0 deletions b/‎btx/spec/llm_span/google/generate_content.yaml‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎btx/spec/llm_span/openai/attachments.yaml‎
Lines changed: 52 additions & 0 deletions b/‎btx/spec/llm_span/openai/attachments.yaml‎
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,64 @@
+// Plain Java project: only the core 'java' plugin is applied.
+plugins {
+    id('java')
+}
+
+// Build and test with a Java 17 toolchain.
+java.toolchain {
+    languageVersion.set(JavaLanguageVersion.of(17))
+}
+
+// Repositories are declared in this order: Maven Central, then the local Maven repository.
+repositories {
+    mavenCentral()
+    mavenLocal()
+}
+
+// Everything here is test-scoped: this project declares only test dependencies.
+dependencies {
+    // Braintrust SDK (local project dependencies)
+    testImplementation project(':braintrust-sdk')
+    testImplementation project(':braintrust-sdk:instrumentation:openai_2_8_0')
+    testImplementation project(':braintrust-sdk:instrumentation:anthropic_2_2_0')
+    testImplementation project(':braintrust-sdk:instrumentation:genai_1_18_0')
+    testImplementation project(':braintrust-sdk:instrumentation:langchain_1_8_0')
+
+    // Jackson for JSON processing
+    testImplementation 'com.fasterxml.jackson.core:jackson-databind:2.16.1'
+
+    // OpenAI SDK
+    testImplementation 'com.openai:openai-java:2.8.1'
+
+    // Anthropic SDK
+    testImplementation 'com.anthropic:anthropic-java:2.10.0'
+
+    // Gemini (via the Spring AI Google GenAI artifact)
+    testImplementation 'org.springframework.ai:spring-ai-google-genai:1.1.0'
+
+    // LangChain4j
+    testImplementation 'dev.langchain4j:langchain4j:1.9.1'
+    testImplementation 'dev.langchain4j:langchain4j-http-client:1.9.1'
+    testImplementation 'dev.langchain4j:langchain4j-open-ai:1.9.1'
+
+    // OpenTelemetry: API and SDK share the root project's version so they cannot drift apart
+    testImplementation "io.opentelemetry:opentelemetry-api:${rootProject.ext.otelVersion}"
+    testImplementation "io.opentelemetry:opentelemetry-sdk:${rootProject.ext.otelVersion}"
+
+    // YAML parsing for spec files
+    testImplementation 'org.yaml:snakeyaml:2.3'
+
+    // Test framework
+    testImplementation(testFixtures(project(":test-harness")))
+    testImplementation "org.junit.jupiter:junit-jupiter:${rootProject.ext.junitVersion}"
+    testImplementation "org.junit.jupiter:junit-jupiter-params:${rootProject.ext.junitVersion}"
+    testRuntimeOnly 'org.slf4j:slf4j-simple:2.0.17'
+    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
+}
+
+// Run tests on the JUnit Platform with the root project directory as the working directory.
+test {
+    useJUnitPlatform()
+    workingDir = rootProject.projectDir
+    testLogging {
+        // Log every test outcome, print full exception details, and forward stdout/stderr.
+        events 'passed', 'skipped', 'failed'
+        exceptionFormat = 'full'
+        showStandardStreams = true
+    }
+}
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# BTX Server Health Check Script
+# Usage: ./scripts/health-check-btx-server <runner-directory>
+# Example: ./scripts/health-check-btx-server ./src/btx/runners/java
+#
+# Exits with:
+#   0 - Server started successfully and health check passed
+#   1 - Server failed to start or health check failed
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <runner-directory>" >&2
+    echo "Example: $0 ./src/btx/runners/java" >&2
+    exit 1
+fi
+
+RUNNER_DIR="$1"
+START_SCRIPT="$RUNNER_DIR/start.sh"
+
+# Validate runner directory
+if [ ! -d "$RUNNER_DIR" ]; then
+    echo "Error: Runner directory does not exist: $RUNNER_DIR" >&2
+    exit 1
+fi
+
+if [ ! -f "$START_SCRIPT" ]; then
+    echo "Error: start.sh not found in $RUNNER_DIR" >&2
+    exit 1
+fi
+
+if [ ! -x "$START_SCRIPT" ]; then
+    echo "Error: start.sh is not executable: $START_SCRIPT" >&2
+    exit 1
+fi
+
+# Find a random open port.
+# Binds a socket to port 0, prints the port number that was assigned, and then
+# closes the socket. The socket is closed before the caller uses the port, so
+# another process could claim the port in the meantime.
+# Requires python3 on PATH.
+find_open_port() {
+    python3 -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()'
+}
+
+PORT=$(find_open_port)
+HEALTH_URL="http://localhost:$PORT/health"
+MAX_WAIT=30  # Maximum seconds to wait for health check
+POLL_INTERVAL=0.5  # Seconds between health check attempts
+
+echo "Starting BTX server on port $PORT..."
+
+# Start the server in background, redirecting output to temp files
+TMPDIR=$(mktemp -d)
+LOG_FILE="$TMPDIR/btx-server.log"
+PID_FILE="$TMPDIR/btx-server.pid"
+
+# Cleanup function
+cleanup() {
+    if [ -f "$PID_FILE" ]; then
+        PID=$(cat "$PID_FILE")
+        if kill -0 "$PID" 2>/dev/null; then
+            echo "Stopping server (PID: $PID)..."
+            kill "$PID" 2>/dev/null || true
+            # Give it a moment to shut down gracefully
+            sleep 0.5
+            # Force kill if still running
+            if kill -0 "$PID" 2>/dev/null; then
+                kill -9 "$PID" 2>/dev/null || true
+            fi
+        fi
+    fi
+    rm -rf "$TMPDIR"
+}
+
+# Register cleanup before launching anything so it runs whenever the script exits.
+trap cleanup EXIT
+
+# Launch start.sh in the background; stdout and stderr both go to the log file.
+"$START_SCRIPT" "$PORT" > "$LOG_FILE" 2>&1 &
+SERVER_PID=$!
+# Record the PID on disk; cleanup reads it back from PID_FILE.
+printf '%s\n' "$SERVER_PID" > "$PID_FILE"
+
+echo "Server started with PID: $SERVER_PID"
+echo "Log file: $LOG_FILE"
+
+# Wait for server to be healthy.
+# Polls HEALTH_URL until it answers with a successful HTTP status, the server
+# process exits, or the attempt budget runs out.
+echo "Waiting for health check at $HEALTH_URL..."
+ATTEMPTS=0
+MAX_ATTEMPTS=$((MAX_WAIT * 2))  # Poll twice per second (assumes POLL_INTERVAL=0.5)
+CURL_TIMEOUT=2  # Per-request cap so an unresponsive server cannot stall the loop
+
+while [ "$ATTEMPTS" -lt "$MAX_ATTEMPTS" ]; do
+    # Check if process is still alive
+    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+        echo "Error: Server process died unexpectedly" >&2
+        echo "Last 20 lines of log:" >&2
+        tail -n 20 "$LOG_FILE" >&2
+        exit 1
+    fi
+
+    # Try health check. A single request both decides success and captures the
+    # body; a second, unguarded request could fail and abort the script under
+    # `set -e` without printing any diagnostics.
+    if RESPONSE=$(curl -sf --max-time "$CURL_TIMEOUT" "$HEALTH_URL" 2>/dev/null); then
+        echo "Health check passed!"
+        echo "Response: $RESPONSE"
+        exit 0
+    fi
+
+    sleep "$POLL_INTERVAL"
+    ATTEMPTS=$((ATTEMPTS + 1))
+
+    # Show progress every 10 attempts (5 seconds)
+    if [ $((ATTEMPTS % 10)) -eq 0 ]; then
+        ELAPSED=$((ATTEMPTS / 2))
+        echo "Still waiting... (${ELAPSED}s elapsed)"
+    fi
+done
+
+# Timeout reached
+echo "Error: Health check timed out after ${MAX_WAIT}s" >&2
+echo "Last 20 lines of log:" >&2
+tail -n 20 "$LOG_FILE" >&2
+exit 1
@@ -0,0 +1,9 @@
+# Braintrust Spec
+
+Cross-language specs for implementing a Braintrust SDK.
+
+Contains:
+
+- Markdown files describing complex features
+- YAML files describing end-to-end tests and assertions
+- YAML files describing cross-language constants (environment variables, string attributes)
@@ -0,0 +1,3 @@
+# llm span end-to-end tests
+
+TODO: document this
@@ -0,0 +1,34 @@
+# llm_span_test spec: one request to the Anthropic /v1/messages endpoint,
+# followed by the span fields expected for it.
+name: messages
+type: llm_span_test
+provider: anthropic
+endpoint: /v1/messages
+enabled_runners: ["python", "typescript", "java", "csharp"]
+requests:
+  - model: claude-haiku-4-5-20251001
+    temperature: 0.0
+    max_tokens: 128
+    system: "You are a helpful assistant."
+    messages:
+      - role: user
+        content: What is the capital of France?
+expected_brainstore_spans:
+  - metrics:
+      # `!fn` values name a check instead of a literal value
+      # (presumably resolved by the test runner -- confirm against the harness).
+      tokens: !fn is_non_negative_number
+      prompt_tokens: !fn is_non_negative_number
+      completion_tokens: !fn is_non_negative_number
+    metadata:
+      model: claude-haiku-4-5-20251001
+      provider: anthropic
+    span_attributes:
+      name: anthropic.messages.create
+      type: llm
+    input:
+      # NOTE(review): the request's top-level `system` prompt is expected here as a
+      # `system` message listed AFTER the user message -- confirm this ordering
+      # matches what the instrumentation emits.
+      - content: What is the capital of France?
+        role: user
+      - content: "You are a helpful assistant."
+        role: system
+    output:
+      content:
+        - text: The capital of France is Paris.
+          type: text
+      role: assistant
@@ -0,0 +1,44 @@
+# llm_span_test spec: a Gemini generateContent request carrying an inline PNG,
+# followed by the span fields expected for it.
+name: attachments
+type: llm_span_test
+provider: google
+endpoint: /v1/models/gemini-2.0-flash:generateContent
+enabled_runners: ["python", "typescript", "java", "go"]
+requests:
+  - contents:
+      - role: user
+        parts:
+          - text: What color is this image?
+          - inline_data:
+              mime_type: image/png
+              # 1x1 red pixel
+              data: iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==
+    generationConfig:
+      temperature: 0.0
+expected_brainstore_spans:
+  - metrics:
+      # `!fn` values name a check instead of a literal value
+      # (presumably resolved by the test runner -- confirm against the harness).
+      tokens: !fn is_non_negative_number
+      prompt_tokens: !fn is_non_negative_number
+      completion_tokens: !fn is_non_negative_number
+    metadata:
+      model: gemini-2.0-flash
+    span_attributes:
+      name: generate_content
+      type: llm
+    input:
+      model: gemini-2.0-flash
+      contents:
+        - role: user
+          parts:
+            - text: What color is this image?
+            # The request's inline_data part is expected here as a
+            # braintrust_attachment reference instead of the raw base64 payload.
+            - image_url:
+                url:
+                  content_type: image/png
+                  filename: !fn is_non_empty_string
+                  key: !fn is_non_empty_string
+                  type: braintrust_attachment
+    output:
+      candidates:
+        - content:
+            parts:
+              - text: !fn is_non_empty_string
+            role: model
@@ -0,0 +1,34 @@
+# llm_span_test spec: a text-only Gemini generateContent request,
+# followed by the span fields expected for it.
+name: generate_content
+type: llm_span_test
+provider: google
+endpoint: /v1/models/gemini-2.5-flash:generateContent
+enabled_runners: ["python", "typescript", "java", "go"]
+requests:
+  - contents:
+      - role: user
+        parts:
+          - text: What is the capital of France?
+    generationConfig:
+      temperature: 0.0
+expected_brainstore_spans:
+  - metrics:
+      # `!fn` values name a check instead of a literal value
+      # (presumably resolved by the test runner -- confirm against the harness).
+      tokens: !fn is_non_negative_number
+      prompt_tokens: !fn is_non_negative_number
+      completion_tokens: !fn is_non_negative_number
+    metadata:
+      model: gemini-2.5-flash
+    span_attributes:
+      name: generate_content
+      type: llm
+    input:
+      model: gemini-2.5-flash
+      contents:
+        - role: user
+          parts:
+            - text: What is the capital of France?
+    output:
+      candidates:
+        - content:
+            parts:
+              # Only a non-empty text part is required; no exact answer is pinned.
+              - text: !fn is_non_empty_string
+            role: model
@@ -0,0 +1,52 @@
+# llm_span_test spec: an OpenAI chat completion request carrying an image as a
+# base64 data URL, followed by the span fields expected for it.
+# NOTE(review): unlike the sibling specs, no `enabled_runners` list is given
+# here -- confirm that omitting it is intentional.
+name: attachments
+type: llm_span_test
+provider: openai
+endpoint: /v1/chat/completions
+requests:
+  - model: gpt-4o-mini
+    temperature: 0.0
+    messages:
+      - role: system
+        content: you are a helpful assistant
+      - role: user
+        content:
+        - type: text
+          text: What color is this image?
+        - type: image_url
+          image_url:
+            # 1x1 red pixel
+            url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==
+expected_brainstore_spans:
+  - metrics:
+      # `!fn` values name a check instead of a literal value
+      # (presumably resolved by the test runner -- confirm against the harness).
+      tokens: !fn is_non_negative_number
+      prompt_tokens: !fn is_non_negative_number
+      completion_tokens: !fn is_non_negative_number
+    metadata:
+      model: !starts_with "gpt-4o-mini"
+      provider: openai
+    span_attributes:
+      name: Chat Completion
+      type: llm
+    input:
+      - role: system
+        content: you are a helpful assistant
+      - role: user
+        content:
+          - text: What color is this image?
+            type: text
+          # The request's data URL is expected here as a braintrust_attachment
+          # reference instead of the raw base64 payload.
+          - image_url:
+              url:
+                content_type: image/png
+                filename: !fn is_non_empty_string
+                key: !fn is_non_empty_string
+                type: braintrust_attachment
+            type: image_url
+    output:
+      # Two output shapes are listed under `!or`: a full choice object, or a bare
+      # assistant message (presumably either one is accepted -- confirm `!or` semantics).
+      - !or
+        - finish_reason: stop
+          index: 0
+          message:
+            role: assistant
+            content: !fn is_non_empty_string
+        - role: assistant
+          content: !fn is_non_empty_string
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# llm span end-to-end tests`
	`2`	`+`
	`3`	`+TODO: document this`