Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
d0a79ff
Add shared infrastructure for functions push/pull
parkerhendo Mar 6, 2026
0e3d5d5
Add functions push/pull command structure, API layer, and report types
parkerhendo Mar 6, 2026
17bc639
Implement bt functions push
parkerhendo Mar 6, 2026
0937129
Implement bt functions pull
parkerhendo Mar 6, 2026
4f5c904
Add functions push/pull tests and CLI fixtures
parkerhendo Mar 6, 2026
f635890
feat(functions): add positional file arguments to push command
parkerhendo Mar 6, 2026
4487907
refactor(push): improve push confirmation prompt with file and projec…
parkerhendo Mar 6, 2026
fd96ed5
refactor(functions): add multi-slug support to pull command
parkerhendo Mar 6, 2026
6547198
feat(functions): add version filter and legacy compatibility flags
parkerhendo Mar 6, 2026
8974925
feat(functions): add JS bundling support with esbuild
parkerhendo Mar 6, 2026
b751e13
fix(pull): use correct variable names for project resolution
parkerhendo Mar 7, 2026
c9622eb
refactor(functions): remove create-missing-projects flag and confirma…
parkerhendo Mar 9, 2026
a6f1062
feat(functions): add progress indicator for pull command and cleanup …
parkerhendo Mar 9, 2026
4c55188
refactor(push): use file_type instead of path.is_file() for consistency
parkerhendo Mar 9, 2026
615392f
refactor(functions): remove legacy compatibility code and aliases
parkerhendo Mar 10, 2026
b37e5e1
fix: add legacy prompt support with tool function resolution
parkerhendo Mar 10, 2026
8e91881
fix(functions-runner): force re-evaluation of imported input files
parkerhendo Mar 10, 2026
4c40773
fix(functions-push): restore runner, bundler, and project parity
parkerhendo Mar 17, 2026
5f7d51f
fix(functions-pull): preserve prompt serialization and identity metadata
parkerhendo Mar 17, 2026
0cd82ad
test(ci): add functions test to GitHub Actions workflow and fix more …
parkerhendo Mar 17, 2026
1e8e17c
add support for pushing sandboxes
nselvidge Mar 13, 2026
9a70a3d
fixes
nselvidge Mar 13, 2026
0aafaf1
fixes
nselvidge Mar 13, 2026
9628bbc
WIP
nselvidge Mar 16, 2026
1ff2094
fix CI
nselvidge Mar 18, 2026
8eb180f
add --sandbox flag
nselvidge Mar 19, 2026
1df96cc
Add tests
nselvidge Mar 19, 2026
a68e4ca
use unix socket for communication and split data generation to its ow…
nselvidge Mar 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ jobs:
corepack prepare pnpm@10.28.2 --activate
- name: Run eval fixtures
run: cargo test --test eval_fixtures
- name: Run functions fixtures
run: cargo test --test functions

eval-tests-python:
name: eval-tests-python (py ${{ matrix.python-version }})
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ tests/evals/js/eval-bun/test-data.txt
__pycache__

bt-sync
*.env
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ chrono = { version = "0.4.40", features = ["clock"] }
dirs = "5"
pathdiff = "0.2.3"
glob = "0.3"
flate2 = "1.1.2"

[profile.dist]
inherits = "release"
Expand Down Expand Up @@ -73,3 +74,6 @@ install-success-msg = ""

[dev-dependencies]
tempfile = "3"

[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.59", features = ["Win32_Storage_FileSystem"] }
201 changes: 201 additions & 0 deletions scripts/data-runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import asyncio
import json
import os
import socket
import sys
import time
from dataclasses import dataclass
from typing import Any

# Import guard: if the runner's dependencies cannot be imported, print a short
# explanation plus the underlying exception to stderr and exit with status 1.
try:
    from braintrust.util import eprint
    from runner_common import call_evaluator_data, load_evaluators, to_async_iterator
except Exception as exc: # pragma: no cover - runtime guard
    # NOTE(review): this branch also runs when `runner_common` fails to
    # import, yet the message below only mentions the braintrust package; the
    # second print (the exception text) is what identifies the real culprit.
    print(
        "Unable to import the braintrust package. Please install it in your Python environment.",
        file=sys.stderr,
    )
    print(str(exc), file=sys.stderr)
    sys.exit(1)


@dataclass
class PullChannel:
    """Newline-delimited JSON message channel on top of a connected socket.

    Outgoing payloads are written as one JSON document per line; incoming
    bytes are split on newlines and handed out as stripped text lines.
    """

    # Connected socket used for both sending and receiving.
    sock: socket.socket

    def send(self, payload: Any) -> None:
        """Serialize *payload* with ``json.dumps`` and send it as one line.

        Raises:
            TypeError: if *payload* is not JSON-serializable.
            OSError: if the socket write fails.
        """
        self.sock.sendall((json.dumps(payload) + "\n").encode("utf-8"))

    async def lines(self):
        """Yield every non-empty, whitespace-stripped line read from the socket.

        The blocking ``recv`` call is run via ``asyncio.to_thread`` so the
        event loop is not blocked while waiting for data. Iteration ends when
        ``recv`` returns no data (peer closed); any unterminated trailing
        text is yielded last.

        Raises:
            UnicodeDecodeError: if a complete line is not valid UTF-8.
        """
        # Buffer raw bytes and decode only complete lines: a recv() boundary
        # can fall in the middle of a multi-byte UTF-8 sequence, so decoding
        # each chunk on its own would fail on perfectly valid input. Splitting
        # on b"\n" is safe because that byte never occurs inside a multi-byte
        # UTF-8 sequence.
        pending = b""
        while True:
            chunk = await asyncio.to_thread(self.sock.recv, 4096)
            if not chunk:
                break
            pending += chunk
            # Everything before the last newline is complete; the remainder
            # (possibly empty) stays buffered for the next chunk.
            *complete, pending = pending.split(b"\n")
            for raw_line in complete:
                line = raw_line.decode("utf-8").strip()
                if line:
                    yield line

        trailing = pending.decode("utf-8").strip()
        if trailing:
            yield trailing

    def close(self) -> None:
        """Shut down both directions of the socket, then close it.

        A failing ``shutdown`` (for example when the peer is already gone)
        is ignored so that ``close`` is always reached.
        """
        try:
            self.sock.shutdown(socket.SHUT_RDWR)
        except OSError:
            pass
        self.sock.close()


def create_pull_channel() -> PullChannel:
    """Connect to the unix socket named by ``BT_EVAL_PULL_SOCK``.

    Returns:
        A ``PullChannel`` wrapping the connected stream socket.

    Raises:
        ValueError: if ``BT_EVAL_PULL_SOCK`` is unset or empty.
        OSError: if connecting to the socket path fails.
    """
    sock_path = os.getenv("BT_EVAL_PULL_SOCK")
    if not sock_path:
        raise ValueError("Missing BT_EVAL_PULL_SOCK")

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        sock.connect(sock_path)
    except BaseException:
        # Do not leak the descriptor when the connection cannot be made.
        sock.close()
        raise
    return PullChannel(sock)


def parse_start_request(raw: str) -> str:
    """Validate the initial ``start`` command and return the evaluator name.

    Args:
        raw: One JSON-encoded line received from the peer.

    Returns:
        The non-empty string stored under ``"name"``.

    Raises:
        ValueError: if *raw* is not valid JSON, is not a JSON object, does
            not have ``"type": "start"``, or lacks a non-empty string name.
    """
    request = json.loads(raw)
    if not isinstance(request, dict):
        raise ValueError("Start request must be a JSON object.")
    if request.get("type") != "start":
        raise ValueError("Expected initial start command.")

    evaluator_name = request.get("name")
    if isinstance(evaluator_name, str) and evaluator_name:
        return evaluator_name
    raise ValueError("Start request must include a non-empty evaluator name.")


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    The only argument is ``files``: zero or more positional paths.
    """
    cli = argparse.ArgumentParser(
        description="Stream eval rows over a unix socket for bt.",
    )
    cli.add_argument(
        "files",
        nargs="*",
        help="Eval files or directories to load.",
    )
    return cli


async def run(files: list[str]) -> int:
    """Serve one evaluator's data rows to the peer on the pull channel.

    Message flow, one JSON object per line:

    1. The peer sends ``{"type": "start", "name": <evaluator name>}``.
    2. This side answers with a ``ready`` message (``evaluator_name``,
       ``max_concurrency``, ``experiment_name``) or an ``error`` message.
    3. For every ``next`` command a ``row`` message (``datum``,
       ``trial_index``) is sent; each datum is repeated ``trial_count``
       times before advancing. Once the data is exhausted ``eof`` is sent.
    4. A ``close`` command, or the peer disconnecting, ends the session.

    Args:
        files: Paths passed to ``load_evaluators``.

    Returns:
        0 when the session ends normally (including a peer that disconnects
        before sending anything); 1 after an ``error`` message was sent.

    Raises:
        Exceptions from loading evaluators, connecting the channel, or
        producing the data are not handled here and propagate to the caller.
    """
    evaluators, _reporters = load_evaluators(files)
    channel = create_pull_channel()

    try:
        line_iter = channel.lines()
        try:
            start_line = await anext(line_iter)
        except StopAsyncIteration:
            # Peer closed the connection without sending a start command.
            return 0

        try:
            target_name = parse_start_request(start_line)
        except Exception as exc:
            channel.send({"type": "error", "message": str(exc)})
            return 1

        evaluator_instance = next(
            (candidate for candidate in evaluators if candidate.evaluator.eval_name == target_name),
            None,
        )
        if evaluator_instance is None:
            channel.send({"type": "error", "message": f"Evaluator '{target_name}' not found"})
            return 1

        evaluator = evaluator_instance.evaluator
        raw_data, _base_experiment_name = await call_evaluator_data(evaluator.data)
        iterator = aiter(to_async_iterator(raw_data))

        # Fall back to a single trial when the attribute is missing, not
        # convertible to int, or below 1.
        trial_count = getattr(evaluator, "trial_count", 1)
        try:
            trial_count = int(trial_count)
        except Exception:
            trial_count = 1
        if trial_count < 1:
            trial_count = 1

        # Default to 10 when unset or not convertible; clamp to at least 1.
        max_concurrency = getattr(evaluator, "max_concurrency", None)
        try:
            max_concurrency = int(max_concurrency) if max_concurrency is not None else 10
        except Exception:
            max_concurrency = 10
        if max_concurrency < 1:
            max_concurrency = 1

        # Without a usable experiment name, derive one from the evaluator
        # name plus the current time in milliseconds.
        experiment_name = getattr(evaluator, "experiment_name", None)
        if not isinstance(experiment_name, str) or not experiment_name:
            experiment_name = f"{evaluator.eval_name}-{int(time.time() * 1000)}"

        channel.send(
            {
                "type": "ready",
                "evaluator_name": evaluator.eval_name,
                "max_concurrency": max_concurrency,
                "experiment_name": experiment_name,
            }
        )

        current_datum = None
        trial_index = 0
        async for line in line_iter:
            try:
                parsed = json.loads(line)
            except ValueError as exc:
                # Report malformed commands to the peer, matching how a bad
                # start request is handled, instead of dropping the
                # connection without an explanation.
                channel.send({"type": "error", "message": f"Invalid pull command: {exc}"})
                return 1

            command_type = parsed.get("type") if isinstance(parsed, dict) else None
            if command_type == "close":
                break
            if command_type != "next":
                channel.send(
                    {
                        "type": "error",
                        "message": f"Unsupported pull command '{command_type}'",
                    }
                )
                return 1

            if current_datum is None:
                # Previous datum has used up its trials (or this is the
                # first request): advance to the next datum.
                try:
                    current_datum = await anext(iterator)
                    trial_index = 0
                except StopAsyncIteration:
                    # Keep listening after eof so the peer can still send
                    # "close" (further "next" commands get "eof" again).
                    channel.send({"type": "eof"})
                    continue

            channel.send(
                {
                    "type": "row",
                    "datum": current_datum,
                    "trial_index": trial_index,
                }
            )
            trial_index += 1
            if trial_index >= trial_count:
                current_datum = None

        return 0
    finally:
        channel.close()


def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments, run the session, and return an exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The value returned by ``run``, or 1 if it raised; in that case the
        exception text is reported through ``eprint``.
    """
    options = build_parser().parse_args(argv)
    # With no paths given, load from the current directory.
    targets = options.files if options.files else ["."]

    try:
        exit_code = asyncio.run(run(targets))
    except Exception as exc:
        eprint(str(exc))
        return 1
    return exit_code


# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    sys.exit(main())
Loading
Loading