Skip to content

Commit 8de1749

Browse files
committed
add support for pushing sandboxes
1 parent b9cc7d1 commit 8de1749

13 files changed

Lines changed: 602 additions & 34 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ tests/evals/js/eval-bun/test-data.txt
2323
__pycache__
2424

2525
bt-sync
26+
*.env

scripts/eval-runner.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,21 @@ def load_evaluators(files: list[str]) -> tuple[list[EvaluatorInstance], dict[str
435435
cwd = os.getcwd()
436436
if cwd not in sys.path:
437437
sys.path.insert(0, cwd)
438+
439+
# Add the project root inferred from input files to sys.path so that
440+
# sibling-package imports work when files live outside CWD (e.g.
441+
# sandbox bundles extracted to a temp directory). Walk up from each
442+
# file's directory looking for a register.py (bundle marker) or the
443+
# filesystem root, whichever comes first.
444+
for f in files:
445+
d = os.path.dirname(os.path.abspath(f))
446+
while d and d != os.path.dirname(d):
447+
if os.path.isfile(os.path.join(d, "register.py")):
448+
if d not in sys.path:
449+
sys.path.insert(0, d)
450+
break
451+
d = os.path.dirname(d)
452+
438453
unique_files: set[str] = set()
439454
for file_path in files:
440455
for candidate in collect_files(file_path):

scripts/functions-bundler.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,10 @@ async function main(): Promise<void> {
275275
const externalPackages = parseExternalPackages(
276276
process.env.BT_FUNCTIONS_PUSH_EXTERNAL_PACKAGES,
277277
);
278-
const external = buildExternalPackagePatterns(externalPackages);
278+
const selfContained = process.env.BT_FUNCTIONS_PUSH_SELF_CONTAINED === "1";
279+
const external = selfContained
280+
? ["fsevents", "chokidar"]
281+
: buildExternalPackagePatterns(externalPackages);
279282
const tsconfig = loadTsconfigPath();
280283

281284
const outputDir = path.dirname(outputFile);

scripts/functions-runner.py

Lines changed: 130 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import inspect
44
import json
55
import os
6+
import re
67
import sys
78
from contextlib import nullcontext
89
from typing import Any
@@ -28,9 +29,9 @@ def to_json_value(value: Any) -> Any:
2829
return [to_json_value(item) for item in value]
2930
if isinstance(value, dict):
3031
return {str(key): to_json_value(val) for key, val in value.items()}
31-
if hasattr(value, "model_dump"):
32+
if hasattr(value, "model_dump") and not isinstance(value, type):
3233
return to_json_value(value.model_dump())
33-
if hasattr(value, "dict"):
34+
if hasattr(value, "dict") and not isinstance(value, type):
3435
return to_json_value(value.dict())
3536
if hasattr(value, "__dict__"):
3637
result: dict[str, Any] = {}
@@ -42,21 +43,31 @@ def to_json_value(value: Any) -> Any:
4243
return str(value)
4344

4445

45-
def load_framework_globals() -> tuple[Any, Any, Any]:
46+
def load_framework_globals() -> tuple[Any, Any, Any, Any]:
4647
# Prefer current SDK layout first:
4748
# - braintrust.framework2 exposes module-level `global_`
4849
# - braintrust.framework exposes `_set_lazy_load`
4950
try:
5051
from braintrust.framework import _set_lazy_load as lazy
5152
from braintrust.framework2 import global_ as global_state
5253

53-
return global_state.functions, global_state.prompts, lazy
54+
try:
55+
from braintrust.framework import _evals
56+
except (ImportError, ModuleNotFoundError):
57+
_evals = None
58+
59+
return global_state.functions, global_state.prompts, lazy, _evals
5460
except (ImportError, ModuleNotFoundError):
5561
# Backward compatibility with older SDK layout.
5662
from braintrust.framework2.global_ import functions, prompts
5763
from braintrust.framework2.lazy_load import _set_lazy_load as lazy
5864

59-
return functions, prompts, lazy
65+
try:
66+
from braintrust.framework import _evals
67+
except (ImportError, ModuleNotFoundError):
68+
_evals = None
69+
70+
return functions, prompts, lazy, _evals
6071

6172

6273
def normalize_project_selector(project: Any) -> tuple[str | None, str | None]:
@@ -277,16 +288,113 @@ async def collect_function_event_entries(prompts_registry: Any) -> list[dict[str
277288
return entries
278289

279290

291+
def slugify(text: str) -> str:
    """Turn ``text`` into a lowercase, hyphen-separated slug.

    The input is lowercased, every run of characters outside ``a-z0-9`` is
    collapsed into a single hyphen, and one leading and one trailing hyphen
    (if present) are removed.
    """
    hyphenated = re.sub(r"[^a-z0-9]+", "-", text.lower())
    # After collapsing, at most one hyphen can remain at either end.
    return re.sub(r"^-|-$", "", hyphenated)
293+
294+
295+
def collect_evaluator_entries(evals_registry: Any, source_file: str) -> list[dict[str, Any]]:
    """Describe each registered evaluator as a sandbox manifest entry.

    Args:
        evals_registry: Object whose ``evaluators`` attribute is read; entries
            are only produced when that attribute is a non-empty dict. May be
            ``None``.
        source_file: Path of the file being processed. Its basename (minus the
            extension and a trailing ``.eval``) is used for the slug prefix and
            the ``_bt_sandbox_group_name`` metadata; its path relative to the
            current working directory is recorded as the entrypoint.

    Returns:
        One ``{"kind": "code", "function_type": "sandbox", ...}`` dict per
        registry item that has a non-``None`` ``evaluator`` attribute, or an
        empty list when nothing usable is registered.
    """
    if evals_registry is None:
        return []

    registered = getattr(evals_registry, "evaluators", None)
    if not (registered and isinstance(registered, dict)):
        return []

    file_stem = os.path.splitext(os.path.basename(source_file))[0]
    group_name = re.sub(r"\.eval$", "", file_stem)
    manifest_entries: list[dict[str, Any]] = []

    for eval_name, wrapper in registered.items():
        evaluator = None if wrapper is None else getattr(wrapper, "evaluator", None)
        if evaluator is None:
            continue

        # Only a string project name is forwarded to the selector helper.
        declared_project = getattr(evaluator, "project_name", None)
        selector = {"project_name": declared_project} if isinstance(declared_project, str) else None
        project_id, proj_name = normalize_project_selector(selector)

        # Scorers without a ``__name__`` get a positional placeholder name.
        definition: dict[str, Any] = {
            "scores": [
                {"name": getattr(scorer, "__name__", f"scorer_{index}")}
                for index, scorer in enumerate(getattr(evaluator, "scores", []) or [])
            ]
        }

        raw_params = getattr(evaluator, "parameters", None)
        if raw_params is not None:
            if getattr(raw_params, "__braintrust_parameters_marker", None) is True:
                # Marked parameter objects are emitted as a reference to
                # their source rather than being serialized by value.
                definition["parameters"] = {
                    "type": "braintrust.parameters",
                    "schema": getattr(raw_params, "schema", None),
                    "source": {
                        "parametersId": getattr(raw_params, "id", None),
                        "slug": getattr(raw_params, "slug", None),
                        "name": getattr(raw_params, "name", None),
                        "projectId": getattr(raw_params, "projectId", None),
                        "version": getattr(raw_params, "version", None),
                    },
                }
            else:
                # Use the braintrust SDK's parameters_to_json_schema when
                # available so that Pydantic model classes are converted to
                # proper staticParametersSchema entries (type: "data" with a
                # JSON Schema) that the UI can parse. Any failure (including
                # the import itself) falls back to generic serialization.
                try:
                    from braintrust.parameters import parameters_to_json_schema

                    serialized = parameters_to_json_schema(raw_params)
                except Exception:
                    serialized = to_json_value(raw_params)
                if serialized is not None:
                    definition["parameters"] = serialized

        # Sandbox entry only — task and scorer entries are pushed separately
        # when the eval is actually run, matching the Python SDK behavior.
        entry: dict[str, Any] = {"kind": "code"}
        if project_id:
            entry["project_id"] = project_id
        if proj_name:
            entry["project_name"] = proj_name
        entry["name"] = f"Eval {eval_name} sandbox"
        entry["slug"] = slugify(f"{group_name}-{eval_name}-sandbox")
        entry["function_type"] = "sandbox"
        entry["location"] = {
            "type": "sandbox",
            "sandbox_spec": {"provider": "lambda"},
            "entrypoints": [os.path.relpath(source_file)],
            "eval_name": eval_name,
            "evaluator_definition": definition,
        }
        entry["metadata"] = {"_bt_sandbox_group_name": group_name}
        manifest_entries.append(entry)

    return manifest_entries
380+
381+
280382
async def process_file(file_path: str) -> dict[str, Any]:
281383
abs_path = os.path.abspath(file_path)
282384
cwd = os.getcwd()
283385
if cwd not in sys.path:
284386
sys.path.insert(0, cwd)
285387

286-
purge_local_modules(cwd, preserve_modules={__name__, "python_runner_common"})
287-
functions_registry, prompts_registry, lazy_loader = load_framework_globals()
388+
functions_registry, prompts_registry, lazy_loader, evals_registry = load_framework_globals()
288389
clear_registry(functions_registry)
289390
clear_registry(prompts_registry)
391+
if (
392+
evals_registry is not None
393+
and hasattr(evals_registry, "evaluators")
394+
and isinstance(evals_registry.evaluators, dict)
395+
):
396+
evals_registry.evaluators.clear()
397+
purge_local_modules(cwd, preserve_modules={__name__, "python_runner_common"})
290398

291399
module_name = import_module_name_from_cwd(cwd, abs_path)
292400
if module_name is None:
@@ -298,12 +406,13 @@ async def process_file(file_path: str) -> dict[str, Any]:
298406
import_file(module_name, abs_path, extra_paths)
299407
code_entries = collect_code_entries(functions_registry)
300408
event_entries = await collect_function_event_entries(prompts_registry)
301-
entries = [*code_entries, *event_entries]
409+
evaluator_entries = collect_evaluator_entries(evals_registry, abs_path)
410+
entries = [*code_entries, *event_entries, *evaluator_entries]
302411
file_manifest: dict[str, Any] = {
303412
"source_file": abs_path,
304413
"entries": entries,
305414
}
306-
if code_entries:
415+
if code_entries or evaluator_entries:
307416
runner_root = os.path.dirname(os.path.abspath(__file__))
308417
project_root = os.path.abspath(cwd)
309418
path_rest: list[str] = []
@@ -350,13 +459,24 @@ async def process_file(file_path: str) -> dict[str, Any]:
350459
continue
351460
seen_sources.add(init_source)
352461
bundled_sources.append(init_source)
462+
# Compute entry_module as a CWD-relative dotted path so that the
463+
# archive root inferred by push.rs walks back to CWD, matching
464+
# the Python SDK behavior and allowing sibling-package imports.
465+
rel_path = os.path.relpath(abs_path, cwd)
466+
archive_module = re.sub(r"\.py$", "", rel_path).replace("-", "_").replace(os.sep, ".")
353467
file_manifest["python_bundle"] = {
354-
"entry_module": module_name,
468+
"entry_module": archive_module,
355469
"sources": bundled_sources,
356470
}
357471

358472
clear_registry(functions_registry)
359473
clear_registry(prompts_registry)
474+
if (
475+
evals_registry is not None
476+
and hasattr(evals_registry, "evaluators")
477+
and isinstance(evals_registry.evaluators, dict)
478+
):
479+
evals_registry.evaluators.clear()
360480
return file_manifest
361481

362482

scripts/functions-runner.ts

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,20 @@ type Manifest = {
8888
files: ManifestFile[];
8989
};
9090

91+
/**
 * Converts `input` into a lowercase, hyphen-separated slug.
 *
 * Every run of characters outside `a-z0-9` collapses into one hyphen, and a
 * single leading or trailing hyphen is then removed.
 */
function slugify(input: string): string {
  const hyphenated = input.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  // After collapsing, at most one hyphen can remain at either end.
  return hyphenated.replace(/^-|-$/g, "");
}
97+
98+
/**
 * Picks a display name for the scorer at position `idx`.
 *
 * @param score - The scorer value; only functions contribute a name.
 * @param idx - Position of the scorer, used to build the fallback name.
 * @returns The function's non-empty string `name`, otherwise `scorer_<idx>`.
 */
function extractScoreName(score: unknown, idx: number): string {
  const fallback = `scorer_${idx}`;
  if (typeof score !== "function") {
    return fallback;
  }
  // Anonymous functions report an empty name, which is not a usable label.
  const declared: unknown = score.name;
  return typeof declared === "string" && declared !== "" ? declared : fallback;
}
104+
91105
type EvalRegistry = NonNullable<typeof globalThis._evals>;
92106
type ZodToJsonSchemaFn = (schema: unknown) => unknown;
93107
type ZodV4ToJsonSchemaFn = (
@@ -558,6 +572,97 @@ function collectCodeEntries(items: CodeRegistryItem[]): CodeEntry[] {
558572
return entries;
559573
}
560574

575+
/**
 * Describes each registered evaluator as a sandbox manifest entry.
 *
 * @param evaluators - Registry map from eval name to a wrapper object whose
 *   `evaluator` property is inspected; non-object values are skipped.
 * @param sourceFilePath - Path of the file being processed. Its basename
 *   (minus extension and a trailing `.eval`) prefixes the slug and is stored
 *   as `_bt_sandbox_group_name`; its path relative to `process.cwd()` is the
 *   entrypoint.
 * @returns One `function_type: "sandbox"` code entry per usable evaluator.
 */
function collectEvaluatorEntries(
  evaluators: Record<string, unknown>,
  sourceFilePath: string,
): CodeEntry[] {
  const groupName = path
    .basename(sourceFilePath, path.extname(sourceFilePath))
    .replace(/\.eval$/, "");
  const entries: CodeEntry[] = [];

  for (const [evalName, registered] of Object.entries(evaluators)) {
    if (!registered || typeof registered !== "object") {
      continue;
    }
    const evaluator = (registered as Record<string, unknown>).evaluator;
    if (!evaluator || typeof evaluator !== "object") {
      continue;
    }
    const evalObj = evaluator as Record<string, unknown>;

    // Only a string project name is forwarded to the selector helper.
    const declaredProject = evalObj.project_name;
    const scorers = Array.isArray(evalObj.scores) ? evalObj.scores : [];
    const selector = asProjectSelector(
      typeof declaredProject === "string" ? { name: declaredProject } : undefined,
    );
    const projectId =
      typeof selector.project_id === "string" ? selector.project_id : undefined;
    const selectorProjectName =
      typeof selector.project_name === "string"
        ? selector.project_name
        : undefined;

    const evaluatorDefinition: JsonObject = {
      scores: scorers.map((scorer: unknown, index: number) => ({
        name: extractScoreName(scorer, index),
      })) as JsonValue,
    };

    const rawParams = evalObj.parameters;
    if (rawParams != null) {
      const isMarked =
        typeof rawParams === "object" &&
        (rawParams as Record<string, unknown>).__braintrust_parameters_marker === true;
      if (isMarked) {
        // Marked parameter objects are emitted as a reference to their
        // source rather than being serialized by value.
        const paramObj = rawParams as Record<string, unknown>;
        evaluatorDefinition.parameters = toJsonValue({
          type: "braintrust.parameters",
          schema: paramObj.schema,
          source: {
            parametersId: paramObj.id,
            slug: paramObj.slug,
            name: paramObj.name,
            projectId: paramObj.projectId,
            version: paramObj.version,
          },
        } as JsonValue);
      } else {
        const serialized = toJsonValue(rawParams as JsonValue);
        if (serialized !== undefined) {
          evaluatorDefinition.parameters = serialized;
        }
      }
    }

    // Sandbox entry only — task and scorer entries are pushed separately
    // when the eval is actually run, matching the Python SDK behavior.
    const sandboxEntry: CodeEntry = {
      kind: "code",
      project_id: projectId,
      project_name: selectorProjectName,
      name: `Eval ${evalName} sandbox`,
      slug: slugify(`${groupName}-${evalName}-sandbox`),
      function_type: "sandbox",
      location: {
        type: "sandbox",
        sandbox_spec: { provider: "lambda" },
        entrypoints: [path.relative(process.cwd(), sourceFilePath)],
        eval_name: evalName,
        evaluator_definition: evaluatorDefinition as JsonValue,
      } as JsonValue,
      metadata: { _bt_sandbox_group_name: groupName },
    };
    entries.push(sandboxEntry);
  }

  return entries;
}
665+
561666
async function processFile(filePath: string): Promise<ManifestFile> {
562667
const absolutePath = path.resolve(process.cwd(), filePath);
563668
const fallbackRegistry = freshRegistry();
@@ -577,6 +682,10 @@ async function processFile(filePath: string): Promise<ManifestFile> {
577682
registry.parameters as EventRegistryItem[],
578683
false,
579684
)),
685+
...collectEvaluatorEntries(
686+
registry.evaluators as Record<string, unknown>,
687+
absolutePath,
688+
),
580689
];
581690

582691
return {

0 commit comments

Comments
 (0)