33import inspect
44import json
55import os
6+ import re
67import sys
78from contextlib import nullcontext
89from typing import Any
@@ -28,9 +29,9 @@ def to_json_value(value: Any) -> Any:
2829 return [to_json_value (item ) for item in value ]
2930 if isinstance (value , dict ):
3031 return {str (key ): to_json_value (val ) for key , val in value .items ()}
31- if hasattr (value , "model_dump" ):
32+ if hasattr (value , "model_dump" ) and not isinstance ( value , type ) :
3233 return to_json_value (value .model_dump ())
33- if hasattr (value , "dict" ):
34+ if hasattr (value , "dict" ) and not isinstance ( value , type ) :
3435 return to_json_value (value .dict ())
3536 if hasattr (value , "__dict__" ):
3637 result : dict [str , Any ] = {}
@@ -42,21 +43,31 @@ def to_json_value(value: Any) -> Any:
4243 return str (value )
4344
4445
def _load_evals_registry() -> Any:
    """Return ``braintrust.framework._evals``, or ``None`` if it is unavailable.

    The import is attempted the same way for both SDK layouts, so it lives in
    one place instead of being repeated in each branch of the caller.
    """
    try:
        from braintrust.framework import _evals
    except ImportError:  # also covers ModuleNotFoundError (a subclass)
        return None
    return _evals


def load_framework_globals() -> tuple[Any, Any, Any, Any]:
    """Import the Braintrust SDK objects this runner works with.

    Returns:
        A 4-tuple ``(functions, prompts, lazy, evals)``:
        the functions registry, the prompts registry, the SDK's
        ``_set_lazy_load`` object, and ``braintrust.framework._evals`` (or
        ``None`` when that name cannot be imported).

    Raises:
        ImportError: if neither the current nor the older SDK layout can be
            imported.
    """
    # Prefer current SDK layout first:
    # - braintrust.framework2 exposes module-level `global_`
    # - braintrust.framework exposes `_set_lazy_load`
    try:
        from braintrust.framework import _set_lazy_load as lazy
        from braintrust.framework2 import global_ as global_state

        return (
            global_state.functions,
            global_state.prompts,
            lazy,
            _load_evals_registry(),
        )
    except ImportError:
        # Backward compatibility with older SDK layout.
        from braintrust.framework2.global_ import functions, prompts
        from braintrust.framework2.lazy_load import _set_lazy_load as lazy

        return functions, prompts, lazy, _load_evals_registry()
6071
6172
6273def normalize_project_selector (project : Any ) -> tuple [str | None , str | None ]:
@@ -277,16 +288,113 @@ async def collect_function_event_entries(prompts_registry: Any) -> list[dict[str
277288 return entries
278289
279290
def slugify(text: str) -> str:
    """Turn *text* into a lowercase, dash-separated slug.

    The text is lowercased, every run of characters outside ``a-z0-9`` is
    replaced by a single ``-``, and a leading or trailing ``-`` is removed.
    Text with no ASCII letters or digits yields an empty string.
    """
    collapsed = re.sub(r"[^a-z0-9]+", "-", text.lower())
    # Runs were already collapsed, so at most one dash sits at either end.
    return collapsed.strip("-")
293+
294+
def _evaluator_parameters_definition(raw_params: Any) -> Any:
    """Build the ``parameters`` value for an evaluator definition.

    Objects whose ``__braintrust_parameters_marker`` attribute is ``True`` are
    described by reference (schema plus source identifiers). Anything else is
    serialized, and the result may be ``None``.
    """
    if getattr(raw_params, "__braintrust_parameters_marker", None) is True:
        return {
            "type": "braintrust.parameters",
            "schema": getattr(raw_params, "schema", None),
            "source": {
                "parametersId": getattr(raw_params, "id", None),
                "slug": getattr(raw_params, "slug", None),
                "name": getattr(raw_params, "name", None),
                "projectId": getattr(raw_params, "projectId", None),
                "version": getattr(raw_params, "version", None),
            },
        }

    # Use the braintrust SDK's parameters_to_json_schema when
    # available so that Pydantic model classes are converted to
    # proper staticParametersSchema entries (type: "data" with a
    # JSON Schema) that the UI can parse.
    try:
        from braintrust.parameters import parameters_to_json_schema

        return parameters_to_json_schema(raw_params)
    except Exception:
        # Deliberate best-effort: a missing helper or a failed conversion
        # falls back to the generic serializer instead of aborting.
        return to_json_value(raw_params)


def collect_evaluator_entries(evals_registry: Any, source_file: str) -> list[dict[str, Any]]:
    """Build one sandbox manifest entry per evaluator in *evals_registry*.

    Args:
        evals_registry: Object expected to expose an ``evaluators`` dict that
            maps eval names to instances carrying an ``evaluator`` attribute.
            ``None``, a missing or empty ``evaluators``, or a non-dict value
            all produce an empty result.
        source_file: Path of the file that defined the evaluators. Its base
            name, minus the extension and a trailing ``.eval``, becomes the
            slug prefix and the sandbox group name.

    Returns:
        A list of entry dicts with ``function_type`` set to ``"sandbox"``.
        Registry items that are ``None`` or have no ``evaluator`` are skipped.
    """
    if evals_registry is None:
        return []

    evaluators = getattr(evals_registry, "evaluators", None)
    if not evaluators or not isinstance(evaluators, dict):
        return []

    stem_base, _ = os.path.splitext(os.path.basename(source_file))
    stem = re.sub(r"\.eval$", "", stem_base)
    # Same for every entry, so compute it once.
    entrypoint = os.path.relpath(source_file)

    entries: list[dict[str, Any]] = []
    for eval_name, instance in evaluators.items():
        if instance is None:
            continue
        evaluator = getattr(instance, "evaluator", None)
        if evaluator is None:
            continue

        project_name = getattr(evaluator, "project_name", None)
        project_id, proj_name = normalize_project_selector(
            {"project_name": project_name} if isinstance(project_name, str) else None
        )

        scores = getattr(evaluator, "scores", []) or []
        evaluator_definition: dict[str, Any] = {
            "scores": [
                # Scorers without a __name__ get a positional placeholder.
                {"name": getattr(score, "__name__", f"scorer_{i}")}
                for i, score in enumerate(scores)
            ]
        }

        raw_params = getattr(evaluator, "parameters", None)
        if raw_params is not None:
            parameters = _evaluator_parameters_definition(raw_params)
            if parameters is not None:
                evaluator_definition["parameters"] = parameters

        base_entry: dict[str, Any] = {"kind": "code"}
        if project_id:
            base_entry["project_id"] = project_id
        if proj_name:
            base_entry["project_name"] = proj_name

        # Sandbox entry only — task and scorer entries are pushed separately
        # when the eval is actually run, matching the Python SDK behavior.
        entries.append(
            {
                **base_entry,
                "name": f"Eval {eval_name} sandbox",
                "slug": slugify(f"{stem}-{eval_name}-sandbox"),
                "function_type": "sandbox",
                "location": {
                    "type": "sandbox",
                    "sandbox_spec": {"provider": "lambda"},
                    "entrypoints": [entrypoint],
                    "eval_name": eval_name,
                    "evaluator_definition": evaluator_definition,
                },
                "metadata": {"_bt_sandbox_group_name": stem},
            }
        )

    return entries
380+
381+
280382async def process_file (file_path : str ) -> dict [str , Any ]:
281383 abs_path = os .path .abspath (file_path )
282384 cwd = os .getcwd ()
283385 if cwd not in sys .path :
284386 sys .path .insert (0 , cwd )
285387
286- purge_local_modules (cwd , preserve_modules = {__name__ , "python_runner_common" })
287- functions_registry , prompts_registry , lazy_loader = load_framework_globals ()
388+ functions_registry , prompts_registry , lazy_loader , evals_registry = load_framework_globals ()
288389 clear_registry (functions_registry )
289390 clear_registry (prompts_registry )
391+ if (
392+ evals_registry is not None
393+ and hasattr (evals_registry , "evaluators" )
394+ and isinstance (evals_registry .evaluators , dict )
395+ ):
396+ evals_registry .evaluators .clear ()
397+ purge_local_modules (cwd , preserve_modules = {__name__ , "python_runner_common" })
290398
291399 module_name = import_module_name_from_cwd (cwd , abs_path )
292400 if module_name is None :
@@ -298,12 +406,13 @@ async def process_file(file_path: str) -> dict[str, Any]:
298406 import_file (module_name , abs_path , extra_paths )
299407 code_entries = collect_code_entries (functions_registry )
300408 event_entries = await collect_function_event_entries (prompts_registry )
301- entries = [* code_entries , * event_entries ]
409+ evaluator_entries = collect_evaluator_entries (evals_registry , abs_path )
410+ entries = [* code_entries , * event_entries , * evaluator_entries ]
302411 file_manifest : dict [str , Any ] = {
303412 "source_file" : abs_path ,
304413 "entries" : entries ,
305414 }
306- if code_entries :
415+ if code_entries or evaluator_entries :
307416 runner_root = os .path .dirname (os .path .abspath (__file__ ))
308417 project_root = os .path .abspath (cwd )
309418 path_rest : list [str ] = []
@@ -350,13 +459,24 @@ async def process_file(file_path: str) -> dict[str, Any]:
350459 continue
351460 seen_sources .add (init_source )
352461 bundled_sources .append (init_source )
462+ # Compute entry_module as a CWD-relative dotted path so that the
463+ # archive root inferred by push.rs walks back to CWD, matching
464+ # the Python SDK behavior and allowing sibling-package imports.
465+ rel_path = os .path .relpath (abs_path , cwd )
466+ archive_module = re .sub (r"\.py$" , "" , rel_path ).replace ("-" , "_" ).replace (os .sep , "." )
353467 file_manifest ["python_bundle" ] = {
354- "entry_module" : module_name ,
468+ "entry_module" : archive_module ,
355469 "sources" : bundled_sources ,
356470 }
357471
358472 clear_registry (functions_registry )
359473 clear_registry (prompts_registry )
474+ if (
475+ evals_registry is not None
476+ and hasattr (evals_registry , "evaluators" )
477+ and isinstance (evals_registry .evaluators , dict )
478+ ):
479+ evals_registry .evaluators .clear ()
360480 return file_manifest
361481
362482
0 commit comments