@@ -87,7 +87,7 @@ class File(Identifiers):
8787 """A File on Humanloop (Flow, Prompt, Tool, Evaluator)."""
8888
8989 type : NotRequired [FileType ]
90- """The type of File this function relates to on Humanloop."""
90+ """The type of File this callable relates to on Humanloop."""
9191 version : NotRequired [Version ]
9292 """The contents uniquely define the version of the File on Humanloop."""
9393 callable : Callable
@@ -143,8 +143,9 @@ class EvaluatorCheck(BaseModel):
143143
144144 path : str
145145 """The path of the Evaluator used in the check."""
146- improvement_check : bool
147- """Whether the latest version of your function has improved across the Dataset for a specific Evaluator."""
146+ # TODO: Add number valence and improvement check
147+ # improvement_check: bool
148+ # """Whether the latest version of your function has improved across the Dataset for a specific Evaluator."""
148149 score : float
149150 """The score of the latest version of your function for a specific Evaluator."""
150151 delta : float
@@ -415,7 +416,7 @@ def process_datapoint(datapoint: Datapoint):
415416 # (Or the logs would not be helpful)
416417 return checks
417418 for evaluator in evaluators :
418- improvement_check , score , delta = check_evaluation_improvement (
419+ _ , score , delta = check_evaluation_improvement (
419420 evaluation = evaluation ,
420421 stats = stats ,
421422 evaluator_path = evaluator ["path" ],
@@ -434,7 +435,8 @@ def process_datapoint(datapoint: Datapoint):
434435 checks .append (
435436 EvaluatorCheck (
436437 path = evaluator ["path" ],
437- improvement_check = improvement_check ,
438+ # TODO: Add back in with number valence on Evaluators
439+ # improvement_check=improvement_check,
438440 score = score ,
439441 delta = delta ,
440442 threshold = threshold ,
@@ -590,10 +592,10 @@ def check_evaluation_improvement(
590592 previous_score = get_score_from_evaluator_stat (stat = previous_evaluator_stat )
591593 diff = round (latest_score - previous_score , 2 )
592594 if diff >= 0 :
593- logger .info (f"{ GREEN } ✅ Improvement of [{ diff } ] for evaluator { evaluator_path } { RESET } " )
595+ logger .info (f"{ CYAN } Change of [{ diff } ] for Evaluator { evaluator_path } { RESET } " )
594596 return True , latest_score , diff
595597 else :
596- logger .info (f"{ RED } ❌ Regression of [{ diff } ] for evaluator { evaluator_path } { RESET } " )
598+ logger .info (f"{ CYAN } Change of [{ diff } ] for Evaluator { evaluator_path } { RESET } " )
597599 return False , latest_score , diff
598600 else :
599601 raise ValueError (f"Evaluator { evaluator_path } not found in the stats." )
0 commit comments