Skip to content

Commit a97d2c8

Browse files
committed
Avoid returning improvement on number evaluators. Add back in when we return number valence from the API.
1 parent 1637bf5 commit a97d2c8

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

src/humanloop/eval_utils.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ class File(Identifiers):
8787
"""A File on Humanloop (Flow, Prompt, Tool, Evaluator)."""
8888

8989
type: NotRequired[FileType]
90-
"""The type of File this function relates to on Humanloop."""
90+
"""The type of File this callable relates to on Humanloop."""
9191
version: NotRequired[Version]
9292
"""The contents uniquely define the version of the File on Humanloop."""
9393
callable: Callable
@@ -143,8 +143,9 @@ class EvaluatorCheck(BaseModel):
143143

144144
path: str
145145
"""The path of the Evaluator used in the check."""
146-
improvement_check: bool
147-
"""Whether the latest version of your function has improved across the Dataset for a specific Evaluator."""
146+
# TODO: Add number valence and improvement check
147+
# improvement_check: bool
148+
# """Whether the latest version of your function has improved across the Dataset for a specific Evaluator."""
148149
score: float
149150
"""The score of the latest version of your function for a specific Evaluator."""
150151
delta: float
@@ -415,7 +416,7 @@ def process_datapoint(datapoint: Datapoint):
415416
# (Or the logs would not be helpful)
416417
return checks
417418
for evaluator in evaluators:
418-
improvement_check, score, delta = check_evaluation_improvement(
419+
_, score, delta = check_evaluation_improvement(
419420
evaluation=evaluation,
420421
stats=stats,
421422
evaluator_path=evaluator["path"],
@@ -434,7 +435,8 @@ def process_datapoint(datapoint: Datapoint):
434435
checks.append(
435436
EvaluatorCheck(
436437
path=evaluator["path"],
437-
improvement_check=improvement_check,
438+
# TODO: Add back in with number valence on Evaluators
439+
# improvement_check=improvement_check,
438440
score=score,
439441
delta=delta,
440442
threshold=threshold,
@@ -590,10 +592,10 @@ def check_evaluation_improvement(
590592
previous_score = get_score_from_evaluator_stat(stat=previous_evaluator_stat)
591593
diff = round(latest_score - previous_score, 2)
592594
if diff >= 0:
593-
logger.info(f"{GREEN}✅ Improvement of [{diff}] for evaluator {evaluator_path}{RESET}")
595+
logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}")
594596
return True, latest_score, diff
595597
else:
596-
logger.info(f"{RED}❌ Regression of [{diff}] for evaluator {evaluator_path}{RESET}")
598+
logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}")
597599
return False, latest_score, diff
598600
else:
599601
raise ValueError(f"Evaluator {evaluator_path} not found in the stats.")

0 commit comments

Comments
 (0)