Skip to content

Commit ab715bf

Browse files
Improve evals run (#23)
* suppress unhelpful logging of "No previous versions to compare with."
* number of evaluators when there's only one evaluatee
* print link to evaluation once more after results
1 parent 5ef02c9 commit ab715bf

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

src/humanloop/eval_utils.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -377,7 +377,7 @@ def process_datapoint(datapoint: Datapoint):
377 377

378 378
# Execute the function and send the logs to Humanloop in parallel
379 379
total_datapoints = len(hl_dataset.datapoints)
380-
logger.info(f"\n{CYAN}Navigate to your evals:{RESET}\n{evaluation.url}\n")
380+
logger.info(f"\n{CYAN}Navigate to your Evaluation:{RESET}\n{evaluation.url}\n")
381 381
logger.info(f"{CYAN}{type_.capitalize()} Version ID: {hl_file.version_id}{RESET}")
382 382
logger.info(f"{CYAN}Run ID: {batch_id}{RESET}")
383 383

@@ -398,7 +398,7 @@ def process_datapoint(datapoint: Datapoint):
398 398

399 399
# Wait for the Evaluation to complete then print the results
400 400
complete = False
401-
stats = None
401+
402 402
while not complete:
403 403
stats = client.evaluations.get_stats(id=evaluation.id)
404 404
logger.info(f"\r{stats.progress}")
@@ -410,6 +410,10 @@ def process_datapoint(datapoint: Datapoint):
410 410
logger.info(stats.report)
411 411

412 412
checks: List[EvaluatorCheck] = []
413+
if all(evaluator.get("threshold") is None for evaluator in evaluators) and len(stats.version_stats) == 1:
414+
# Skip `check_evaluation_improvement` if no thresholds were provided and there is only one run.
415+
# (Or the logs would not be helpful)
416+
return checks
413 417
for evaluator in evaluators:
414 418
improvement_check, score, delta = check_evaluation_improvement(
415 419
evaluation=evaluation,
@@ -437,6 +441,7 @@ def process_datapoint(datapoint: Datapoint):
437 441
threshold_check=threshold_check,
438 442
)
439 443
)
444+
logger.info(f"\n{CYAN}View your Evaluation:{RESET}\n{evaluation.url}\n")
440 445
return checks
441 446

442 447

0 commit comments

Comments
 (0)