44from bobber .lib .exit_codes import BASELINE_FAILURE
55from bobber .lib .analysis .common import bcolors
66from bobber .lib .system .file_handler import read_yaml
7- from typing import Optional , Tuple
7+ from typing import NoReturn , Optional , Tuple
88
99
1010# Map the dictionary keys in the baseline to human-readable names.
@@ -111,6 +111,8 @@ def evaluate_fio(baselines: dict, results: dict, test_name: str, failures: int,
111111 threshold.
112112 """
113113 for test , value in baselines .items ():
114+ if test_name not in results .keys ():
115+ continue
114116 if test_name == 'bandwidth' :
115117 unit = '(GB/s)'
116118 expected = value / 1000000000
@@ -155,6 +157,8 @@ def evaluate_nccl(baseline: dict, results: dict, failures: int,
155157 Returns an ``integer`` of the number of results that have not met the
156158 threshold.
157159 """
160+ if 'max_bus_bw' not in baseline .keys ():
161+ return failures
158162 print (' NCCL Max Bus Bandwidth (GB/s)' )
159163 expected = baseline ['max_bus_bw' ]
160164 got = results ['nccl' ]['max_bus_bw' ]
@@ -196,6 +200,8 @@ def evaluate_dali(baselines: dict, results: dict, test_name: str,
196200 threshold.
197201 """
198202 for test , value in baselines .items ():
203+ if test not in results .keys ():
204+ continue
199205 print (f' DALI { test } (images/second)' )
200206 expected = value
201207 got = round (results [test ]['average images/second' ], 3 )
@@ -208,7 +214,7 @@ def evaluate_dali(baselines: dict, results: dict, test_name: str,
208214
209215
210216def evaluate_test (baseline : dict , results : dict , system_count : int ,
211- tolerance : int ) :
217+ tolerance : int , failures : int ) -> int :
212218 """
213219 Evaluate all tests for N-nodes and compare against the baseline.
214220
@@ -228,9 +234,16 @@ def evaluate_test(baseline: dict, results: dict, system_count: int,
228234 tolerance : int
229235 An ``int`` of the percentage below the threshold to still mark as
230236 passing.
231- """
232- failures = 0
237+ failures : int
238+ An ``integer`` of the number of results that have not met the
239+ threshold.
233240
241+ Returns
242+ -------
243+ int
244+ Returns an ``integer`` of the number of results that have not met the
245+ threshold.
246+ """
234247 for test_name , test_values in baseline .items ():
235248 print ('-' * 80 )
236249 if test_name in ['bandwidth' , 'iops' ]:
@@ -244,18 +257,11 @@ def evaluate_test(baseline: dict, results: dict, system_count: int,
244257 test_name ,
245258 failures ,
246259 tolerance )
247-
248- if failures > 0 :
249- print ('-' * 80 )
250- print (f'{ failures } tests did not meet the suggested criteria!' )
251- print ('See results above for failed tests and verify setup.' )
252- # Throw a non-zero exit status so any tools that read codes will catch
253- # that the baseline was not met.
254- sys .exit (BASELINE_FAILURE )
260+ return failures
255261
256262
257263def compare_baseline (results : dict , baseline : str , tolerance : int ,
258- custom : Optional [bool ] = False ):
264+ custom : Optional [bool ] = False ) -> NoReturn :
259265 """
260266 Compare a baseline against parsed results.
261267
@@ -281,6 +287,8 @@ def compare_baseline(results: dict, baseline: str, tolerance: int,
281287 passed from a YAML file. If `False`, it will compare against an
282288 included baseline.
283289 """
290+ failures = 0
291+
284292 print ('=' * 80 )
285293 print ('Baseline assessment' )
286294 if custom :
@@ -299,9 +307,18 @@ def compare_baseline(results: dict, baseline: str, tolerance: int,
299307 print ('Skipping...' )
300308 continue
301309 print (f' { system_count } System(s)' )
302- evaluate_test (baseline_results ,
303- results ['systems' ][str (system_count )],
304- system_count ,
305- tolerance )
310+ failures = evaluate_test (baseline_results ,
311+ results ['systems' ][str (system_count )],
312+ system_count ,
313+ tolerance ,
314+ failures )
315+
316+ if failures > 0 :
317+ print ('-' * 80 )
318+ print (f'{ failures } test(s) did not meet the suggested criteria!' )
319+ print ('See results above for failed tests and verify setup.' )
320+ # Throw a non-zero exit status so any tools that read codes will catch
321+ # that the baseline was not met.
322+ sys .exit (BASELINE_FAILURE )
306323
307324 print ('=' * 80 )
0 commit comments