Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2b96e5a
Add pymoo_minimize.py
maric-a-b Apr 2, 2025
d1d1540
add "error" field to result dict and convert existing code to use it
maric-a-b May 8, 2025
133d35c
add rough support for multiple objectives through pymoo
maric-a-b May 8, 2025
541972c
pareto front algo
maric-a-b Aug 13, 2025
daca590
forgot to commit for a very long time...
maric-a-b Nov 5, 2025
ee08aaf
Replace the error key "error" with "__error__" to make collision with
maric-a-b Jan 4, 2026
5022750
New pymoo version needs at least Python 3.10
maric-a-b Jan 4, 2026
cd3f16a
Use pymoo's impl of non dom sorting
maric-a-b Jan 4, 2026
e8cbcda
- update tune_cache() interface
maric-a-b Jan 4, 2026
1fa76b2
fixed and improved version of the pymoo strat
maric-a-b Jan 4, 2026
393a79b
improves infeasible evals counter
maric-a-b Jan 4, 2026
e576048
Removes superfluous print
maric-a-b Jan 4, 2026
86f5ea7
removes unused func
maric-a-b Jan 21, 2026
f6c795b
improves `tune_kernel()` code
maric-a-b Jan 21, 2026
0dba06b
makes tests compat with changes
maric-a-b Jan 21, 2026
c0a20b3
fix import issue
maric-a-b Jan 21, 2026
0efc4bc
fix seq runner
maric-a-b Jan 21, 2026
a20f18e
removes old commented out code
maric-a-b Jan 21, 2026
7af5d60
adds support for different crossover methods
maric-a-b Feb 11, 2026
e5413fb
Merge branch 'master' into multiobjective_optimization and fix up
maric-a-b Apr 2, 2026
e071138
Updates genetic_algorithm.py to be compat with `objective` now being a
maric-a-b Apr 6, 2026
4e7fd38
Updated simulation.py
maric-a-b Apr 6, 2026
dae036a
Updates test_runner.py
maric-a-b Apr 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ temp_*.*
.DS_Store
.python-version
.nox
.venv

### Visual Studio Code ###
!.vscode/settings.json
Expand Down
7 changes: 6 additions & 1 deletion kernel_tuner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from kernel_tuner.integration import store_results, create_device_targets
from kernel_tuner.interface import tune_kernel, tune_kernel_T1, run_kernel
from kernel_tuner.interface import (
tune_kernel,
tune_kernel_T1,
tune_cache,
run_kernel,
)

from importlib.metadata import version

Expand Down
11 changes: 8 additions & 3 deletions kernel_tuner/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@
self.tegra.gr_clock = instance.params["tegra_gr_clock"]


def benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_setting=False):

Check warning on line 441 in kernel_tuner/core.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove the unused function parameter "objective".

See more on https://sonarcloud.io/project/issues?id=KernelTuner_kernel_tuner&issues=AZ1jKIR_2PhC6AteR9mS&open=AZ1jKIR_2PhC6AteR9mS&pullRequest=358
"""Benchmark the kernel instance."""
logging.debug("benchmark " + instance.name)
logging.debug("thread block dimensions x,y,z=%d,%d,%d", *instance.threads)
Expand Down Expand Up @@ -480,11 +480,14 @@
print(
f"skipping config {util.get_instance_string(instance.params)} reason: too many resources requested for launch"
)
result[objective] = util.RuntimeFailedConfig()
result['__error__'] = util.RuntimeFailedConfig()
else:
logging.debug("benchmark encountered runtime failure: " + str(e))
print("Error while benchmarking:", instance.name)
raise e

assert util.check_result_type(result), "The error in a result MUST be an actual error."

return result

def check_kernel_output(
Expand Down Expand Up @@ -567,7 +570,7 @@

instance = self.create_kernel_instance(kernel_source, kernel_options, params, verbose)
if isinstance(instance, util.ErrorConfig):
result[to.objective] = util.InvalidConfig()
result['__error__'] = util.InvalidConfig()
else:
# Preprocess the argument list. This is required to deal with `MixedPrecisionArray`s
gpu_args = _preprocess_gpu_arguments(gpu_args, params)
Expand All @@ -577,7 +580,7 @@
start_compilation = time.perf_counter()
func = self.compile_kernel(instance, verbose)
if not func:
result[to.objective] = util.CompilationFailedConfig()
result['__error__'] = util.CompilationFailedConfig()
else:
# add shared memory arguments to compiled module
if kernel_options.smem_args is not None:
Expand Down Expand Up @@ -622,6 +625,8 @@
result["verification_time"] = last_verification_time or 0
result["benchmark_time"] = last_benchmark_time or 0

assert util.check_result_type(result), "The error in a result MUST be an actual error."

return result

def compile_kernel(self, instance, verbose):
Expand Down
58 changes: 41 additions & 17 deletions kernel_tuner/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

:returns: the current version of the T1 schemas and the JSON string of the schema
:rtype: string, string
"""
"""
current_version = "1.0.0"
input_file = schema_dir.joinpath(f"T1/{current_version}/input-schema.json")
with input_file.open() as fh:
Expand All @@ -30,9 +30,9 @@
def get_input_file(filepath: Path, validate=True) -> dict[str, any]:
"""Load the T1 input file from the given path, validates it and returns contents if valid.

:param filepath: Path to the input file to load.
:returns: the contents of the file if valid.
"""
:param filepath: Path to the input file to load.
:returns: the contents of the file if valid.
"""
with filepath.open() as fp:
input_file = json.load(fp)
if validate:
Expand All @@ -57,20 +57,38 @@
return current_version, json_string


def get_configuration_validity(objective) -> str:
def get_configuration_validity(error) -> str:
"""Convert internal Kernel Tuner error to string."""
errorstring: str
if not isinstance(objective, util.ErrorConfig):
if not isinstance(error, util.ErrorConfig):
errorstring = "correct"
else:
if isinstance(objective, util.CompilationFailedConfig):
if isinstance(error, util.CompilationFailedConfig):
errorstring = "compile"
elif isinstance(objective, util.RuntimeFailedConfig):
elif isinstance(error, util.RuntimeFailedConfig):
errorstring = "runtime"
elif isinstance(objective, util.InvalidConfig):
elif isinstance(error, util.InvalidConfig):
errorstring = "constraints"
else:
raise ValueError(f"Unkown objective type {type(objective)}, value {objective}")
raise ValueError(f"Unkown error type {type(error)}, value {error}")
return errorstring


def get_configuration_validity2(result) -> str:
    """Classify a result dictionary as "correct" or by its error category.

    A failed configuration stores an ``util.ErrorConfig`` instance under the
    ``"__error__"`` key of the result dict; a successful one has no such key.

    :param result: a single result dictionary as produced by the runner.
    :returns: one of "correct", "compile", "runtime" or "constraints".
    :raises ValueError: if the stored error is of an unrecognized type.
    """
    if "__error__" not in result:
        return "correct"
    error = result["__error__"]
    # order matters only in that each check is a distinct ErrorConfig subclass
    if isinstance(error, util.CompilationFailedConfig):
        return "compile"
    if isinstance(error, util.RuntimeFailedConfig):
        return "runtime"
    if isinstance(error, util.InvalidConfig):
        return "constraints"
    raise ValueError(f"Unknown error type {type(error)}, value {error}")


Expand All @@ -88,7 +106,7 @@
filepath.mkdir()


def get_t4_results(results, tune_params, objective="time"):

Check failure on line 109 in kernel_tuner/file_utils.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 19 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=KernelTuner_kernel_tuner&issues=AZ1jKIQ22PhC6AteR9mQ&open=AZ1jKIQ22PhC6AteR9mQ&pullRequest=358
"""Get the obtained auto-tuning results in a dictionary.

This function produces a dictionary that adheres to the T4 auto-tuning output JSON schema.
Expand All @@ -103,6 +121,11 @@
:type objective: string

"""
assert not isinstance(objective, (list, tuple))

if isinstance(objective, (list, tuple)) and len(objective) > 1:
raise ValueError("The T4 format does not support multiple objectives.")

timing_keys = ["compile_time", "benchmark_time", "framework_time", "strategy_time", "verification_time"]
not_measurement_keys = list(tune_params.keys()) + timing_keys + ["timestamp"] + ["times"]

Expand All @@ -129,7 +152,8 @@
out["times"] = timings

# encode the validity of the configuration
out["invalidity"] = get_configuration_validity(result[objective])
# out["invalidity"] = get_configuration_validity(result[objective])
out["invalidity"] = get_configuration_validity2(result)

# Kernel Tuner does not support producing results of configs that fail the correctness check
# therefore correctness is always 1
Expand All @@ -142,11 +166,11 @@
measurements.append(dict(name=key, value=value, unit="ms" if key.startswith("time") else ""))
out["measurements"] = measurements

# objectives

Check warning on line 169 in kernel_tuner/file_utils.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this commented out code.

See more on https://sonarcloud.io/project/issues?id=KernelTuner_kernel_tuner&issues=AZ1jKIQ22PhC6AteR9mR&open=AZ1jKIQ22PhC6AteR9mR&pullRequest=358
# In Kernel Tuner we currently support only one objective at a time, this can be a user-defined
# metric that combines scores from multiple different quantities into a single value to support
# multi-objective tuning however.
out["objectives"] = [objective]
# out["objectives"] = objective
objectives = [objective] if isinstance(objective, str) else list(objective)
assert isinstance(objectives, list)
out["objectives"] = objectives

# append to output
output_data.append(out)
Expand Down Expand Up @@ -310,7 +334,7 @@
spec = spec_from_file_location(module_name, file_path)
if spec is None:
raise ImportError(f"Could not load spec from {file_path}")

# create a module from the spec and execute it
module = module_from_spec(spec)
spec.loader.exec_module(module)
Expand All @@ -322,6 +346,6 @@
module = load_module(file_path.stem)
except ImportError:
module = load_module(f"{file_path.parent.stem}.{file_path.stem}")

# return the class from the module
return getattr(module, class_name)
88 changes: 74 additions & 14 deletions kernel_tuner/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
pyatf_strategies,
random_sample,
simulated_annealing,
pymoo_minimize,
skopt
)
from kernel_tuner.strategies.wrapper import OptAlgWrapper
Expand All @@ -86,6 +87,8 @@
"skopt": skopt,
"firefly_algorithm": firefly_algorithm,
"bayes_opt": bayes_opt,
"nsga2": pymoo_minimize,
"nsga3": pymoo_minimize,
"pyatf_strategies": pyatf_strategies,
}

Expand Down Expand Up @@ -438,15 +441,15 @@
"""Optimization objective to sort results on, consisting of a string
that also occurs in results as a metric or observed quantity, default 'time'.
Please see :ref:`objectives`.""",
"string",
"str | list[str]",
),
),
(
"objective_higher_is_better",
(
"""boolean that specifies whether the objective should
be maximized (True) or minimized (False), default False.""",
"bool",
"bool | list[bool]",
),
),
(
Expand Down Expand Up @@ -477,6 +480,7 @@
("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
("seed", ("""The random seed.""", "int")),
]
)

Expand Down Expand Up @@ -555,7 +559,7 @@
)


def tune_kernel(

Check warning on line 562 in kernel_tuner/interface.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

This function "tune_kernel" has 115 lines of code, which is greater than the 100 authorized. Split it into smaller functions.

See more on https://sonarcloud.io/project/issues?id=KernelTuner_kernel_tuner&issues=AZ1jKIN92PhC6AteR9mI&open=AZ1jKIN92PhC6AteR9mI&pullRequest=358
kernel_name,
kernel_source,
problem_size,
Expand Down Expand Up @@ -590,6 +594,8 @@
observers=None,
objective=None,
objective_higher_is_better=None,
objectives=None,
seed=None,
):
start_overhead_time = perf_counter()
if log:
Expand All @@ -599,8 +605,20 @@

_check_user_input(kernel_name, kernelsource, arguments, block_size_names)

# default objective if none is specified
objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better)
if objectives:
if isinstance(objectives, dict):
objective = list(objectives.keys())
objective_higher_is_better = list(objectives.values())
else:
raise ValueError("objectives should be a dict of (objective, higher_is_better) pairs")
else:
objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better)
objective = [objective]
objective_higher_is_better = [objective_higher_is_better]

assert isinstance(objective, list)
assert isinstance(objective_higher_is_better, list)
assert len(objective) == len(objective_higher_is_better)

# check for forbidden names in tune parameters
util.check_tune_params_list(tune_params, observers, simulation_mode=simulation_mode)
Expand All @@ -624,9 +642,9 @@
if "max_fevals" in strategy_options:
tuning_options["max_fevals"] = strategy_options["max_fevals"]
if "time_limit" in strategy_options:
tuning_options["time_limit"] = strategy_options["time_limit"]
tuning_options["time_limit"] = strategy_options["time_limit"]
if "searchspace_construction_options" in strategy_options:
searchspace_construction_options = strategy_options["searchspace_construction_options"]
searchspace_construction_options = strategy_options["searchspace_construction_options"]

# log the user inputs
logging.debug("tune_kernel called")
Expand Down Expand Up @@ -701,13 +719,33 @@

# finished iterating over search space
if results: # checks if results is not empty
best_config = util.get_best_config(results, objective, objective_higher_is_better)
# add the best configuration to env
env["best_config"] = best_config
if not device_options.quiet:
units = getattr(runner, "units", None)
print("best performing configuration:")
util.print_config_output(tune_params, best_config, device_options.quiet, metrics, units)
if len(objective) == 1:
objective = objective[0]
objective_higher_is_better = objective_higher_is_better[0]
best_config = util.get_best_config(results, objective, objective_higher_is_better)
# add the best configuration to env
env['best_config'] = best_config
if not device_options.quiet:
units = getattr(runner, "units", None)
keys = list(tune_params.keys())
keys += [objective]
if metrics:
keys += list(metrics.keys())
print(f"\nBEST PERFORMING CONFIGURATION FOR OBJECTIVE {objective}:")
print(util.get_config_string(best_config, keys, units))
else:
pareto_front = util.get_pareto_results(results, objective, objective_higher_is_better)
# add the best configuration to env
env['best_config'] = pareto_front
if not device_options.quiet:
units = getattr(runner, "units", None)
keys = list(tune_params.keys())
keys += list(objective)
if metrics:
keys += list(metrics.keys)
print(f"\nBEST PERFORMING CONFIGURATIONS FOR OBJECTIVES: {objective}:")
for best_config in pareto_front:
print(util.get_config_string(best_config, keys, units))
elif not device_options.quiet:
print("no results to report")

Expand All @@ -722,6 +760,28 @@

tune_kernel.__doc__ = _tune_kernel_docstring


def tune_cache(*,
    cache_path,
    restrictions=None,
    **kwargs,
):
    """Tune a kernel in simulation mode from an existing cache file.

    Infers the tuning arguments and the restrictions from the cache file and
    forwards them, together with any ``kwargs`` overrides, to
    :func:`tune_kernel` with ``simulation_mode=True``.

    :param cache_path: path to the cache file to tune from.
    :param restrictions: optional list of extra restrictions, appended after
        the restrictions inferred from the cache.
    :raises ValueError: if ``restrictions`` is given but is not a list.
    :returns: whatever :func:`tune_kernel` returns (results and environment).
    """
    cache = util.read_cache(cache_path, open_cache=False)
    tune_args = util.infer_args_from_cache(cache)
    _restrictions = [util.infer_restrictions_from_cache(cache)]

    # Add the user provided restrictions after the cache-inferred ones
    if restrictions:
        if isinstance(restrictions, list):
            _restrictions.extend(restrictions)
        else:
            raise ValueError("The restrictions must be a list()")

    # user-supplied kwargs take precedence over the inferred arguments
    tune_args.update(kwargs)

    return tune_kernel(**tune_args, cache=cache_path, restrictions=_restrictions, simulation_mode=True)


_run_kernel_docstring = """Compile and run a single kernel

Compiles and runs a single kernel once, given a specific instance of the kernels tuning parameters.
Expand Down Expand Up @@ -869,7 +929,7 @@
strategy_options: dict={},
) -> tuple:
"""Call the tune function with a T1 input file.

The device, strategy and strategy_options can be overridden by passing a strategy name and options, otherwise the input file specification is used.
"""
inputs = get_input_file(input_filepath)
Expand Down
Loading