-
Notifications
You must be signed in to change notification settings - Fork 6
ASV Benchmarks Integration #209
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| { | ||
| "version": 1, | ||
| "project": "mkl_umath", | ||
| "project_url": "https://github.com/IntelPython/mkl_umath", | ||
| "repo": "..", | ||
| "branches": [ | ||
| "main" | ||
| ], | ||
| "environment_type": "existing", | ||
| "benchmark_dir": "benchmarks", | ||
| "env_dir": ".asv/env", | ||
| "results_dir": ".asv/results", | ||
| "html_dir": ".asv/html", | ||
| "show_commit_url": "https://github.com/IntelPython/mkl_umath/commit/", | ||
| "build_cache_size": 2, | ||
| "default_benchmark_timeout": 1500, | ||
| "regressions_thresholds": { | ||
| ".*": 0.2 | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| # Trigger MKL patching once per ASV worker process. | ||
| # ASV uses --launch-method spawn in CI, so each worker is a fresh process | ||
| # and this runs exactly once before any benchmark is collected or timed. | ||
| from . import _patch_setup # noqa: F401 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| """MKL patch setup — executed once per ASV worker process at import time. | ||
|
|
||
| Patches NumPy with Intel MKL implementations for fft, random, and umath. | ||
| Hard-fails with a descriptive RuntimeError if any package is missing or the | ||
| patch does not take effect, so benchmarks never silently run on stock NumPy. | ||
|
|
||
| Visible output goes to stderr; pass --show-stderr to ``asv run`` to see it. | ||
| """ | ||
|
|
||
| import sys | ||
|
|
||
| _PATCH_MAP = [ | ||
| ("mkl_fft", "patch_numpy_fft"), | ||
| ("mkl_random", "patch_numpy_random"), | ||
| ("mkl_umath", "patch_numpy_umath"), | ||
| ] | ||
|
|
||
|
|
||
| def _apply_patches(): | ||
| patched = {} | ||
|
|
||
| for mod_name, patch_fn_name in _PATCH_MAP: | ||
| try: | ||
| mod = __import__(mod_name) | ||
| except ImportError as exc: | ||
| raise RuntimeError( | ||
| f"[mkl-patch] Cannot import {mod_name}: {exc}\n" | ||
| f" Ensure the conda env contains {mod_name} from the Intel channel.\n" | ||
| f" Required channels: https://software.repos.intel.com/python/conda" | ||
| ) from exc | ||
|
|
||
| patch_fn = getattr(mod, patch_fn_name, None) | ||
| if patch_fn is None: | ||
| raise RuntimeError( | ||
| f"[mkl-patch] {mod_name} has no {patch_fn_name}(). " | ||
| f"Upgrade {mod_name} to a version that exposes the stock-numpy patch API." | ||
| ) | ||
|
|
||
| try: | ||
| patch_fn() | ||
| except Exception as exc: | ||
| raise RuntimeError( | ||
| f"[mkl-patch] {mod_name}.{patch_fn_name}() raised: {exc!r}" | ||
| ) from exc | ||
|
|
||
| is_patched_fn = getattr(mod, "is_patched", None) | ||
| if callable(is_patched_fn) and not is_patched_fn(): | ||
| raise RuntimeError( | ||
| f"[mkl-patch] {mod_name}.is_patched() returned False after patching. " | ||
| f"NumPy may have been imported before patching in a conflicting state." | ||
| ) | ||
|
|
||
| patched[mod_name] = mod | ||
|
|
||
| # Verbose attribution — verify numpy-level dispatch changed hands | ||
| import numpy as np | ||
|
|
||
| _attr_checks = { | ||
| "mkl_fft": lambda: np.fft.fft.__module__, | ||
| "mkl_random": lambda: np.random.random.__module__, | ||
| "mkl_umath": lambda: np.exp.__module__, | ||
| } | ||
| for mod_name in patched: | ||
| try: | ||
| attr = _attr_checks[mod_name]() | ||
| except Exception: | ||
| attr = "unknown" | ||
| sys.stderr.write(f"[mkl-patch] {mod_name}: numpy dispatch → {attr}\n") | ||
|
|
||
| sys.stderr.write("[mkl-patch] ALL OK — mkl_fft, mkl_random, mkl_umath active\n") | ||
| sys.stderr.flush() | ||
|
|
||
|
|
||
| _apply_patches() | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the call happen here, or should it happen in `__init__.py`? |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| """Micro-benchmarks for mkl_umath exponential and logarithm ufuncs. | ||
|
|
||
| Each class times a single ufunc over a Cartesian product of | ||
| dtype ∈ [float32, float64] | ||
| size ∈ [10_000, 100_000, 1_000_000] | ||
|
|
||
| Arrays are pre-allocated in setup() and reused across timing calls. | ||
| Patching is applied once at package import via benchmarks._patch_setup. | ||
| """ | ||
|
|
||
| import numpy as np | ||
|
|
||
|
|
||
| class BenchExp: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All of these classes share a lot of very common parameters. I would propose refactoring them into a single "BenchUnary" class of some sort, which takes arguments corresponding to the unique parameters/attributes of each benchmark. |
||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| # float32 overflows exp around 88.7; use [-10, 10] safe for both dtypes | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-10.0, 10.0, size).astype(dtype) | ||
|
|
||
| def time_exp(self, dtype, size): | ||
| np.exp(self.x) | ||
|
|
||
|
|
||
| class BenchExp2: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| # float32 overflows exp2 around 127 | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-10.0, 10.0, size).astype(dtype) | ||
|
|
||
| def time_exp2(self, dtype, size): | ||
| np.exp2(self.x) | ||
|
|
||
|
|
||
| class BenchExpm1: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-10.0, 10.0, size).astype(dtype) | ||
|
|
||
| def time_expm1(self, dtype, size): | ||
| np.expm1(self.x) | ||
|
|
||
|
|
||
| class BenchLog: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(1e-3, 1e3, size).astype(dtype) | ||
|
|
||
| def time_log(self, dtype, size): | ||
| np.log(self.x) | ||
|
|
||
|
|
||
| class BenchLog2: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(1e-3, 1e3, size).astype(dtype) | ||
|
|
||
| def time_log2(self, dtype, size): | ||
| np.log2(self.x) | ||
|
|
||
|
|
||
| class BenchLog10: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(1e-3, 1e3, size).astype(dtype) | ||
|
|
||
| def time_log10(self, dtype, size): | ||
| np.log10(self.x) | ||
|
|
||
|
|
||
| class BenchLog1p: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| # log1p(x) is defined for x > -1; use [0, 10] which is always safe | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(0.0, 10.0, size).astype(dtype) | ||
|
|
||
| def time_log1p(self, dtype, size): | ||
| np.log1p(self.x) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| """Micro-benchmarks for mkl_umath sqrt, cbrt, and miscellaneous ufuncs. | ||
|
|
||
| Each class times a single ufunc over a Cartesian product of | ||
| dtype ∈ [float32, float64] | ||
| size ∈ [10_000, 100_000, 1_000_000] | ||
|
|
||
| Arrays are pre-allocated in setup() and reused across timing calls. | ||
| Patching is applied once at package import via benchmarks._patch_setup. | ||
| """ | ||
|
|
||
| import numpy as np | ||
|
|
||
|
|
||
| class BenchSqrt: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(0.0, 100.0, size).astype(dtype) | ||
|
|
||
| def time_sqrt(self, dtype, size): | ||
| np.sqrt(self.x) | ||
|
|
||
|
|
||
| class BenchCbrt: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-100.0, 100.0, size).astype(dtype) | ||
|
|
||
| def time_cbrt(self, dtype, size): | ||
| np.cbrt(self.x) | ||
|
|
||
|
|
||
| class BenchSquare: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-10.0, 10.0, size).astype(dtype) | ||
|
|
||
| def time_square(self, dtype, size): | ||
| np.square(self.x) | ||
|
|
||
|
|
||
| class BenchFabs: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-100.0, 100.0, size).astype(dtype) | ||
|
|
||
| def time_fabs(self, dtype, size): | ||
| np.fabs(self.x) | ||
|
|
||
|
|
||
| class BenchAbsolute: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(-100.0, 100.0, size).astype(dtype) | ||
|
|
||
| def time_absolute(self, dtype, size): | ||
| np.absolute(self.x) | ||
|
|
||
|
|
||
| class BenchReciprocal: | ||
| params = (["float32", "float64"], [10_000, 100_000, 1_000_000]) | ||
| param_names = ["dtype", "size"] | ||
|
|
||
| def setup(self, dtype, size): | ||
| # Avoid values near zero to prevent inf results dominating timing | ||
| rng = np.random.default_rng(42) | ||
| self.x = rng.uniform(0.01, 100.0, size).astype(dtype) | ||
|
|
||
| def time_reciprocal(self, dtype, size): | ||
| np.reciprocal(self.x) |
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Do we need to write to `stderr`? Couldn't we just call `print`, or use `stdout` if we need the output? `stderr` is generally for errors.