From b2cbd649f00cb05aedf2aa03d008e00a513a8a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 00:59:53 +0100 Subject: [PATCH 1/9] Batch estimator call in GeneralizingEstimator.score `_gl_score` invoked the scorer's response method (`predict` / `predict_proba` / `decision_function`) `n_estimators * n_slices` times per fold. Stack X across slices and call the response method once per estimator, then apply the metric per slice on the resulting predictions. The batching saves on overhead and best utilises vectorized operations. Scorers without a recognised `_response_method` (e.g. `scoring=None` or custom callables) fall back to the original nested loop. --- mne/decoding/search_light.py | 75 +++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index 81699ecd5ba..31238fca162 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -743,17 +743,74 @@ def _gl_score(estimators, scoring, X, y, pb): """ # FIXME: The level parallelization may be a bit high, and might be memory # consuming. Perhaps need to lower it down to the loop across X slices. - score_shape = [len(estimators), X.shape[-1]] - for jj in range(X.shape[-1]): - for ii, est in enumerate(estimators): - _score = scoring(est, X[..., jj], y) - # Initialize array of predictions on the first score iteration + n_sample, n_iter = X.shape[0], X.shape[-1] + n_train = len(estimators) + score_shape = [n_train, n_iter] + score = None + + # Detect whether we can batch the estimator. Recognised: + # * predict, + # * predict_proba + # * decision_function + # * "default" (= predict) + # * A tuple of those: roc_auc = ("decision_function", "predict_proba") + score_func = getattr(scoring, "_score_func", None) + rm = getattr(scoring, "_response_method", None) + valid = {"predict", "predict_proba", "decision_function"} + if rm == "default": + response_method = "predict" + elif isinstance(rm, str) and rm in valid: + response_method = rm + elif isinstance(rm, tuple) and all(m in valid for m in rm): + response_method = rm + else: + response_method = None + can_batch = score_func is not None and response_method is not None + + # If we can't batch we do a simple nested loop. + # Covers scoring=None / unrecognised scorers + if not can_batch: + for jj in range(n_iter): + for ii, est in enumerate(estimators): + _score = scoring(est, X[..., jj], y) + if (ii == 0) and (jj == 0): + score = np.zeros(score_shape, type(_score)) + score[ii, jj, ...] = _score + pb.update(jj * n_train + ii + 1) + return score + + # We can batch; the logic is: reshape X, predict once, reshape back, score + # First: stack X across slices for one batched response call per estimator + X_stack = np.moveaxis(X, -1, 1) + X_stack = X_stack.reshape(n_sample * n_iter, *X_stack.shape[2:]) + + # Use the provided response method, or pick the first one supported + # by the estimator + if isinstance(response_method, str): + method = response_method + else: + for m in response_method: + if hasattr(estimators[0], m): + method = m + break + + # Ensures score_func(..., **kwargs) doesn't crash when scoring._kwargs=None + kwargs = scoring._kwargs or {} + + for ii, est in enumerate(estimators): + y_pred = getattr(est, method)(X_stack) + # predict_proba returns probabilities for both classes; use the + # positive-class probabilities expected by binary scorers + if method == "predict_proba" and y_pred.ndim == 2 and y_pred.shape[1] == 2: + y_pred = y_pred[:, 1] + # Now, reshape back the prediction, then score + y_pred = y_pred.reshape((n_sample, n_iter) + y_pred.shape[1:]) + for jj in range(n_iter): + _score = scoring._sign * score_func(y, y_pred[:, jj], **kwargs) if (ii == 0) and (jj == 0): - dtype = type(_score) - score = np.zeros(score_shape, dtype) + score = np.zeros(score_shape, type(_score)) score[ii, jj, ...] = _score - - pb.update(jj * len(estimators) + ii + 1) + pb.update(ii * n_iter + jj + 1) return score From 3cef4a52ab92ce693a48c6ac3fe799f4f82d9b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 10:48:47 +0100 Subject: [PATCH 2/9] ENH: Vectorise accuracy scoring in GeneralizingEstimator.score When the scorer is `accuracy_score` with default kwargs, 1d-output (but can be multi-class), and `response_method == "predict"`, replace the per-slice `accuracy_score(y, y_pred[:, jj])` calls with one numpy reduction per estimator: `(y_pred == y[:, None]).mean(axis=0)`. Other scorers, multi-output `y`, etc. keep nested-loop behavior. --- mne/decoding/search_light.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index 31238fca162..7e1cce71590 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -797,6 +797,15 @@ def _gl_score(estimators, scoring, X, y, pb): # Ensures score_func(..., **kwargs) doesn't crash when scoring._kwargs=None kwargs = scoring._kwargs or {} + # Fast path: vectorised accuracy. Skips the per-jj score_func call and + # computes (y_pred == y[:, None]).mean(axis=0) for all slices at once. + fast_accuracy = ( + getattr(score_func, "__name__", "") == "accuracy_score" + and not kwargs + and response_method == "predict" + and y.ndim == 1 + ) + for ii, est in enumerate(estimators): y_pred = getattr(est, method)(X_stack) # predict_proba returns probabilities for both classes; use the @@ -805,12 +814,20 @@ def _gl_score(estimators, scoring, X, y, pb): y_pred = y_pred[:, 1] # Now, reshape back the prediction, then score y_pred = y_pred.reshape((n_sample, n_iter) + y_pred.shape[1:]) - for jj in range(n_iter): - _score = scoring._sign * score_func(y, y_pred[:, jj], **kwargs) - if (ii == 0) and (jj == 0): - score = np.zeros(score_shape, type(_score)) - score[ii, jj, ...] = _score - pb.update(ii * n_iter + jj + 1) + # Either we can also score with a batch, here, or we loop again, below + if fast_accuracy: + row = scoring._sign * (y_pred == y[:, None]).mean(axis=0) + if ii == 0: + score = np.zeros(score_shape, row.dtype) + score[ii] = row + pb.update((ii + 1) * n_iter) + else: + for jj in range(n_iter): + _score = scoring._sign * score_func(y, y_pred[:, jj], **kwargs) + if (ii == 0) and (jj == 0): + score = np.zeros(score_shape, type(_score)) + score[ii, jj, ...] = _score + pb.update(ii * n_iter + jj + 1) return score From 7a66f157cde7a37884f5cd980116758e5db4b581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 12:53:57 +0100 Subject: [PATCH 3/9] MAINT: Refactor batched fast path in GeneralizingEstimator.score Replace the `fast_accuracy` flag with a `batched_score` which is either a callable if we recognized the scorer, or otherwise set to `None`. The scoring loop then branches on `batched_score`: call `batched_score(y_pred)` if `batched_score` is set, otherwise fall back to the nested loop. --- mne/decoding/search_light.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index 7e1cce71590..0496079c9ac 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -797,14 +797,16 @@ def _gl_score(estimators, scoring, X, y, pb): # Ensures score_func(..., **kwargs) doesn't crash when scoring._kwargs=None kwargs = scoring._kwargs or {} - # Fast path: vectorised accuracy. Skips the per-jj score_func call and - # computes (y_pred == y[:, None]).mean(axis=0) for all slices at once. - fast_accuracy = ( - getattr(score_func, "__name__", "") == "accuracy_score" - and not kwargs - and response_method == "predict" - and y.ndim == 1 - ) + # Batched path: when we recognise score_func, build `batched_score` that + # scores all n_iter slices in a single vectorised reduction. it stays None + # for unrecognised scorers which falls back to nested loops + sign = scoring._sign + batched_score = None + if not kwargs and y.ndim == 1: + name = getattr(score_func, "__name__", "") + if name == "accuracy_score" and response_method == "predict": + def batched_score(y_pred): + return sign * (y_pred == y[:, None]).mean(axis=0) for ii, est in enumerate(estimators): y_pred = getattr(est, method)(X_stack) @@ -814,16 +816,16 @@ def _gl_score(estimators, scoring, X, y, pb): y_pred = y_pred[:, 1] # Now, reshape back the prediction, then score y_pred = y_pred.reshape((n_sample, n_iter) + y_pred.shape[1:]) - # Either we can also score with a batch, here, or we loop again, below - if fast_accuracy: - row = scoring._sign * (y_pred == y[:, None]).mean(axis=0) + # Either we can score with batching (if) or we loop again (else) + if batched_score is not None: + row = batched_score(y_pred) if ii == 0: score = np.zeros(score_shape, row.dtype) score[ii] = row pb.update((ii + 1) * n_iter) else: for jj in range(n_iter): - _score = scoring._sign * score_func(y, y_pred[:, jj], **kwargs) + _score = sign * score_func(y, y_pred[:, jj], **kwargs) if (ii == 0) and (jj == 0): score = np.zeros(score_shape, type(_score)) score[ii, jj, ...] = _score From 32028955ba93eb08b29ea6e48419b336035e6414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 13:01:31 +0100 Subject: [PATCH 4/9] ENH: Vectorise balanced_accuracy in GeneralizingEstimator.score Adds `balanced_accuracy_score` to the `batched_score` dispatch by manually estimating accuracy per class and then averaging. --- mne/decoding/search_light.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index 0496079c9ac..62ff25b541a 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -807,6 +807,12 @@ def _gl_score(estimators, scoring, X, y, pb): if name == "accuracy_score" and response_method == "predict": def batched_score(y_pred): return sign * (y_pred == y[:, None]).mean(axis=0) + elif name == "balanced_accuracy_score" and response_method == "predict": + classes = np.unique(y) + def batched_score(y_pred): + return sign * np.stack( + [(y_pred[y == c] == c).mean(axis=0) for c in classes] + ).mean(axis=0) for ii, est in enumerate(estimators): y_pred = getattr(est, method)(X_stack) From 7a30425fc899684baba9724060ef8e9525ef7b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 13:33:50 +0100 Subject: [PATCH 5/9] ENH: Batch `scoring=None` in GeneralizingEstimator.score When `scoring=None`, sklearn wraps `estimator.score` in a scorer with no `_score_func` so previous code did not batch. But for stock classifiers, this is just `accuracy_score(y, predict(X))`: we now detect this and promote `scoring` to the named "accuracy" scorer which uses the existing batched path. --- mne/decoding/search_light.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index 62ff25b541a..b0833d0d0f4 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -748,6 +748,18 @@ def _gl_score(estimators, scoring, X, y, pb): score_shape = [n_train, n_iter] score = None + # scoring=None goes through sklearn's _PassthroughScorer, which delegates + # to estimator.score(X, y). For a classifier inheriting + # ClassifierMixin.score unchanged, that's accuracy which we now set. We + # compare `type(est).score.__qualname__` rather than `.__name__` because + # the bare name is "score" no matter which class defined the method. A bare + # method has qualname "ClassifierMixin.score", whereas any override + # resolves to ".score". We only take over bare methods. + if len(estimators) and getattr(scoring, "_score_func", None) is None: + qname = getattr(type(estimators[0]).score, "__qualname__", "") + if qname == "ClassifierMixin.score": + scoring = check_scoring(estimators[0], "accuracy") + # Detect whether we can batch the estimator. Recognised: # * predict, # * predict_proba From b23e72dfbb8979bc347ed9436878fc57e4e2b465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 14:43:27 +0100 Subject: [PATCH 6/9] ENH: Vectorise roc_auc in GeneralizingEstimator.score Add `roc_auc_score` to the `batched_score` dispatch via the Mann-Whitney U identity with average-rank tie correction (`scipy.stats.rankdata(..., method="average", axis=0)` ranks all slices at once). Binary classification only: multi-class, or non-default kwargs, revert to nested loops. The rank identity implemented when batching gives the same AUC as sklearn within floating point precision, but implements it with different operations. A bit-exact match would need a loop, defeating the batching. --- mne/decoding/search_light.py | 17 +++++++++++++++++ mne/decoding/tests/test_search_light.py | 11 ++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index b0833d0d0f4..ebb19a6cb2d 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -5,6 +5,7 @@ import logging import numpy as np +from scipy.stats import rankdata from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone from sklearn.metrics import check_scoring from sklearn.preprocessing import LabelEncoder @@ -825,6 +826,22 @@ def batched_score(y_pred): return sign * np.stack( [(y_pred[y == c] == c).mean(axis=0) for c in classes] ).mean(axis=0) + elif name == "roc_auc_score" and method in ( + "predict_proba", "decision_function" + ): + classes = np.unique(y) + if len(classes) == 2: # multi-class needs ovr/ovo; defer + pos = y == classes[1] + n_pos, n_neg = int(pos.sum()), int((~pos).sum()) + if n_pos and n_neg: # degenerate folds raise downstream in sklearn + def batched_score(y_pred): + # Mann-Whitney U identity with average-rank tie + # correction. Equivalent to sklearn's roc_auc within + # floating point precision, but different computaion + ranks = rankdata(y_pred, method="average", axis=0) + return sign * ( + ranks[pos].sum(axis=0) - n_pos * (n_pos + 1) / 2.0 + ) / (n_pos * n_neg) for ii, est in enumerate(estimators): y_pred = getattr(est, method)(X_stack) diff --git a/mne/decoding/tests/test_search_light.py b/mne/decoding/tests/test_search_light.py index 56d239c1bcc..5d9f732e22b 100644 --- a/mne/decoding/tests/test_search_light.py +++ b/mne/decoding/tests/test_search_light.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from numpy.testing import assert_array_equal, assert_equal +from numpy.testing import assert_array_equal, assert_equal, assert_allclose sklearn = pytest.importorskip("sklearn") @@ -246,7 +246,11 @@ def test_generalization_light(metadata_routing): gl.fit(X, y) score = gl.score(X, y) auc = roc_auc_score(y, gl.estimators_[0].predict_proba(X[..., 0])[..., 1]) - assert_equal(score[0, 0], auc) + + # The rank identity implemented when batching gives the same AUC as sklearn + # within floating point precision, but implements it with different + # operations. A bit-exact match would need a loop, defeating the batching. + assert_allclose(score[0, 0], auc) for scoring in ["foo", 999]: gl = GeneralizingEstimator(logreg, scoring=scoring) @@ -267,7 +271,8 @@ def test_generalization_light(metadata_routing): [roc_auc_score(y - 1, _y_pred) for _y_pred in _y_preds] for _y_preds in gl.decision_function(X).transpose(1, 2, 0) ] - assert_array_equal(score, manual_score) + # allclose instead of equal: see above, batching roc_auc forces this. + assert_allclose(score, manual_score) # n_jobs gl = GeneralizingEstimator(logreg, n_jobs=2) From aac4a90f63abf1262f31a27ac25622a07c4ce41f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 May 2026 16:19:14 +0000 Subject: [PATCH 7/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/decoding/search_light.py | 16 +++++++++++----- mne/decoding/tests/test_search_light.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index ebb19a6cb2d..76d7b45dd2e 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -768,7 +768,7 @@ def _gl_score(estimators, scoring, X, y, pb): # * "default" (= predict) # * A tuple of those: roc_auc = ("decision_function", "predict_proba") score_func = getattr(scoring, "_score_func", None) - rm = getattr(scoring, "_response_method", None) + rm = getattr(scoring, "_response_method", None) valid = {"predict", "predict_proba", "decision_function"} if rm == "default": response_method = "predict" @@ -818,30 +818,36 @@ def _gl_score(estimators, scoring, X, y, pb): if not kwargs and y.ndim == 1: name = getattr(score_func, "__name__", "") if name == "accuracy_score" and response_method == "predict": + def batched_score(y_pred): return sign * (y_pred == y[:, None]).mean(axis=0) elif name == "balanced_accuracy_score" and response_method == "predict": classes = np.unique(y) + def batched_score(y_pred): return sign * np.stack( [(y_pred[y == c] == c).mean(axis=0) for c in classes] ).mean(axis=0) elif name == "roc_auc_score" and method in ( - "predict_proba", "decision_function" + "predict_proba", + "decision_function", ): classes = np.unique(y) if len(classes) == 2: # multi-class needs ovr/ovo; defer pos = y == classes[1] n_pos, n_neg = int(pos.sum()), int((~pos).sum()) if n_pos and n_neg: # degenerate folds raise downstream in sklearn + def batched_score(y_pred): # Mann-Whitney U identity with average-rank tie # correction. Equivalent to sklearn's roc_auc within # floating point precision, but different computaion ranks = rankdata(y_pred, method="average", axis=0) - return sign * ( - ranks[pos].sum(axis=0) - n_pos * (n_pos + 1) / 2.0 - ) / (n_pos * n_neg) + return ( + sign + * (ranks[pos].sum(axis=0) - n_pos * (n_pos + 1) / 2.0) + / (n_pos * n_neg) + ) for ii, est in enumerate(estimators): y_pred = getattr(est, method)(X_stack) diff --git a/mne/decoding/tests/test_search_light.py b/mne/decoding/tests/test_search_light.py index 5d9f732e22b..749d241c36c 100644 --- a/mne/decoding/tests/test_search_light.py +++ b/mne/decoding/tests/test_search_light.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from numpy.testing import assert_array_equal, assert_equal, assert_allclose +from numpy.testing import assert_allclose, assert_array_equal, assert_equal sklearn = pytest.importorskip("sklearn") From df1cfddc97e860d1c0b2d7c6edc9f878fb4f4578 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 17:27:17 +0100 Subject: [PATCH 8/9] Fix typo: computaion ==> computation... --- mne/decoding/search_light.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/decoding/search_light.py b/mne/decoding/search_light.py index 76d7b45dd2e..8f1f0a322e2 100644 --- a/mne/decoding/search_light.py +++ b/mne/decoding/search_light.py @@ -841,7 +841,7 @@ def batched_score(y_pred): def batched_score(y_pred): # Mann-Whitney U identity with average-rank tie # correction. Equivalent to sklearn's roc_auc within - # floating point precision, but different computaion + # floating point precision, but different computation ranks = rankdata(y_pred, method="average", axis=0) return ( sign From d7faec491b10c9e4d32dc28d24f4668c95bc1a44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Sabl=C3=A9-Meyer?= Date: Fri, 22 May 2026 18:44:00 +0100 Subject: [PATCH 9/9] Adds entry in `doc/changes/dev/` + `names.inc` --- doc/changes/dev/13909.other.rst | 4 ++++ doc/changes/names.inc | 1 + 2 files changed, 5 insertions(+) create mode 100644 doc/changes/dev/13909.other.rst diff --git a/doc/changes/dev/13909.other.rst b/doc/changes/dev/13909.other.rst new file mode 100644 index 00000000000..a2131a6ade9 --- /dev/null +++ b/doc/changes/dev/13909.other.rst @@ -0,0 +1,4 @@ +Batch and vectorise classifier estimation and scoring in +:meth:`mne.decoding.GeneralizingEstimator.score` for ``scoring=None``, +``"accuracy"``, ``"balanced_accuracy"`` and ``"roc_auc"``, by +:newcontrib:`Mathias Sablé-Meyer`. diff --git a/doc/changes/names.inc b/doc/changes/names.inc index 86644392e3b..3418311d780 100644 --- a/doc/changes/names.inc +++ b/doc/changes/names.inc @@ -212,6 +212,7 @@ .. _Martin Luessi: https://github.com/mluessi .. _Martin Oberg: https://github.com/obergmartin .. _Martin Schulz: https://github.com/marsipu +.. _Mathias Sablé-Meyer: https://s-m.ac/ .. _Mathieu Scheltienne: https://github.com/mscheltienne .. _Mathurin Massias: https://mathurinm.github.io/ .. _Mats van Es: https://github.com/matsvanes