pearsonr

mgraffg · mgraffg · commit 1acc151669dc · 2025-04-07T20:00:11.000Z
diff --git a/CompStats/metrics.py b/CompStats/metrics.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 from functools import wraps
 from sklearn import metrics
+from scipy import stats
 from CompStats.interface import Perf
 from CompStats.utils import metrics_docs
 
@@ -651,7 +652,6 @@ def inner(y, hy):
                 **kwargs)
 
 
-@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def d2_absolute_error_score(y_true,
                             *y_pred,
                             sample_weight=None,
@@ -672,3 +672,37 @@ def inner(y, hy):
                 num_samples=num_samples, n_jobs=n_jobs,
                 use_tqdm=use_tqdm,
                 **kwargs)
+
+
+def pearsonr(y_true, *y_pred,
+             alternative='two-sided', method=None,  # both are forwarded unchanged to scipy.stats.pearsonr
+             num_samples: int=500,
+             n_jobs: int=-1,
+             use_tqdm=True,
+             **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~scipy.stats.pearsonr` as :py:attr:`score_func`.
+
+    :param y_true: True measurement, or a pandas.DataFrame whose column labelled 'y' holds the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param y_pred: Predictions; the algorithms will be identified with alg-k, where k=1 is the first argument included in :py:attr:`y_pred`.
+    :type y_pred: numpy.ndarray
+    :param kwargs: Predictions given as keyword arguments; each algorithm will be identified using its keyword.
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    """
+
+    @wraps(stats.pearsonr)  # copy scipy's name and docstring onto the wrapper
+    def inner(y, hy):
+        return stats.pearsonr(y, hy,
+                              alternative=alternative,
+                              method=method).statistic  # keep only the coefficient; the rest of the result is dropped
+    
+    return Perf(y_true, *y_pred, score_func=inner, error_func=None,
+                num_samples=num_samples, n_jobs=n_jobs,
+                use_tqdm=use_tqdm,
+                **kwargs)
diff --git a/CompStats/tests/test_metrics.py b/CompStats/tests/test_metrics.py
@@ -476,4 +476,21 @@ def test_d2_absolute_error_score():
                                    forest=hy,
                                    num_samples=50)
     _ = metrics.d2_absolute_error_score(y_val, hy)
-    assert _ == perf.statistic
+    assert _ == perf.statistic
+
+
+def test_pearsonr():
+    """pearsonr's Perf statistic must equal scipy.stats.pearsonr on the same data"""
+    from CompStats.metrics import pearsonr
+    from scipy import stats
+
+    X, y = load_diabetes(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)  # the held-out 30% serves as the validation set
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestRegressor().fit(X_train, y_train)
+    hy = ens.predict(X_val)
+    perf = pearsonr(y_val,
+                    forest=hy,  # keyword form: the prediction is passed under the name 'forest'
+                    num_samples=50)
+    _ = stats.pearsonr(y_val, hy)  # reference value computed directly with scipy
+    assert _.statistic == perf.statistic