From 0086c1dfb61120b8d42129d6f38c032f548af66b Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 17 Apr 2026 13:58:51 -0700
Subject: [PATCH 01/11] Add waveletdiff: wavelet-based denoising differentiator

- Implements Donoho & Johnstone (1994) universal threshold estimator
- Vectorised thresholding over multi-column inputs
- Supports variable step size via np.gradient
- Supports arbitrary axis via ascontiguousarray + moveaxis
- Adds pywt dependency
---
 pynumdiff/__init__.py                |   2 +-
 pynumdiff/basis_fit.py               | 123 +++++++++++++++++++++++++++
 pynumdiff/tests/test_diff_methods.py |  18 +++-
 3 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/pynumdiff/__init__.py b/pynumdiff/__init__.py
index 7a3c465..2e33073 100644
--- a/pynumdiff/__init__.py
+++ b/pynumdiff/__init__.py
@@ -15,6 +15,6 @@
 from .finite_difference import finitediff, first_order, second_order, fourth_order
 from .smooth_finite_difference import kerneldiff, meandiff, mediandiff, gaussiandiff, friedrichsdiff, butterdiff
 from .polynomial_fit import splinediff, polydiff, savgoldiff
-from .basis_fit import spectraldiff, rbfdiff
+from .basis_fit import spectraldiff, rbfdiff, waveletdiff
 from .total_variation_regularization import iterative_velocity
 from .kalman_smooth import kalman_filter, rts_smooth, rtsdiff, constant_velocity, constant_acceleration, constant_jerk
diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index c43aff0..10337c3 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -2,6 +2,7 @@
 from warnings import warn
 import numpy as np
 from scipy import sparse
+import pywt
 
 from pynumdiff.utils import utility
 
@@ -137,3 +138,125 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01):
     alpha = sparse.linalg.spsolve(rbf_regularized, x) # solve sparse system targeting the noisy data, O(N sigma^2)
 
     return rbf @ alpha, drbfdt @ alpha # find samples of reconstructions using the smooth bases
+
+
+def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'):
+    """Smooth and differentiate noisy data via discrete wavelet denoising.
+
+    Decomposes x into wavelet detail and approximation coefficients, soft-thresholds
+    the detail coefficients to remove noise using the Donoho & Johnstone (1994)
+    universal threshold estimator, reconstructs a smoothed signal, then
+    differentiates with finite differences via np.gradient.
+
+    :param np.array x: data to differentiate
+    :param float or array dt_or_t: scalar dt or array of sample times. If an
+        array is provided it is passed directly to np.gradient, giving correct
+        results for non-uniformly sampled data.
+    :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'.
+        'db4' is a solid general-purpose default. Biorthogonal wavelets such as
+        'bior2.2' or 'bior4.4' are symmetric and designed for smooth reconstruction
+        but may need a lower threshold value.
+    :param int level: decomposition depth. None (default) resolves to
+        min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short
+        signals. Increase for heavily oversampled data.
+    :param float threshold: soft-thresholding scale factor in [0, inf).
+        Multiplies the universal threshold sigma * sqrt(2 * log(N)).
+        threshold=1.0 is the classical Donoho & Johnstone universal threshold
+        and is the recommended starting point. Values < 1.0 give less smoothing;
+        values > 1.0 give more aggressive smoothing. This parameter maps onto
+        tvgamma in the pynumdiff.optimize framework.
+    :param int axis: axis along which to differentiate (default 0).
+    :param str mode: PyWavelets signal extension mode passed to wavedec/waverec.
+        'periodization' (default) keeps coefficient arrays exactly length N and
+        is the most numerically stable choice for differentiation. 'reflect' is
+        a good alternative for clearly non-periodic signals.
+        See pywt.Modes.modes for all options.
+    :return: - **x_hat** (np.array) -- estimated (smoothed) x
+             - **dxdt_hat** (np.array) -- estimated derivative of x
+    """
+    N = x.shape[axis]
+
+    # Axis normalisation — bring target axis to front.
+    # Skip moveaxis when axis is already 0 to avoid an unnecessary allocation.
+    # When we do move, call ascontiguousarray immediately so the subsequent
+    # reshape is guaranteed zero-copy.
+    if axis == 0:
+        x_work = x if x.flags['C_CONTIGUOUS'] else np.ascontiguousarray(x)
+    else:
+        x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0))
+
+    shape = x_work.shape
+    x_flat = x_work.reshape(N, -1)  # (N, M) contiguous, no hidden copy
+    M = x_flat.shape[1]
+
+    if np.isscalar(dt_or_t):
+        grad_arg = dt_or_t
+    else:
+        if len(dt_or_t) != N:
+            raise ValueError(
+                "`dt_or_t` array must have the same length as x along `axis`."
+            )
+        grad_arg = dt_or_t  # np.gradient accepts a full coordinate array
+
+    # Conservative level default avoids over-decomposing short signals
+    # (pywt default uses the maximum possible level).
+    if level is None:
+        max_level = pywt.dwt_max_level(N, wavelet)
+        level = min(max_level, 5)
+
+    # Decompose all columns and stack coefficients into 2-D arrays of shape
+    # (coeff_len_i, M). Probing column 0 first lets us pre-allocate correctly;
+    # the probe result is reused for col 0 so we pay N+1 wavedec calls total.
+    _probe = pywt.wavedec(x_flat[:, 0], wavelet, level=level, mode=mode)
+    coeff_lengths = [len(c) for c in _probe]
+    n_levels = len(_probe)
+
+    coeffs_all = [
+        np.empty((coeff_lengths[i], M), dtype=x_flat.dtype)
+        for i in range(n_levels)
+    ]
+    for i, c in enumerate(_probe):
+        coeffs_all[i][:, 0] = c
+
+    for col in range(1, M):
+        for i, c in enumerate(
+            pywt.wavedec(x_flat[:, col], wavelet, level=level, mode=mode)
+        ):
+            coeffs_all[i][:, col] = c
+
+    # Vectorised noise estimation and soft-thresholding over all columns at once.
+    # sigma: robust MAD estimator from finest detail level, shape (M,).
+    # thresh: per-column universal threshold, shape (M,).
+    # Approximation coefficients (index 0) are left untouched; only detail
+    # levels (indices 1..n_levels-1) are thresholded.
+    sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745
+    np.maximum(sigma, 1e-10, out=sigma)  # floor avoids zero threshold on clean signals
+
+    thresh = threshold * sigma * np.sqrt(2 * np.log(N))  # shape (M,)
+
+    coeffs_denoised = [coeffs_all[0]] + [
+        pywt.threshold(c, thresh[np.newaxis, :], mode='soft')
+        for c in coeffs_all[1:]
+    ]
+
+    # Reconstruct and differentiate — pywt.waverec is 1-D only so a column
+    # loop remains, but all Python-level arithmetic has been moved out above.
+    x_hat_flat    = np.empty_like(x_flat)
+    dxdt_hat_flat = np.empty_like(x_flat)
+
+    for col in range(M):
+        col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)]
+        x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
+        x_hat_flat[:, col]    = x_hat_col
+        dxdt_hat_flat[:, col] = np.gradient(x_hat_col, grad_arg)
+
+    # Restore original shape and axis order.
+    # moveaxis on the way out is only needed when we moved on the way in.
+    x_hat    = x_hat_flat.reshape(shape)
+    dxdt_hat = dxdt_hat_flat.reshape(shape)
+
+    if axis != 0:
+        x_hat    = np.moveaxis(x_hat,    0, axis)
+        dxdt_hat = np.moveaxis(dxdt_hat, 0, axis)
+
+    return x_hat, dxdt_hat
diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
index 7d9eda6..a5429b1 100644
--- a/pynumdiff/tests/test_diff_methods.py
+++ b/pynumdiff/tests/test_diff_methods.py
@@ -5,7 +5,7 @@
 from ..smooth_finite_difference import kerneldiff, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff
 from ..finite_difference import finitediff, first_order, second_order, fourth_order
 from ..polynomial_fit import polydiff, savgoldiff, splinediff
-from ..basis_fit import spectraldiff, rbfdiff
+from ..basis_fit import spectraldiff, rbfdiff, waveletdiff
 from ..total_variation_regularization import velocity, acceleration, jerk, iterative_velocity, smooth_acceleration
 from ..kalman_smooth import rtsdiff, constant_velocity, constant_acceleration, constant_jerk, robustdiff
 from ..linear_model import lineardiff
@@ -47,6 +47,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
     (spline_irreg_step, {'degree':5, 's':2}),
     (spectraldiff, {'high_freq_cutoff':0.2}), (spectraldiff, [0.2]),
     (rbfdiff, {'sigma':0.5, 'lmbd':0.001}),
+    (waveletdiff, {'wavelet':'db4', 'threshold':1.0}),
+    (waveletdiff, {'wavelet':'db4', 'threshold':1.0}),
     (constant_velocity, {'r':1e-2, 'q':1e3}), (constant_velocity, [1e-2, 1e3]),
     (constant_acceleration, {'r':1e-3, 'q':1e4}), (constant_acceleration, [1e-3, 1e4]),
     (constant_jerk, {'r':1e-4, 'q':1e5}), (constant_jerk, [1e-4, 1e5]),
@@ -162,6 +164,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
               [(-2, -2), (0, 0), (0, -1), (0, 0)],
               [(0, 0), (2, 2), (0, 0), (2, 2)],
               [(1, 1), (3, 3), (1, 1), (3, 3)]],
+    waveletdiff: [[(-14, -15), (-14, -14), (-1, -1), (0, 0)],
+                  [(-9, -9), (-8, -8), (0, 0), (1, 1)],
+                  [(-9, -9), (0, 0), (0, 0), (1, 1)],
+                  [(-1, -1), (0, 0), (0, 0), (1, 1)],
+                  [(1, 0), (2, 2), (1, 1), (2, 2)],
+                  [(0, 0), (3, 3), (1, 0), (3, 3)]], 
     velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)],
                [(-12, -12), (-11, -12), (-1, -1), (-1, -2)],
                [(0, -1), (1, 0), (0, -1), (1, 0)],
@@ -308,7 +316,8 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     (kerneldiff, {'kernel': 'gaussian', 'window_size': 5}),
     (butterdiff, {'filter_order': 3, 'cutoff_freq': 1 - 1e-6}),
     (finitediff, {}),
-    (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3})
+    (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}),
+    (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}),
 ]
 
 # Similar to the error_bounds table, index by method first. But then we test against only one 2D function,
@@ -319,7 +328,8 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     kerneldiff: [(2, 1), (3, 2)],
     butterdiff: [(0, -1), (1, -1)],
     finitediff: [(0, -1), (1, -1)],
-    savgoldiff: [(0, -1), (1, 1)]
+    savgoldiff: [(0, -1), (1, 1)],
+    waveletdiff: [(1, 0), (2, 1)],
 }
 
 @mark.parametrize("multidim_method_and_params", multidim_methods_and_params)
@@ -372,4 +382,4 @@ def test_multidimensionality(multidim_method_and_params, request):
         ax2.plot_wireframe(T1, T2, computed_d2)
         ax3.plot_wireframe(T1, T2, computed_laplacian, label='computed')
         legend = ax3.legend(bbox_to_anchor=(0.7, 0.8)); legend.legend_handles[0].set_facecolor(pyplot.cm.viridis(0.6))
-        fig.suptitle(f'{diff_method.__name__}', fontsize=16)
+        fig.suptitle(f'{diff_method.__name__}', fontsize=16)
\ No newline at end of file

From 8431f6df9dbdd7f8158a51d27c535f5347ccbedb Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 17 Apr 2026 14:51:27 -0700
Subject: [PATCH 02/11] resolved another merge conflict

---
 pynumdiff/tests/test_diff_methods.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
index 7bcd75e..e06e8e4 100644
--- a/pynumdiff/tests/test_diff_methods.py
+++ b/pynumdiff/tests/test_diff_methods.py
@@ -5,13 +5,8 @@
 from ..smooth_finite_difference import kerneldiff, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff
 from ..finite_difference import finitediff, first_order, second_order, fourth_order
 from ..polynomial_fit import polydiff, savgoldiff, splinediff
-<<<<<<< HEAD
 from ..basis_fit import spectraldiff, rbfdiff, waveletdiff
-from ..total_variation_regularization import velocity, acceleration, jerk, iterative_velocity, smooth_acceleration
-=======
-from ..basis_fit import spectraldiff, rbfdiff
 from ..total_variation_regularization import velocity, acceleration, jerk, iterative_velocity, smooth_acceleration, tvrdiff
->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a
 from ..kalman_smooth import rtsdiff, constant_velocity, constant_acceleration, constant_jerk, robustdiff
 from ..linear_model import lineardiff
 # Function aliases for testing cases where parameters change the behavior in a big way, so error limits can be indexed in dict

From f574f2fbf56cf4edbdfed4330a10351a53ba0260 Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 1 May 2026 02:03:47 -0700
Subject: [PATCH 03/11] made all changes requested

---
 pynumdiff/basis_fit.py               | 116 ++++++++++++++++-----------
 pynumdiff/tests/test_diff_methods.py |  14 +---
 2 files changed, 71 insertions(+), 59 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index 3f2c5d0..6910b0a 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -136,18 +136,21 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0):
     return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis)
 
 
-def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'):
+def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'):
     """Smooth and differentiate noisy data via discrete wavelet denoising.
 
     Decomposes x into wavelet detail and approximation coefficients, soft-thresholds
     the detail coefficients to remove noise using the Donoho & Johnstone (1994)
     universal threshold estimator, reconstructs a smoothed signal, then
-    differentiates with finite differences via np.gradient.
+    differentiates analytically by applying derivative reconstruction filters to
+    the denoised wavelet coefficients.
 
-    :param np.array x: data to differentiate
-    :param float or array dt_or_t: scalar dt or array of sample times. If an
-        array is provided it is passed directly to np.gradient, giving correct
-        results for non-uniformly sampled data.
+    Because the DWT requires uniform spacing, this method only accepts a scalar
+    time step dt (not a vector of sample times). For non-uniformly sampled data,
+    use :func:`rbfdiff` or :func:`splinediff` instead.
+
+    :param np.array x: data to differentiate. May be multidimensional; see :code:`axis`.
+    :param float dt: uniform time step between samples.
     :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'.
         'db4' is a solid general-purpose default. Biorthogonal wavelets such as
         'bior2.2' or 'bior4.4' are symmetric and designed for smooth reconstruction
@@ -170,39 +173,32 @@ def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mo
     :return: - **x_hat** (np.array) -- estimated (smoothed) x
              - **dxdt_hat** (np.array) -- estimated derivative of x
     """
-    N = x.shape[axis]
+    if not np.isscalar(dt):
+        raise ValueError(
+            "`dt` must be a scalar. The DWT requires uniformly sampled data. "
+            "For variable step sizes, use rbfdiff or splinediff instead."
+        )
 
-    # Axis normalisation — bring target axis to front.
-    # Skip moveaxis when axis is already 0 to avoid an unnecessary allocation.
-    # When we do move, call ascontiguousarray immediately so the subsequent
-    # reshape is guaranteed zero-copy.
-    if axis == 0:
-        x_work = x if x.flags['C_CONTIGUOUS'] else np.ascontiguousarray(x)
-    else:
-        x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0))
+    N = x.shape[axis]
 
+    # Bring axis of differentiation to front so each column of x_flat is one
+    # signal to differentiate. moveaxis returns a view with updated strides,
+    # so ascontiguousarray ensures the subsequent reshape is zero-copy.
+    x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0))
     shape = x_work.shape
-    x_flat = x_work.reshape(N, -1)  # (N, M) contiguous, no hidden copy
+    x_flat = x_work.reshape(N, -1)  # (N, M)
     M = x_flat.shape[1]
 
-    if np.isscalar(dt_or_t):
-        grad_arg = dt_or_t
-    else:
-        if len(dt_or_t) != N:
-            raise ValueError(
-                "`dt_or_t` array must have the same length as x along `axis`."
-            )
-        grad_arg = dt_or_t  # np.gradient accepts a full coordinate array
-
-    # Conservative level default avoids over-decomposing short signals
-    # (pywt default uses the maximum possible level).
+    # Conservative level cap: pywt's default uses the maximum possible level,
+    # which can over-decompose short signals and wash out meaningful detail.
+    # Capping at 5 keeps at least 2^5 = 32 samples in the coarsest subband,
+    # which is enough to represent a smooth approximation without artefacts.
     if level is None:
         max_level = pywt.dwt_max_level(N, wavelet)
         level = min(max_level, 5)
 
-    # Decompose all columns and stack coefficients into 2-D arrays of shape
-    # (coeff_len_i, M). Probing column 0 first lets us pre-allocate correctly;
-    # the probe result is reused for col 0 so we pay N+1 wavedec calls total.
+    # Decompose all columns; probe column 0 first to learn coefficient lengths
+    # and pre-allocate, reusing that result so we only pay N+1 wavedec calls.
     _probe = pywt.wavedec(x_flat[:, 0], wavelet, level=level, mode=mode)
     coeff_lengths = [len(c) for c in _probe]
     n_levels = len(_probe)
@@ -221,10 +217,19 @@ def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mo
             coeffs_all[i][:, col] = c
 
     # Vectorised noise estimation and soft-thresholding over all columns at once.
-    # sigma: robust MAD estimator from finest detail level, shape (M,).
-    # thresh: per-column universal threshold, shape (M,).
-    # Approximation coefficients (index 0) are left untouched; only detail
-    # levels (indices 1..n_levels-1) are thresholded.
+    #
+    # Soft-thresholding achieves smoothing by shrinking wavelet detail
+    # coefficients toward zero: coefficients whose magnitude is below the
+    # threshold (mostly noise) are zeroed out, while large coefficients (true
+    # signal features) are kept but reduced by the threshold amount. Only detail
+    # levels (indices 1..n_levels-1) are thresholded; the coarse approximation
+    # coefficients (index 0) are left untouched.
+    #
+    # sigma: robust noise-level estimate via the median absolute deviation of
+    #        the finest detail level. Dividing by 0.6745 converts MAD to an
+    #        estimate of the Gaussian standard deviation.
+    # thresh: per-column Donoho & Johnstone (1994) universal threshold,
+    #         sigma * sqrt(2 * log(N)), scaled by the user-supplied `threshold`.
     sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745
     np.maximum(sigma, 1e-10, out=sigma)  # floor avoids zero threshold on clean signals
 
@@ -235,24 +240,43 @@ def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mo
         for c in coeffs_all[1:]
     ]
 
-    # Reconstruct and differentiate — pywt.waverec is 1-D only so a column
-    # loop remains, but all Python-level arithmetic has been moved out above.
+    # Build derivative reconstruction filters from the wavelet's reconstruction
+    # filters. Because the DWT reconstructs a signal as a linear combination of
+    # shifted scaling/wavelet functions, the derivative of the reconstruction is
+    # the same linear combination of the *derivatives* of those basis functions.
+    # We obtain derivative filters by finite-differencing the reconstruction
+    # lowpass filter (rec_lo), then scaling by 1/dt to convert discrete
+    # differences to continuous-time derivatives.
+    w = pywt.Wavelet(wavelet)
+    rec_lo = np.array(w.rec_lo)
+    # First-order finite difference of the filter gives the derivative filter.
+    # np.diff shortens by 1; padding with a leading zero keeps the filter length
+    # and phase consistent with the original so waverec alignment is preserved.
+    d_rec_lo = np.concatenate(([0.0], np.diff(rec_lo))) / dt
+    d_rec_hi = np.concatenate(([0.0], np.diff(np.array(w.rec_hi)))) / dt
+
+    # Reconstruct x_hat and dxdt_hat column by column.
+    # pywt.waverec is 1-D only, so the column loop is unavoidable here;
+    # the vectorised operations above have already moved all Python-level
+    # arithmetic outside this loop.
     x_hat_flat    = np.empty_like(x_flat)
     dxdt_hat_flat = np.empty_like(x_flat)
 
     for col in range(M):
         col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)]
-        x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
-        x_hat_flat[:, col]    = x_hat_col
-        dxdt_hat_flat[:, col] = np.gradient(x_hat_col, grad_arg)
 
-    # Restore original shape and axis order.
-    # moveaxis on the way out is only needed when we moved on the way in.
-    x_hat    = x_hat_flat.reshape(shape)
-    dxdt_hat = dxdt_hat_flat.reshape(shape)
+        # Standard reconstruction for the smoothed signal.
+        x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
 
-    if axis != 0:
-        x_hat    = np.moveaxis(x_hat,    0, axis)
-        dxdt_hat = np.moveaxis(dxdt_hat, 0, axis)
+        # Derivative reconstruction: replace the wavelet's reconstruction
+        # filters with their finite-difference derivatives and run waverec.
+        d_wavelet = pywt.Wavelet(
+            filter_bank=(w.dec_lo, w.dec_hi, d_rec_lo, d_rec_hi)
+        )
+        dxdt_hat_flat[:, col] = pywt.waverec(col_coeffs, d_wavelet, mode=mode)[:N]
+
+    # Restore original shape and axis order.
+    x_hat    = np.moveaxis(x_hat_flat.reshape(shape),    0, axis)
+    dxdt_hat = np.moveaxis(dxdt_hat_flat.reshape(shape), 0, axis)
 
     return x_hat, dxdt_hat
diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
index e06e8e4..ebe9c69 100644
--- a/pynumdiff/tests/test_diff_methods.py
+++ b/pynumdiff/tests/test_diff_methods.py
@@ -332,19 +332,15 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     (kerneldiff, {'kernel': 'gaussian', 'window_size': 5}),
     (butterdiff, {'filter_order': 3, 'cutoff_freq': 1 - 1e-6}),
     (finitediff, {}),
-<<<<<<< HEAD
-    (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}),
-    (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}),
-=======
     (polydiff, {'degree': 2, 'window_size': 5}),
     (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}),
+    (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}),
     (rtsdiff, {'order':2, 'log_qr_ratio':7, 'forwardbackward':True}),
     (spectraldiff, {'high_freq_cutoff': 0.25, 'pad_to_zero_dxdt': False}),
     (rbfdiff, {'sigma': 0.5, 'lmbd': 1e-6}),
     (splinediff, {'degree': 9, 's': 1e-6}),
     (robustdiff, {'order':2, 'log_q':7, 'log_r':2}),
     (tvrdiff, {'order': 3, 'gamma': 1e-4})
->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a
 ]
 
 # Similar to the error_bounds table, index by method first. But then we test against only one 2D function,
@@ -355,10 +351,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     kerneldiff: [(2, 1), (3, 2)],
     butterdiff: [(0, -1), (1, -1)],
     finitediff: [(0, -1), (1, -1)],
-<<<<<<< HEAD
-    savgoldiff: [(0, -1), (1, 1)],
     waveletdiff: [(1, 0), (2, 1)],
-=======
     polydiff: [(1, -1), (1, 0)],
     savgoldiff: [(0, -1), (1, 1)],
     rtsdiff: [(1, -1), (1, 0)],
@@ -367,7 +360,6 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     splinediff: [(0, -1), (1, 0)],
     robustdiff: [(-2, -3), (0, -1)],
     tvrdiff: [(0, -1), (1, 0)]
->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a
 }
 
 @mark.parametrize("multidim_method_and_params", multidim_methods_and_params)
@@ -420,9 +412,6 @@ def test_multidimensionality(multidim_method_and_params, request):
         ax2.plot_wireframe(T1, T2, computed_d2)
         ax3.plot_wireframe(T1, T2, computed_laplacian, label='computed')
         legend = ax3.legend(bbox_to_anchor=(0.7, 0.8)); legend.legend_handles[0].set_facecolor(pyplot.cm.viridis(0.6))
-<<<<<<< HEAD
-        fig.suptitle(f'{diff_method.__name__}', fontsize=16)
-=======
         fig.suptitle(f'{diff_method.__name__}', fontsize=16)
 
 
@@ -475,4 +464,3 @@ def test_missing_data(diff_method_and_params):
     
     assert np.all(np.isfinite(x_hat))
     assert np.all(np.isfinite(dxdt_hat))
->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a

From af06d012699b58b4ac5daf58b5c2c9434dc67fd3 Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 1 May 2026 02:10:16 -0700
Subject: [PATCH 04/11] Add pywavelets as a dependency

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a0d7d55..736c508 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,8 @@ classifiers = [
 dependencies = [
     "numpy",
     "scipy",
-    "matplotlib"
+    "matplotlib",
+    "pywavelets"
 ]
 
 [project.urls]

From 68f9255d8a02768a54cfc6029e755645d0e039af Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 1 May 2026 02:16:40 -0700
Subject: [PATCH 05/11] Fix waveletdiff: use np.gradient on denoised signal for
 derivative

---
 pynumdiff/basis_fit.py | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index 6910b0a..5b46230 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -240,40 +240,24 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p
         for c in coeffs_all[1:]
     ]
 
-    # Build derivative reconstruction filters from the wavelet's reconstruction
-    # filters. Because the DWT reconstructs a signal as a linear combination of
-    # shifted scaling/wavelet functions, the derivative of the reconstruction is
-    # the same linear combination of the *derivatives* of those basis functions.
-    # We obtain derivative filters by finite-differencing the reconstruction
-    # lowpass filter (rec_lo), then scaling by 1/dt to convert discrete
-    # differences to continuous-time derivatives.
-    w = pywt.Wavelet(wavelet)
-    rec_lo = np.array(w.rec_lo)
-    # First-order finite difference of the filter gives the derivative filter.
-    # np.diff shortens by 1; padding with a leading zero keeps the filter length
-    # and phase consistent with the original so waverec alignment is preserved.
-    d_rec_lo = np.concatenate(([0.0], np.diff(rec_lo))) / dt
-    d_rec_hi = np.concatenate(([0.0], np.diff(np.array(w.rec_hi)))) / dt
-
-    # Reconstruct x_hat and dxdt_hat column by column.
+    # Reconstruct x_hat and differentiate column by column.
     # pywt.waverec is 1-D only, so the column loop is unavoidable here;
     # the vectorised operations above have already moved all Python-level
     # arithmetic outside this loop.
+    #
+    # After wavelet denoising we have a smooth, noise-free signal. np.gradient
+    # applies a second-order central finite difference to that clean signal,
+    # which gives an accurate derivative. This is appropriate here because the
+    # heavy lifting (noise removal) has already been done by the wavelet
+    # thresholding step; np.gradient on a smooth signal converges at O(dt^2).
     x_hat_flat    = np.empty_like(x_flat)
     dxdt_hat_flat = np.empty_like(x_flat)
 
     for col in range(M):
         col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)]
-
-        # Standard reconstruction for the smoothed signal.
-        x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
-
-        # Derivative reconstruction: replace the wavelet's reconstruction
-        # filters with their finite-difference derivatives and run waverec.
-        d_wavelet = pywt.Wavelet(
-            filter_bank=(w.dec_lo, w.dec_hi, d_rec_lo, d_rec_hi)
-        )
-        dxdt_hat_flat[:, col] = pywt.waverec(col_coeffs, d_wavelet, mode=mode)[:N]
+        x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
+        x_hat_flat[:, col]    = x_hat_col
+        dxdt_hat_flat[:, col] = np.gradient(x_hat_col, dt)
 
     # Restore original shape and axis order.
     x_hat    = np.moveaxis(x_hat_flat.reshape(shape),    0, axis)

From 5bcbe293e13635c0a91f4b8d6ddc9934f9eb144e Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 8 May 2026 11:36:50 -0700
Subject: [PATCH 06/11] Fix waveletdiff: use spectral differentiation of
 denoised signal

---
 pynumdiff/basis_fit.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index 5b46230..fbc4949 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -245,19 +245,25 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p
     # the vectorised operations above have already moved all Python-level
     # arithmetic outside this loop.
     #
-    # After wavelet denoising we have a smooth, noise-free signal. np.gradient
-    # applies a second-order central finite difference to that clean signal,
-    # which gives an accurate derivative. This is appropriate here because the
-    # heavy lifting (noise removal) has already been done by the wavelet
-    # thresholding step; np.gradient on a smooth signal converges at O(dt^2).
+    # After wavelet denoising we have a smooth, noise-free signal. We
+    # differentiate it analytically in the Fourier domain: multiplying the
+    # FFT by i*omega is equivalent to applying the derivative operator exactly,
+    # with no finite-difference truncation error. This keeps the two concerns
+    # cleanly separated — wavelets handle denoising, Fourier handles
+    # differentiation.
     x_hat_flat    = np.empty_like(x_flat)
     dxdt_hat_flat = np.empty_like(x_flat)
 
+    # Angular frequency axis for a length-N signal sampled at dt.
+    # fftfreq returns cycles/sample; multiplying by 2*pi/dt gives rad/s.
+    k = np.fft.fftfreq(N, d=dt) * 2 * np.pi
+
     for col in range(M):
         col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)]
         x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
         x_hat_flat[:, col]    = x_hat_col
-        dxdt_hat_flat[:, col] = np.gradient(x_hat_col, dt)
+        X = np.fft.fft(x_hat_col)
+        dxdt_hat_flat[:, col] = np.real(np.fft.ifft(1j * k * X))
 
     # Restore original shape and axis order.
     x_hat    = np.moveaxis(x_hat_flat.reshape(shape),    0, axis)

From dbf2f3ffb52e82c1759e02425abaa79a02c2b31d Mon Sep 17 00:00:00 2001
From: Maria Protogerou <maraprot@gmail.com>
Date: Fri, 12 Jun 2026 09:54:22 -0700
Subject: [PATCH 07/11] Replace FFT-based wavelet differentiation with wavelet
 basis derivative operator

---
 pynumdiff/basis_fit.py | 181 +++++++++++++++++++++++++----------------
 1 file changed, 111 insertions(+), 70 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index fbc4949..028b031 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -1,7 +1,9 @@
 """Methods based on fitting basis functions to data"""
+from functools import lru_cache
 from warnings import warn
 import numpy as np
 from scipy import sparse
+from scipy.interpolate import CubicSpline
 import pywt
 
 from pynumdiff.utils import utility
@@ -136,14 +138,94 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0):
     return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis)
 
 
+@lru_cache(maxsize=32)
+def _wavelet_derivative_synthesis_matrix(N, dt, wavelet, level, mode):
+    """Build sparse samples of d/dt of the inverse-DWT synthesis basis.
+
+    For a fixed wavelet/level/mode/length, wavedec/waverec define a linear
+    synthesis map
+
+        x(t_n) = sum_k c_k phi_k(t_n).
+
+    This routine samples phi'_k(t_n) once, stores those samples sparsely, and
+    lets waveletdiff compute
+
+        x'(t_n) = sum_k c_k phi'_k(t_n)
+
+    without differentiating the reconstructed signal.  The derivative samples
+    are obtained from a local cubic interpolant of each compactly supported
+    synthesis basis vector; this is bookkeeping on the basis functions, not a
+    finite-difference derivative of the data.
+    """
+    zero = np.zeros(N)
+    template = pywt.wavedec(zero, wavelet, level=level, mode=mode)
+    coeff_lengths = tuple(len(c) for c in template)
+    coeff_offsets = np.cumsum((0,) + coeff_lengths[:-1])
+    n_coeffs = sum(coeff_lengths)
+    t = np.arange(N, dtype=float) * dt
+
+    rows, cols, vals = [], [], []
+    eps = 1e-12
+
+    for band, (offset, length) in enumerate(zip(coeff_offsets, coeff_lengths)):
+        for local_idx in range(length):
+            coeffs = [np.zeros_like(c, dtype=float) for c in template]
+            coeffs[band][local_idx] = 1.0
+            basis = pywt.waverec(coeffs, wavelet, mode=mode)[:N]
+
+            # Basis functions are compactly supported, but boundary extension can
+            # split support across the two ends.  Differentiating only the active
+            # samples keeps the matrix sparse and avoids global sinusoidal bases.
+            active = np.flatnonzero(np.abs(basis) > eps)
+            if active.size == 0:
+                continue
+
+            # Include one-sample padding around active support so the cubic has
+            # enough context near the edges of the support.  If support wraps or
+            # covers most of the signal, fall back to all samples.
+            support = np.zeros(N, dtype=bool)
+            support[active] = True
+            support[np.maximum(active - 1, 0)] = True
+            support[np.minimum(active + 1, N - 1)] = True
+            idx = np.flatnonzero(support)
+            if idx.size < 4 or (idx[-1] - idx[0] + 1) > 2 * idx.size:
+                idx = np.arange(N)
+
+            # CubicSpline requires strictly increasing x and at least two points.
+            # With >=4 points the not-a-knot default is well-defined; with fewer,
+            # fall back to clamped end slopes of zero.
+            bc_type = 'not-a-knot' if idx.size >= 4 else ((1, 0.0), (1, 0.0))
+            spline = CubicSpline(t[idx], basis[idx], bc_type=bc_type, extrapolate=False)
+            deriv_vals = spline(t[idx], 1)
+            keep = np.isfinite(deriv_vals) & (np.abs(deriv_vals) > eps)
+
+            rows.extend(idx[keep])
+            cols.extend(np.full(np.count_nonzero(keep), offset + local_idx))
+            vals.extend(deriv_vals[keep])
+
+    return sparse.csr_matrix((vals, (rows, cols)), shape=(N, n_coeffs)), coeff_lengths
+
+
+def _flatten_wavelet_coeffs(coeffs):
+    """Stack a wavedec coefficient list into a 2-D coefficient matrix."""
+    return np.vstack([c for band in coeffs for c in band])
+
+
 def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'):
-    """Smooth and differentiate noisy data via discrete wavelet denoising.
+    """Smooth and differentiate noisy data with a wavelet-basis derivative sum.
 
-    Decomposes x into wavelet detail and approximation coefficients, soft-thresholds
-    the detail coefficients to remove noise using the Donoho & Johnstone (1994)
-    universal threshold estimator, reconstructs a smoothed signal, then
-    differentiates analytically by applying derivative reconstruction filters to
-    the denoised wavelet coefficients.
+    Decomposes x into wavelet approximation/detail coefficients, soft-thresholds
+    the detail coefficients to denoise, reconstructs a smoothed signal, and then
+    estimates the derivative directly from the denoised wavelet coefficients:
+
+        x(t_n)  = sum_k c_k phi_k(t_n)
+        x'(t_n) = sum_k c_k phi'_k(t_n)
+
+    The first sum is the ordinary inverse wavelet transform.  The second sum is
+    evaluated by precomputing sparse samples of the derivative of each synthesis
+    basis function and multiplying that sparse matrix by the denoised
+    coefficients.  This avoids the previous reconstruct-then-FFT derivative path
+    and does not call finite differences or np.gradient on the signal.
 
     Because the DWT requires uniform spacing, this method only accepts a scalar
     time step dt (not a vector of sample times). For non-uniformly sampled data,
@@ -152,24 +234,13 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p
     :param np.array x: data to differentiate. May be multidimensional; see :code:`axis`.
     :param float dt: uniform time step between samples.
     :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'.
-        'db4' is a solid general-purpose default. Biorthogonal wavelets such as
-        'bior2.2' or 'bior4.4' are symmetric and designed for smooth reconstruction
-        but may need a lower threshold value.
     :param int level: decomposition depth. None (default) resolves to
-        min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short
-        signals. Increase for heavily oversampled data.
+        min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short signals.
     :param float threshold: soft-thresholding scale factor in [0, inf).
-        Multiplies the universal threshold sigma * sqrt(2 * log(N)).
-        threshold=1.0 is the classical Donoho & Johnstone universal threshold
-        and is the recommended starting point. Values < 1.0 give less smoothing;
-        values > 1.0 give more aggressive smoothing. This parameter maps onto
-        tvgamma in the pynumdiff.optimize framework.
     :param int axis: axis along which to differentiate (default 0).
     :param str mode: PyWavelets signal extension mode passed to wavedec/waverec.
-        'periodization' (default) keeps coefficient arrays exactly length N and
-        is the most numerically stable choice for differentiation. 'reflect' is
-        a good alternative for clearly non-periodic signals.
-        See pywt.Modes.modes for all options.
+        'periodization' keeps coefficient arrays compact; 'reflect' is often a
+        better choice for clearly non-periodic signals.
     :return: - **x_hat** (np.array) -- estimated (smoothed) x
              - **dxdt_hat** (np.array) -- estimated derivative of x
     """
@@ -180,19 +251,11 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p
         )
 
     N = x.shape[axis]
-
-    # Bring axis of differentiation to front so each column of x_flat is one
-    # signal to differentiate. moveaxis returns a view with updated strides,
-    # so ascontiguousarray ensures the subsequent reshape is zero-copy.
     x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0))
     shape = x_work.shape
-    x_flat = x_work.reshape(N, -1)  # (N, M)
+    x_flat = x_work.reshape(N, -1)
     M = x_flat.shape[1]
 
-    # Conservative level cap: pywt's default uses the maximum possible level,
-    # which can over-decompose short signals and wash out meaningful detail.
-    # Capping at 5 keeps at least 2^5 = 32 samples in the coarsest subband,
-    # which is enough to represent a smooth approximation without artefacts.
     if level is None:
         max_level = pywt.dwt_max_level(N, wavelet)
         level = min(max_level, 5)
@@ -216,57 +279,35 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p
         ):
             coeffs_all[i][:, col] = c
 
-    # Vectorised noise estimation and soft-thresholding over all columns at once.
-    #
-    # Soft-thresholding achieves smoothing by shrinking wavelet detail
-    # coefficients toward zero: coefficients whose magnitude is below the
-    # threshold (mostly noise) are zeroed out, while large coefficients (true
-    # signal features) are kept but reduced by the threshold amount. Only detail
-    # levels (indices 1..n_levels-1) are thresholded; the coarse approximation
-    # coefficients (index 0) are left untouched.
-    #
-    # sigma: robust noise-level estimate via the median absolute deviation of
-    #        the finest detail level. Dividing by 0.6745 converts MAD to an
-    #        estimate of the Gaussian standard deviation.
-    # thresh: per-column Donoho & Johnstone (1994) universal threshold,
-    #         sigma * sqrt(2 * log(N)), scaled by the user-supplied `threshold`.
+    # Robust noise estimate from finest details, then Donoho-Johnstone
+    # soft-thresholding on detail bands only.
     sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745
-    np.maximum(sigma, 1e-10, out=sigma)  # floor avoids zero threshold on clean signals
-
-    thresh = threshold * sigma * np.sqrt(2 * np.log(N))  # shape (M,)
-
+    np.maximum(sigma, 1e-10, out=sigma)
+    thresh = threshold * sigma * np.sqrt(2 * np.log(N))
     coeffs_denoised = [coeffs_all[0]] + [
         pywt.threshold(c, thresh[np.newaxis, :], mode='soft')
         for c in coeffs_all[1:]
     ]
 
-    # Reconstruct x_hat and differentiate column by column.
-    # pywt.waverec is 1-D only, so the column loop is unavoidable here;
-    # the vectorised operations above have already moved all Python-level
-    # arithmetic outside this loop.
-    #
-    # After wavelet denoising we have a smooth, noise-free signal. We
-    # differentiate it analytically in the Fourier domain: multiplying the
-    # FFT by i*omega is equivalent to applying the derivative operator exactly,
-    # with no finite-difference truncation error. This keeps the two concerns
-    # cleanly separated — wavelets handle denoising, Fourier handles
-    # differentiation.
-    x_hat_flat    = np.empty_like(x_flat)
-    dxdt_hat_flat = np.empty_like(x_flat)
-
-    # Angular frequency axis for a length-N signal sampled at dt.
-    # fftfreq returns cycles/sample; multiplying by 2*pi/dt gives rad/s.
-    k = np.fft.fftfreq(N, d=dt) * 2 * np.pi
+    Dphi, matrix_coeff_lengths = _wavelet_derivative_synthesis_matrix(
+        N, float(dt), wavelet, int(level), mode
+    )
+    if tuple(coeff_lengths) != tuple(matrix_coeff_lengths):
+        raise RuntimeError("Cached wavelet derivative matrix coefficient layout does not match wavedec output.")
+
+    x_hat_flat = np.empty_like(x_flat)
+    coeffs_flat = np.empty((sum(coeff_lengths), M), dtype=x_flat.dtype)
+    offsets = np.cumsum((0,) + tuple(coeff_lengths[:-1]))
 
     for col in range(M):
         col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)]
-        x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
-        x_hat_flat[:, col]    = x_hat_col
-        X = np.fft.fft(x_hat_col)
-        dxdt_hat_flat[:, col] = np.real(np.fft.ifft(1j * k * X))
+        x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
+        for i, (offset, length) in enumerate(zip(offsets, coeff_lengths)):
+            coeffs_flat[offset:offset + length, col] = coeffs_denoised[i][:, col]
+
+    dxdt_hat_flat = Dphi @ coeffs_flat
 
-    # Restore original shape and axis order.
-    x_hat    = np.moveaxis(x_hat_flat.reshape(shape),    0, axis)
+    x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis)
     dxdt_hat = np.moveaxis(dxdt_hat_flat.reshape(shape), 0, axis)
 
     return x_hat, dxdt_hat

From baf9946105d5f027586be2e9c5bbbe8e0b3cd2ae Mon Sep 17 00:00:00 2001
From: pavelkomarov <pvlkmrv@gmail.com>
Date: Fri, 12 Jun 2026 12:16:01 -0700
Subject: [PATCH 08/11] Compute wavelet derivative analytically via connection
 coefficients

Replace the cubic-spline-of-sampled-basis derivative with the exact
analytic derivative of the wavelet basis. The denoised reconstruction is
the wavelet interpolant x(t) = sum_n a_n phi(t/dt - n), so its derivative
is x' = Phi' Phi^-1 x_hat, where Phi and Phi' are circulant samples of the
scaling function phi and its derivative phi' at integers. Those samples are
the eigenvalue-1 and eigenvalue-1/2 eigenvectors of the wavelet refinement
relation (connection coefficients), normalized to reproduce constants and
differentiate ramps exactly. No splines or finite differences.

- Antisymmetrically extend x_hat before applying the periodic operator so
  edge derivatives stay accurate instead of spiking at the wrap.
- Reject Haar/db1, whose step scaling function has no derivative.
- Default to db8 (smoother -> better derivatives on noisy data) and collapse
  the two old helpers into one operator builder; vectorize the DWT over columns.
- Retighten the waveletdiff error bounds for the new, exact derivative.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 pynumdiff/basis_fit.py               | 259 +++++++++++----------------
 pynumdiff/tests/test_diff_methods.py |  18 +-
 2 files changed, 117 insertions(+), 160 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index 028b031..23e48b0 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -3,7 +3,6 @@
 from warnings import warn
 import numpy as np
 from scipy import sparse
-from scipy.interpolate import CubicSpline
 import pywt
 
 from pynumdiff.utils import utility
@@ -139,93 +138,70 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0):
 
 
 @lru_cache(maxsize=32)
-def _wavelet_derivative_synthesis_matrix(N, dt, wavelet, level, mode):
-    """Build sparse samples of d/dt of the inverse-DWT synthesis basis.
-
-    For a fixed wavelet/level/mode/length, wavedec/waverec define a linear
-    synthesis map
-
-        x(t_n) = sum_k c_k phi_k(t_n).
-
-    This routine samples phi'_k(t_n) once, stores those samples sparsely, and
-    lets waveletdiff compute
-
-        x'(t_n) = sum_k c_k phi'_k(t_n)
-
-    without differentiating the reconstructed signal.  The derivative samples
-    are obtained from a local cubic interpolant of each compactly supported
-    synthesis basis vector; this is bookkeeping on the basis functions, not a
-    finite-difference derivative of the data.
+def _wavelet_derivative_operator(N, dt, wavelet):
+    """Build the sparse operators that turn denoised samples into a derivative.
+
+    PyWavelets treats the input samples as the finest-level scaling coefficients,
+    so the denoised reconstruction x_hat represents the continuous interpolant
+    x(t) = sum_n a_n phi(t/dt - n), where phi is the wavelet's scaling function.
+    Sampling x and its analytic derivative on the grid t_m = m*dt gives two
+    convolutions against phi and phi' evaluated at *integers*:
+
+        x_hat[m] = sum_n a_n phi(m - n)            ->  x_hat = Phi @ a
+        x'(t_m)  = (1/dt) sum_n a_n phi'(m - n)    ->  x'    = Phi_prime @ a
+
+    so x' = Phi_prime @ Phi^-1 @ x_hat. This is the exact derivative of the
+    wavelet interpolant, with no spline or finite-difference approximation.
+
+    phi and phi' at the integers are the eigenvectors of the wavelet's refinement
+    (dilation) relation: differentiating phi(t) = sqrt2 * sum_k h_k phi(2t - k)
+    and evaluating at integers shows phi sampled at integers is the eigenvalue-1
+    eigenvector and phi' the eigenvalue-1/2 eigenvector of T[p,q] = sqrt2 * h_{2p-q}.
+    Normalizations come from reproduction of constants (sum phi(p) = 1) and of
+    linears (sum p*phi'(p) = -1, so the operator differentiates a ramp exactly).
+
+    :return: - **Phi** (csc_matrix) -- circulant samples of phi, to be inverted
+             - **Phi_prime** (csr_matrix) -- circulant samples of phi'/dt
     """
-    zero = np.zeros(N)
-    template = pywt.wavedec(zero, wavelet, level=level, mode=mode)
-    coeff_lengths = tuple(len(c) for c in template)
-    coeff_offsets = np.cumsum((0,) + coeff_lengths[:-1])
-    n_coeffs = sum(coeff_lengths)
-    t = np.arange(N, dtype=float) * dt
-
-    rows, cols, vals = [], [], []
-    eps = 1e-12
-
-    for band, (offset, length) in enumerate(zip(coeff_offsets, coeff_lengths)):
-        for local_idx in range(length):
-            coeffs = [np.zeros_like(c, dtype=float) for c in template]
-            coeffs[band][local_idx] = 1.0
-            basis = pywt.waverec(coeffs, wavelet, mode=mode)[:N]
-
-            # Basis functions are compactly supported, but boundary extension can
-            # split support across the two ends.  Differentiating only the active
-            # samples keeps the matrix sparse and avoids global sinusoidal bases.
-            active = np.flatnonzero(np.abs(basis) > eps)
-            if active.size == 0:
-                continue
-
-            # Include one-sample padding around active support so the cubic has
-            # enough context near the edges of the support.  If support wraps or
-            # covers most of the signal, fall back to all samples.
-            support = np.zeros(N, dtype=bool)
-            support[active] = True
-            support[np.maximum(active - 1, 0)] = True
-            support[np.minimum(active + 1, N - 1)] = True
-            idx = np.flatnonzero(support)
-            if idx.size < 4 or (idx[-1] - idx[0] + 1) > 2 * idx.size:
-                idx = np.arange(N)
-
-            # CubicSpline requires strictly increasing x and at least two points.
-            # With >=4 points the not-a-knot default is well-defined; with fewer,
-            # fall back to clamped end slopes of zero.
-            bc_type = 'not-a-knot' if idx.size >= 4 else ((1, 0.0), (1, 0.0))
-            spline = CubicSpline(t[idx], basis[idx], bc_type=bc_type, extrapolate=False)
-            deriv_vals = spline(t[idx], 1)
-            keep = np.isfinite(deriv_vals) & (np.abs(deriv_vals) > eps)
-
-            rows.extend(idx[keep])
-            cols.extend(np.full(np.count_nonzero(keep), offset + local_idx))
-            vals.extend(deriv_vals[keep])
-
-    return sparse.csr_matrix((vals, (rows, cols)), shape=(N, n_coeffs)), coeff_lengths
-
-
-def _flatten_wavelet_coeffs(coeffs):
-    """Stack a wavedec coefficient list into a 2-D coefficient matrix."""
-    return np.vstack([c for band in coeffs for c in band])
-
-
-def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'):
-    """Smooth and differentiate noisy data with a wavelet-basis derivative sum.
-
-    Decomposes x into wavelet approximation/detail coefficients, soft-thresholds
-    the detail coefficients to denoise, reconstructs a smoothed signal, and then
-    estimates the derivative directly from the denoised wavelet coefficients:
-
-        x(t_n)  = sum_k c_k phi_k(t_n)
-        x'(t_n) = sum_k c_k phi'_k(t_n)
-
-    The first sum is the ordinary inverse wavelet transform.  The second sum is
-    evaluated by precomputing sparse samples of the derivative of each synthesis
-    basis function and multiplying that sparse matrix by the denoised
-    coefficients.  This avoids the previous reconstruct-then-FFT derivative path
-    and does not call finite differences or np.gradient on the signal.
+    h = np.array(pywt.Wavelet(wavelet).rec_lo)   # reconstruction low-pass = refinement filter h_k
+    h = h / h.sum() * np.sqrt(2)                  # enforce sum(h) = sqrt2, i.e. integral of phi is 1
+    L = len(h)                                    # phi is supported on [0, L-1]; sample those integers
+    p = np.arange(L)
+
+    # Transition matrix T[p,q] = sqrt2 * h_{2p-q}; entries outside the filter are 0.
+    cols = 2 * p[:, None] - p[None, :]
+    T = np.where((cols >= 0) & (cols < L), np.sqrt(2) * h[np.clip(cols, 0, L - 1)], 0.0)
+
+    evals, evecs = np.linalg.eig(T)
+    phi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))])    # phi(p):  eigenvalue 1
+    dphi = np.real(evecs[:, np.argmin(np.abs(evals - 0.5))])   # phi'(p): eigenvalue 1/2
+    phi = phi / phi.sum()                                      # sum_p phi(p) = 1
+    dphi = dphi / np.dot(p, dphi) * (-1.0)                     # sum_p p*phi'(p) = -1
+
+    # Both kernels become circulant matrices under periodic boundaries; a common
+    # shift of both cancels in Phi_prime @ Phi^-1, so the offset choice is cosmetic.
+    def circulant(kernel):
+        rows, cols, vals = [], [], []
+        m = np.arange(N)
+        for offset, val in zip(p, kernel):
+            if abs(val) < 1e-12: continue
+            rows.extend(m); cols.extend((m - offset) % N); vals.extend([val] * N)
+        return sparse.csr_matrix((vals, (rows, cols)), shape=(N, N))
+
+    return circulant(phi).tocsc(), circulant(dphi / dt)
+
+
+def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='periodization'):
+    """Smooth and differentiate noisy data in a wavelet basis.
+
+    Three steps: (1) decompose x with the DWT and soft-threshold the detail
+    coefficients to denoise (Donoho-Johnstone universal threshold), reconstructing
+    a smoothed x_hat; (2) extend x_hat antisymmetrically so the periodic derivative
+    operator stays accurate at the edges; (3) recover the scaling coefficients of
+    x_hat and apply the analytic derivative of the wavelet basis to get the
+    derivative. The derivative operator differentiates the basis functions
+    themselves (see :func:`_wavelet_derivative_operator`) rather than
+    finite-differencing the signal, so it is exact for signals the basis can represent.
 
     Because the DWT requires uniform spacing, this method only accepts a scalar
     time step dt (not a vector of sample times). For non-uniformly sampled data,
@@ -233,81 +209,62 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p
 
     :param np.array x: data to differentiate. May be multidimensional; see :code:`axis`.
     :param float dt: uniform time step between samples.
-    :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'.
+    :param str wavelet: PyWavelets wavelet name. Must have a differentiable scaling
+        function, so smoother wavelets give better derivatives: 'db8' (default) and
+        'sym8' are best for noisy data; 'db4', 'sym4', and 'coif2' also work well.
     :param int level: decomposition depth. None (default) resolves to
         min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short signals.
     :param float threshold: soft-thresholding scale factor in [0, inf).
     :param int axis: axis along which to differentiate (default 0).
-    :param str mode: PyWavelets signal extension mode passed to wavedec/waverec.
-        'periodization' keeps coefficient arrays compact; 'reflect' is often a
-        better choice for clearly non-periodic signals.
+    :param str mode: PyWavelets signal extension mode for the denoising transform.
+        'periodization' keeps coefficient arrays compact. The derivative operator is
+        periodic, so x_hat is antisymmetrically extended before it is applied (see below).
     :return: - **x_hat** (np.array) -- estimated (smoothed) x
              - **dxdt_hat** (np.array) -- estimated derivative of x
     """
     if not np.isscalar(dt):
-        raise ValueError(
-            "`dt` must be a scalar. The DWT requires uniformly sampled data. "
-            "For variable step sizes, use rbfdiff or splinediff instead."
-        )
+        raise ValueError("`dt` must be a scalar. The DWT requires uniformly sampled data. "
+            "For variable step sizes, use rbfdiff or splinediff instead.")
+
+    # The Haar scaling function is a step, so it has no pointwise derivative and the
+    # connection-coefficient operator below is undefined for it. Haar/db1 is the only
+    # orthonormal wavelet with a 2-tap filter, so dec_len identifies it.
+    if pywt.Wavelet(wavelet).dec_len == 2:
+        raise ValueError("The Haar/db1 wavelet has a discontinuous (piecewise-constant) scaling "
+            "function with no derivative, so it cannot be used to differentiate. Pick a smoother "
+            "wavelet such as 'db4', 'sym4', or 'coif2'.")
 
     N = x.shape[axis]
-    x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0))
-    shape = x_work.shape
-    x_flat = x_work.reshape(N, -1)
-    M = x_flat.shape[1]
+    x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) # differentiation axis to front
+    shape = x_work.shape                                   # remember it to restore the input's dimensionality
+    x_flat = x_work.reshape(N, -1)                         # rest of the dims flattened into columns
 
     if level is None:
-        max_level = pywt.dwt_max_level(N, wavelet)
-        level = min(max_level, 5)
-
-    # Decompose all columns; probe column 0 first to learn coefficient lengths
-    # and pre-allocate, reusing that result so we only pay N+1 wavedec calls.
-    _probe = pywt.wavedec(x_flat[:, 0], wavelet, level=level, mode=mode)
-    coeff_lengths = [len(c) for c in _probe]
-    n_levels = len(_probe)
-
-    coeffs_all = [
-        np.empty((coeff_lengths[i], M), dtype=x_flat.dtype)
-        for i in range(n_levels)
-    ]
-    for i, c in enumerate(_probe):
-        coeffs_all[i][:, 0] = c
-
-    for col in range(1, M):
-        for i, c in enumerate(
-            pywt.wavedec(x_flat[:, col], wavelet, level=level, mode=mode)
-        ):
-            coeffs_all[i][:, col] = c
-
-    # Robust noise estimate from finest details, then Donoho-Johnstone
-    # soft-thresholding on detail bands only.
-    sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745
-    np.maximum(sigma, 1e-10, out=sigma)
-    thresh = threshold * sigma * np.sqrt(2 * np.log(N))
-    coeffs_denoised = [coeffs_all[0]] + [
-        pywt.threshold(c, thresh[np.newaxis, :], mode='soft')
-        for c in coeffs_all[1:]
-    ]
-
-    Dphi, matrix_coeff_lengths = _wavelet_derivative_synthesis_matrix(
-        N, float(dt), wavelet, int(level), mode
-    )
-    if tuple(coeff_lengths) != tuple(matrix_coeff_lengths):
-        raise RuntimeError("Cached wavelet derivative matrix coefficient layout does not match wavedec output.")
-
-    x_hat_flat = np.empty_like(x_flat)
-    coeffs_flat = np.empty((sum(coeff_lengths), M), dtype=x_flat.dtype)
-    offsets = np.cumsum((0,) + tuple(coeff_lengths[:-1]))
-
-    for col in range(M):
-        col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)]
-        x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N]
-        for i, (offset, length) in enumerate(zip(offsets, coeff_lengths)):
-            coeffs_flat[offset:offset + length, col] = coeffs_denoised[i][:, col]
-
-    dxdt_hat_flat = Dphi @ coeffs_flat
-
-    x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis)
-    dxdt_hat = np.moveaxis(dxdt_hat_flat.reshape(shape), 0, axis)
+        level = min(pywt.dwt_max_level(N, wavelet), 5)
 
+    # 1. Denoise: DWT all columns at once, then soft-threshold the detail bands. The
+    # noise level is estimated robustly per column from the finest details (coeffs[-1]).
+    coeffs = pywt.wavedec(x_flat, wavelet, level=level, mode=mode, axis=0)
+    sigma = np.maximum(np.median(np.abs(coeffs[-1]), axis=0) / 0.6745, 1e-10)
+    thresh = threshold * sigma * np.sqrt(2 * np.log(N))
+    coeffs = [coeffs[0]] + [pywt.threshold(c, thresh[np.newaxis, :], mode='soft') for c in coeffs[1:]]
+    x_hat = pywt.waverec(coeffs, wavelet, mode=mode, axis=0)[:N]
+
+    # 2. The derivative operator is periodic, but x_hat usually isn't. Extend it
+    # antisymmetrically (reflect through each endpoint: x[-1-k] -> 2*x[0]-x[1+k]) so the
+    # periodic wrap is continuous in both value and slope, which keeps the derivative
+    # accurate at the edges instead of spiking there. This is the odd-symmetry analog of
+    # spectraldiff's even extension; a ramp extends to a ramp, so slopes survive exactly.
+    left = 2 * x_hat[0] - x_hat[1:][::-1]
+    right = 2 * x_hat[-1] - x_hat[:-1][::-1]
+    x_ext = np.concatenate([left, x_hat, right], axis=0)  # length 3N-2, original at [N-1:2N-1]
+
+    # 3. Differentiate the basis: recover the scaling coefficients a = Phi^-1 @ x_ext, then
+    # apply the analytic basis derivative dxdt = Phi_prime @ a, and crop back to the original.
+    Phi, Phi_prime = _wavelet_derivative_operator(x_ext.shape[0], float(dt), wavelet)
+    a = sparse.linalg.spsolve(Phi, x_ext)
+    dxdt_flat = (Phi_prime @ a.reshape(x_ext.shape[0], -1))[N - 1:2 * N - 1]
+
+    x_hat = np.moveaxis(x_hat.reshape(shape), 0, axis)
+    dxdt_hat = np.moveaxis(dxdt_flat.reshape(shape), 0, axis)
     return x_hat, dxdt_hat
diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
index ebe9c69..cb9abce 100644
--- a/pynumdiff/tests/test_diff_methods.py
+++ b/pynumdiff/tests/test_diff_methods.py
@@ -51,7 +51,7 @@ def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs)
     (spline_irreg_step, {'degree':5, 's':2}),
     (spectraldiff, {'high_freq_cutoff':0.2}), (spectraldiff, [0.2]),
     (rbfdiff, {'sigma':0.5, 'lmbd':0.001}),
-    (waveletdiff, {'wavelet':'db4', 'threshold':1.0}),
+    (waveletdiff, {'wavelet':'db8', 'threshold':1.0}),
     (constant_velocity, {'r':1e-2, 'q':1e3}), (constant_velocity, [1e-2, 1e3]),
     (constant_acceleration, {'r':1e-3, 'q':1e4}), (constant_acceleration, [1e-3, 1e4]),
     (constant_jerk, {'r':1e-4, 'q':1e5}), (constant_jerk, [1e-4, 1e5]),
@@ -174,12 +174,12 @@ def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs)
               [(-2, -2), (0, 0), (0, -1), (0, 0)],
               [(0, 0), (2, 2), (0, 0), (2, 2)],
               [(1, 1), (3, 3), (1, 1), (3, 3)]],
-    waveletdiff: [[(-14, -15), (-14, -14), (-1, -1), (0, 0)],
-                  [(-9, -9), (-8, -8), (0, 0), (1, 1)],
-                  [(-9, -9), (0, 0), (0, 0), (1, 1)],
-                  [(-1, -1), (0, 0), (0, 0), (1, 1)],
-                  [(1, 0), (2, 2), (1, 1), (2, 2)],
-                  [(0, 0), (3, 3), (1, 0), (3, 3)]], 
+    waveletdiff: [[(-15, -15), (-13, -14), (0, -1), (1, 0)],
+                  [(-2, -2), (-1, -1), (0, 0), (1, 1)],
+                  [(-2, -2), (-1, -1), (0, 0), (1, 1)],
+                  [(-3, -3), (-1, -1), (0, 0), (1, 1)],
+                  [(0, -1), (2, 2), (0, 0), (2, 2)],
+                  [(0, -1), (3, 3), (0, 0), (3, 3)]],
     velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)],
                [(-12, -12), (-11, -12), (-1, -1), (-1, -2)],
                [(0, -1), (1, 0), (0, -1), (1, 0)],
@@ -334,7 +334,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     (finitediff, {}),
     (polydiff, {'degree': 2, 'window_size': 5}),
     (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}),
-    (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}),
+    (waveletdiff, {'wavelet': 'db8', 'threshold': 1.0}),
     (rtsdiff, {'order':2, 'log_qr_ratio':7, 'forwardbackward':True}),
     (spectraldiff, {'high_freq_cutoff': 0.25, 'pad_to_zero_dxdt': False}),
     (rbfdiff, {'sigma': 0.5, 'lmbd': 1e-6}),
@@ -351,7 +351,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     kerneldiff: [(2, 1), (3, 2)],
     butterdiff: [(0, -1), (1, -1)],
     finitediff: [(0, -1), (1, -1)],
-    waveletdiff: [(1, 0), (2, 1)],
+    waveletdiff: [(1, 0), (2, 2)],
     polydiff: [(1, -1), (1, 0)],
     savgoldiff: [(0, -1), (1, 1)],
     rtsdiff: [(1, -1), (1, 0)],

From 142ebe4673a4d119dbcd84120d03bdce999be9d3 Mon Sep 17 00:00:00 2001
From: pavelkomarov <pvlkmrv@gmail.com>
Date: Fri, 12 Jun 2026 12:20:36 -0700
Subject: [PATCH 09/11] Loosen near-machine-precision wavelet derivative bound
 for CI portability

The clean constant-derivative Linf error lands at ~1.2e-14 on Linux vs
<=1e-14 on macOS; give that one bound a decade of headroom.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 pynumdiff/tests/test_diff_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
index cb9abce..29470df 100644
--- a/pynumdiff/tests/test_diff_methods.py
+++ b/pynumdiff/tests/test_diff_methods.py
@@ -174,7 +174,7 @@ def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs)
               [(-2, -2), (0, 0), (0, -1), (0, 0)],
               [(0, 0), (2, 2), (0, 0), (2, 2)],
               [(1, 1), (3, 3), (1, 1), (3, 3)]],
-    waveletdiff: [[(-15, -15), (-13, -14), (0, -1), (1, 0)],
+    waveletdiff: [[(-15, -15), (-13, -13), (0, -1), (1, 0)],
                   [(-2, -2), (-1, -1), (0, 0), (1, 1)],
                   [(-2, -2), (-1, -1), (0, 0), (1, 1)],
                   [(-3, -3), (-1, -1), (0, 0), (1, 1)],

From c8003a4a7b71b4352ce3079bcd1f3a21ac45c425 Mon Sep 17 00:00:00 2001
From: pavelkomarov <pvlkmrv@gmail.com>
Date: Fri, 12 Jun 2026 12:31:56 -0700
Subject: [PATCH 10/11] Drop lru_cache on wavelet derivative operator

The operator is built once per waveletdiff call and applied to all columns
at once via the vectorized spsolve, so within-call reuse is already free.
The cache only helped across separate same-length calls, a marginal win that
didn't justify retaining up to 32 sparse-matrix pairs in memory.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 pynumdiff/basis_fit.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index 23e48b0..5c06357 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -1,5 +1,4 @@
 """Methods based on fitting basis functions to data"""
-from functools import lru_cache
 from warnings import warn
 import numpy as np
 from scipy import sparse
@@ -137,10 +136,12 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0):
     return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis)
 
 
-@lru_cache(maxsize=32)
 def _wavelet_derivative_operator(N, dt, wavelet):
     """Build the sparse operators that turn denoised samples into a derivative.
 
+    Depends only on the grid (N, dt) and the wavelet, not on the data, and is
+    built once per waveletdiff call then applied to every column at once.
+
     PyWavelets treats the input samples as the finest-level scaling coefficients,
     so the denoised reconstruction x_hat represents the continuous interpolant
     x(t) = sum_n a_n phi(t/dt - n), where phi is the wavelet's scaling function.

From 8e8d46e1c1893726aedfbd857c15c1fdc33c91ee Mon Sep 17 00:00:00 2001
From: pavelkomarov <pvlkmrv@gmail.com>
Date: Fri, 12 Jun 2026 12:35:44 -0700
Subject: [PATCH 11/11] Inline the wavelet derivative operator into waveletdiff

Fold the private _wavelet_derivative_operator into waveletdiff as a
standalone block, mirroring how rbfdiff builds its basis and derivative
matrices in one loop. No other caller needs it, and the connection-coefficient
math now lives in waveletdiff's own docstring.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 pynumdiff/basis_fit.py | 99 +++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 63 deletions(-)

diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py
index 5c06357..f4764f4 100644
--- a/pynumdiff/basis_fit.py
+++ b/pynumdiff/basis_fit.py
@@ -136,73 +136,27 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0):
     return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis)
 
 
-def _wavelet_derivative_operator(N, dt, wavelet):
-    """Build the sparse operators that turn denoised samples into a derivative.
-
-    Depends only on the grid (N, dt) and the wavelet, not on the data, and is
-    built once per waveletdiff call then applied to every column at once.
-
-    PyWavelets treats the input samples as the finest-level scaling coefficients,
-    so the denoised reconstruction x_hat represents the continuous interpolant
-    x(t) = sum_n a_n phi(t/dt - n), where phi is the wavelet's scaling function.
-    Sampling x and its analytic derivative on the grid t_m = m*dt gives two
-    convolutions against phi and phi' evaluated at *integers*:
-
-        x_hat[m] = sum_n a_n phi(m - n)            ->  x_hat = Phi @ a
-        x'(t_m)  = (1/dt) sum_n a_n phi'(m - n)    ->  x'    = Phi_prime @ a
-
-    so x' = Phi_prime @ Phi^-1 @ x_hat. This is the exact derivative of the
-    wavelet interpolant, with no spline or finite-difference approximation.
-
-    phi and phi' at the integers are the eigenvectors of the wavelet's refinement
-    (dilation) relation: differentiating phi(t) = sqrt2 * sum_k h_k phi(2t - k)
-    and evaluating at integers shows phi sampled at integers is the eigenvalue-1
-    eigenvector and phi' the eigenvalue-1/2 eigenvector of T[p,q] = sqrt2 * h_{2p-q}.
-    Normalizations come from reproduction of constants (sum phi(p) = 1) and of
-    linears (sum p*phi'(p) = -1, so the operator differentiates a ramp exactly).
-
-    :return: - **Phi** (csc_matrix) -- circulant samples of phi, to be inverted
-             - **Phi_prime** (csr_matrix) -- circulant samples of phi'/dt
-    """
-    h = np.array(pywt.Wavelet(wavelet).rec_lo)   # reconstruction low-pass = refinement filter h_k
-    h = h / h.sum() * np.sqrt(2)                  # enforce sum(h) = sqrt2, i.e. integral of phi is 1
-    L = len(h)                                    # phi is supported on [0, L-1]; sample those integers
-    p = np.arange(L)
-
-    # Transition matrix T[p,q] = sqrt2 * h_{2p-q}; entries outside the filter are 0.
-    cols = 2 * p[:, None] - p[None, :]
-    T = np.where((cols >= 0) & (cols < L), np.sqrt(2) * h[np.clip(cols, 0, L - 1)], 0.0)
-
-    evals, evecs = np.linalg.eig(T)
-    phi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))])    # phi(p):  eigenvalue 1
-    dphi = np.real(evecs[:, np.argmin(np.abs(evals - 0.5))])   # phi'(p): eigenvalue 1/2
-    phi = phi / phi.sum()                                      # sum_p phi(p) = 1
-    dphi = dphi / np.dot(p, dphi) * (-1.0)                     # sum_p p*phi'(p) = -1
-
-    # Both kernels become circulant matrices under periodic boundaries; a common
-    # shift of both cancels in Phi_prime @ Phi^-1, so the offset choice is cosmetic.
-    def circulant(kernel):
-        rows, cols, vals = [], [], []
-        m = np.arange(N)
-        for offset, val in zip(p, kernel):
-            if abs(val) < 1e-12: continue
-            rows.extend(m); cols.extend((m - offset) % N); vals.extend([val] * N)
-        return sparse.csr_matrix((vals, (rows, cols)), shape=(N, N))
-
-    return circulant(phi).tocsc(), circulant(dphi / dt)
-
-
 def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='periodization'):
     """Smooth and differentiate noisy data in a wavelet basis.
 
     Three steps: (1) decompose x with the DWT and soft-threshold the detail
     coefficients to denoise (Donoho-Johnstone universal threshold), reconstructing
     a smoothed x_hat; (2) extend x_hat antisymmetrically so the periodic derivative
-    operator stays accurate at the edges; (3) recover the scaling coefficients of
-    x_hat and apply the analytic derivative of the wavelet basis to get the
-    derivative. The derivative operator differentiates the basis functions
-    themselves (see :func:`_wavelet_derivative_operator`) rather than
-    finite-differencing the signal, so it is exact for signals the basis can represent.
+    operator stays accurate at the edges; (3) recover the wavelet scaling
+    coefficients of x_hat and apply the analytic derivative of the wavelet basis.
+
+    The derivative differentiates the basis functions themselves rather than
+    finite-differencing the signal. PyWavelets treats the samples as finest-level
+    scaling coefficients, so x_hat is the interpolant x(t) = sum_n a_n phi(t/dt - n)
+    for the scaling function phi. Sampling x and its analytic derivative on the grid
+    gives two convolutions against phi and phi' evaluated at *integers*,
+
+        x_hat = Phi @ a     and     x' = Phi_prime @ a,
+
+    so x' = Phi_prime @ Phi^-1 @ x_hat, exact for signals the basis can represent.
+    The integer samples phi(p), phi'(p) are the eigenvalue-1 and eigenvalue-1/2
+    eigenvectors of the refinement relation phi(t) = sqrt2 sum_k h_k phi(2t - k)
+    (the "connection coefficients"), normalized to reproduce constants and ramps.
 
     Because the DWT requires uniform spacing, this method only accepts a scalar
     time step dt (not a vector of sample times). For non-uniformly sampled data,
@@ -239,6 +193,26 @@ def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='p
     x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) # differentiation axis to front
     shape = x_work.shape                                   # remember it to restore the input's dimensionality
     x_flat = x_work.reshape(N, -1)                         # rest of the dims flattened into columns
+    Ne = 3 * N - 2                                         # length after the antisymmetric extension in step 2
+
+    # Build the wavelet-basis derivative operator (depends only on the grid and wavelet).
+    # Sampling the refinement relation phi(t) = sqrt2 sum_k h_k phi(2t - k) at integers makes
+    # phi(p) the eigenvalue-1 and phi'(p) the eigenvalue-1/2 eigenvector of T[p,q] = sqrt2 h_{2p-q}.
+    h = np.array(pywt.Wavelet(wavelet).rec_lo); h = h / h.sum() * np.sqrt(2) # refinement filter, integral of phi = 1
+    L = len(h); p = np.arange(L)                            # phi is supported on the integers [0, L-1]
+    shift = 2 * p[:, None] - p[None, :]
+    T = np.where((shift >= 0) & (shift < L), np.sqrt(2) * h[np.clip(shift, 0, L - 1)], 0.0)
+    evals, evecs = np.linalg.eig(T)
+    phi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))]); phi /= phi.sum()           # sum_p phi(p) = 1
+    dphi = np.real(evecs[:, np.argmin(np.abs(evals - 0.5))]); dphi /= np.dot(p, dphi)*-1 # sum_p p*phi'(p) = -1
+    # Phi and Phi_prime hold circulant samples of phi and phi'/dt on the extended grid; both
+    # share a common shift that cancels in Phi_prime @ Phi^-1, so the offset choice is cosmetic.
+    rows, cols, phi_vals, dphi_vals = [], [], [], []
+    m = np.arange(Ne)
+    for offset, phi_p, dphi_p in zip(p, phi, dphi / dt):
+        rows.extend(m); cols.extend((m - offset) % Ne); phi_vals.extend([phi_p]*Ne); dphi_vals.extend([dphi_p]*Ne)
+    Phi = sparse.csr_matrix((phi_vals, (rows, cols)), shape=(Ne, Ne)).tocsc()           # to invert
+    Phi_prime = sparse.csr_matrix((dphi_vals, (rows, cols)), shape=(Ne, Ne))            # to apply
 
     if level is None:
         level = min(pywt.dwt_max_level(N, wavelet), 5)
@@ -262,9 +236,8 @@ def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='p
 
     # 3. Differentiate the basis: recover the scaling coefficients a = Phi^-1 @ x_ext, then
     # apply the analytic basis derivative dxdt = Phi_prime @ a, and crop back to the original.
-    Phi, Phi_prime = _wavelet_derivative_operator(x_ext.shape[0], float(dt), wavelet)
     a = sparse.linalg.spsolve(Phi, x_ext)
-    dxdt_flat = (Phi_prime @ a.reshape(x_ext.shape[0], -1))[N - 1:2 * N - 1]
+    dxdt_flat = (Phi_prime @ a.reshape(Ne, -1))[N - 1:2 * N - 1]
 
     x_hat = np.moveaxis(x_hat.reshape(shape), 0, axis)
     dxdt_hat = np.moveaxis(dxdt_flat.reshape(shape), 0, axis)