From 0086c1dfb61120b8d42129d6f38c032f548af66b Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 17 Apr 2026 13:58:51 -0700 Subject: [PATCH 01/11] Add waveletdiff: wavelet-based denoising differentiator - Implements Donoho & Johnstone (1994) universal threshold estimator - Vectorised thresholding over multi-column inputs - Supports variable step size via np.gradient - Supports arbitrary axis via ascontiguousarray + moveaxis - Adds pywt dependency --- pynumdiff/__init__.py | 2 +- pynumdiff/basis_fit.py | 123 +++++++++++++++++++++++++++ pynumdiff/tests/test_diff_methods.py | 18 +++- 3 files changed, 138 insertions(+), 5 deletions(-) diff --git a/pynumdiff/__init__.py b/pynumdiff/__init__.py index 7a3c465..2e33073 100644 --- a/pynumdiff/__init__.py +++ b/pynumdiff/__init__.py @@ -15,6 +15,6 @@ from .finite_difference import finitediff, first_order, second_order, fourth_order from .smooth_finite_difference import kerneldiff, meandiff, mediandiff, gaussiandiff, friedrichsdiff, butterdiff from .polynomial_fit import splinediff, polydiff, savgoldiff -from .basis_fit import spectraldiff, rbfdiff +from .basis_fit import spectraldiff, rbfdiff, waveletdiff from .total_variation_regularization import iterative_velocity from .kalman_smooth import kalman_filter, rts_smooth, rtsdiff, constant_velocity, constant_acceleration, constant_jerk diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index c43aff0..10337c3 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -2,6 +2,7 @@ from warnings import warn import numpy as np from scipy import sparse +import pywt from pynumdiff.utils import utility @@ -137,3 +138,125 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01): alpha = sparse.linalg.spsolve(rbf_regularized, x) # solve sparse system targeting the noisy data, O(N sigma^2) return rbf @ alpha, drbfdt @ alpha # find samples of reconstructions using the smooth bases + + +def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'): + """Smooth and differentiate noisy data via discrete wavelet denoising. + + Decomposes x into wavelet detail and approximation coefficients, soft-thresholds + the detail coefficients to remove noise using the Donoho & Johnstone (1994) + universal threshold estimator, reconstructs a smoothed signal, then + differentiates with finite differences via np.gradient. + + :param np.array x: data to differentiate + :param float or array dt_or_t: scalar dt or array of sample times. If an + array is provided it is passed directly to np.gradient, giving correct + results for non-uniformly sampled data. + :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'. + 'db4' is a solid general-purpose default. Biorthogonal wavelets such as + 'bior2.2' or 'bior4.4' are symmetric and designed for smooth reconstruction + but may need a lower threshold value. + :param int level: decomposition depth. None (default) resolves to + min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short + signals. Increase for heavily oversampled data. + :param float threshold: soft-thresholding scale factor in [0, inf). + Multiplies the universal threshold sigma * sqrt(2 * log(N)). + threshold=1.0 is the classical Donoho & Johnstone universal threshold + and is the recommended starting point. Values < 1.0 give less smoothing; + values > 1.0 give more aggressive smoothing. This parameter maps onto + tvgamma in the pynumdiff.optimize framework. + :param int axis: axis along which to differentiate (default 0). + :param str mode: PyWavelets signal extension mode passed to wavedec/waverec. + 'periodization' (default) keeps coefficient arrays exactly length N and + is the most numerically stable choice for differentiation. 'reflect' is + a good alternative for clearly non-periodic signals. + See pywt.Modes.modes for all options. + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x + """ + N = x.shape[axis] + + # Axis normalisation — bring target axis to front. + # Skip moveaxis when axis is already 0 to avoid an unnecessary allocation. + # When we do move, call ascontiguousarray immediately so the subsequent + # reshape is guaranteed zero-copy. + if axis == 0: + x_work = x if x.flags['C_CONTIGUOUS'] else np.ascontiguousarray(x) + else: + x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) + + shape = x_work.shape + x_flat = x_work.reshape(N, -1) # (N, M) contiguous, no hidden copy + M = x_flat.shape[1] + + if np.isscalar(dt_or_t): + grad_arg = dt_or_t + else: + if len(dt_or_t) != N: + raise ValueError( + "`dt_or_t` array must have the same length as x along `axis`." + ) + grad_arg = dt_or_t # np.gradient accepts a full coordinate array + + # Conservative level default avoids over-decomposing short signals + # (pywt default uses the maximum possible level). + if level is None: + max_level = pywt.dwt_max_level(N, wavelet) + level = min(max_level, 5) + + # Decompose all columns and stack coefficients into 2-D arrays of shape + # (coeff_len_i, M). Probing column 0 first lets us pre-allocate correctly; + # the probe result is reused for col 0 so we pay N+1 wavedec calls total. + _probe = pywt.wavedec(x_flat[:, 0], wavelet, level=level, mode=mode) + coeff_lengths = [len(c) for c in _probe] + n_levels = len(_probe) + + coeffs_all = [ + np.empty((coeff_lengths[i], M), dtype=x_flat.dtype) + for i in range(n_levels) + ] + for i, c in enumerate(_probe): + coeffs_all[i][:, 0] = c + + for col in range(1, M): + for i, c in enumerate( + pywt.wavedec(x_flat[:, col], wavelet, level=level, mode=mode) + ): + coeffs_all[i][:, col] = c + + # Vectorised noise estimation and soft-thresholding over all columns at once. + # sigma: robust MAD estimator from finest detail level, shape (M,). + # thresh: per-column universal threshold, shape (M,). + # Approximation coefficients (index 0) are left untouched; only detail + # levels (indices 1..n_levels-1) are thresholded. + sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745 + np.maximum(sigma, 1e-10, out=sigma) # floor avoids zero threshold on clean signals + + thresh = threshold * sigma * np.sqrt(2 * np.log(N)) # shape (M,) + + coeffs_denoised = [coeffs_all[0]] + [ + pywt.threshold(c, thresh[np.newaxis, :], mode='soft') + for c in coeffs_all[1:] + ] + + # Reconstruct and differentiate — pywt.waverec is 1-D only so a column + # loop remains, but all Python-level arithmetic has been moved out above. + x_hat_flat = np.empty_like(x_flat) + dxdt_hat_flat = np.empty_like(x_flat) + + for col in range(M): + col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)] + x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] + x_hat_flat[:, col] = x_hat_col + dxdt_hat_flat[:, col] = np.gradient(x_hat_col, grad_arg) + + # Restore original shape and axis order. + # moveaxis on the way out is only needed when we moved on the way in. + x_hat = x_hat_flat.reshape(shape) + dxdt_hat = dxdt_hat_flat.reshape(shape) + + if axis != 0: + x_hat = np.moveaxis(x_hat, 0, axis) + dxdt_hat = np.moveaxis(dxdt_hat, 0, axis) + + return x_hat, dxdt_hat diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index 7d9eda6..a5429b1 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -5,7 +5,7 @@ from ..smooth_finite_difference import kerneldiff, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff from ..finite_difference import finitediff, first_order, second_order, fourth_order from ..polynomial_fit import polydiff, savgoldiff, splinediff -from ..basis_fit import spectraldiff, rbfdiff +from ..basis_fit import spectraldiff, rbfdiff, waveletdiff from ..total_variation_regularization import velocity, acceleration, jerk, iterative_velocity, smooth_acceleration from ..kalman_smooth import rtsdiff, constant_velocity, constant_acceleration, constant_jerk, robustdiff from ..linear_model import lineardiff @@ -47,6 +47,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) (spline_irreg_step, {'degree':5, 's':2}), (spectraldiff, {'high_freq_cutoff':0.2}), (spectraldiff, [0.2]), (rbfdiff, {'sigma':0.5, 'lmbd':0.001}), + (waveletdiff, {'wavelet':'db4', 'threshold':1.0}), + (waveletdiff, {'wavelet':'db4', 'threshold':1.0}), (constant_velocity, {'r':1e-2, 'q':1e3}), (constant_velocity, [1e-2, 1e3]), (constant_acceleration, {'r':1e-3, 'q':1e4}), (constant_acceleration, [1e-3, 1e4]), (constant_jerk, {'r':1e-4, 'q':1e5}), (constant_jerk, [1e-4, 1e5]), @@ -162,6 +164,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-2, -2), (0, 0), (0, -1), (0, 0)], [(0, 0), (2, 2), (0, 0), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], + waveletdiff: [[(-14, -15), (-14, -14), (-1, -1), (0, 0)], + [(-9, -9), (-8, -8), (0, 0), (1, 1)], + [(-9, -9), (0, 0), (0, 0), (1, 1)], + [(-1, -1), (0, 0), (0, 0), (1, 1)], + [(1, 0), (2, 2), (1, 1), (2, 2)], + [(0, 0), (3, 3), (1, 0), (3, 3)]], velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)], [(-12, -12), (-11, -12), (-1, -1), (-1, -2)], [(0, -1), (1, 0), (0, -1), (1, 0)], @@ -308,7 +316,8 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re (kerneldiff, {'kernel': 'gaussian', 'window_size': 5}), (butterdiff, {'filter_order': 3, 'cutoff_freq': 1 - 1e-6}), (finitediff, {}), - (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}) + (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}), + (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}), ] # Similar to the error_bounds table, index by method first. But then we test against only one 2D function, @@ -319,7 +328,8 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re kerneldiff: [(2, 1), (3, 2)], butterdiff: [(0, -1), (1, -1)], finitediff: [(0, -1), (1, -1)], - savgoldiff: [(0, -1), (1, 1)] + savgoldiff: [(0, -1), (1, 1)], + waveletdiff: [(1, 0), (2, 1)], } @mark.parametrize("multidim_method_and_params", multidim_methods_and_params) @@ -372,4 +382,4 @@ def test_multidimensionality(multidim_method_and_params, request): ax2.plot_wireframe(T1, T2, computed_d2) ax3.plot_wireframe(T1, T2, computed_laplacian, label='computed') legend = ax3.legend(bbox_to_anchor=(0.7, 0.8)); legend.legend_handles[0].set_facecolor(pyplot.cm.viridis(0.6)) - fig.suptitle(f'{diff_method.__name__}', fontsize=16) + fig.suptitle(f'{diff_method.__name__}', fontsize=16) \ No newline at end of file From 8431f6df9dbdd7f8158a51d27c535f5347ccbedb Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 17 Apr 2026 14:51:27 -0700 Subject: [PATCH 02/11] resolved another merge conflict --- pynumdiff/tests/test_diff_methods.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index 7bcd75e..e06e8e4 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -5,13 +5,8 @@ from ..smooth_finite_difference import kerneldiff, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff from ..finite_difference import finitediff, first_order, second_order, fourth_order from ..polynomial_fit import polydiff, savgoldiff, splinediff -<<<<<<< HEAD from ..basis_fit import spectraldiff, rbfdiff, waveletdiff -from ..total_variation_regularization import velocity, acceleration, jerk, iterative_velocity, smooth_acceleration -======= -from ..basis_fit import spectraldiff, rbfdiff from ..total_variation_regularization import velocity, acceleration, jerk, iterative_velocity, smooth_acceleration, tvrdiff ->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a from ..kalman_smooth import rtsdiff, constant_velocity, constant_acceleration, constant_jerk, robustdiff from ..linear_model import lineardiff # Function aliases for testing cases where parameters change the behavior in a big way, so error limits can be indexed in dict From f574f2fbf56cf4edbdfed4330a10351a53ba0260 Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 1 May 2026 02:03:47 -0700 Subject: [PATCH 03/11] made all changes requested --- pynumdiff/basis_fit.py | 116 ++++++++++++++++----------- pynumdiff/tests/test_diff_methods.py | 14 +--- 2 files changed, 71 insertions(+), 59 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index 3f2c5d0..6910b0a 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -136,18 +136,21 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0): return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis) -def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'): +def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'): """Smooth and differentiate noisy data via discrete wavelet denoising. Decomposes x into wavelet detail and approximation coefficients, soft-thresholds the detail coefficients to remove noise using the Donoho & Johnstone (1994) universal threshold estimator, reconstructs a smoothed signal, then - differentiates with finite differences via np.gradient. + differentiates analytically by applying derivative reconstruction filters to + the denoised wavelet coefficients. - :param np.array x: data to differentiate - :param float or array dt_or_t: scalar dt or array of sample times. If an - array is provided it is passed directly to np.gradient, giving correct - results for non-uniformly sampled data. + Because the DWT requires uniform spacing, this method only accepts a scalar + time step dt (not a vector of sample times). For non-uniformly sampled data, + use :func:`rbfdiff` or :func:`splinediff` instead. + + :param np.array x: data to differentiate. May be multidimensional; see :code:`axis`. + :param float dt: uniform time step between samples. :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'. 'db4' is a solid general-purpose default. Biorthogonal wavelets such as 'bior2.2' or 'bior4.4' are symmetric and designed for smooth reconstruction @@ -170,39 +173,32 @@ def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mo :return: - **x_hat** (np.array) -- estimated (smoothed) x - **dxdt_hat** (np.array) -- estimated derivative of x """ - N = x.shape[axis] + if not np.isscalar(dt): + raise ValueError( + "`dt` must be a scalar. The DWT requires uniformly sampled data. " + "For variable step sizes, use rbfdiff or splinediff instead." + ) - # Axis normalisation — bring target axis to front. - # Skip moveaxis when axis is already 0 to avoid an unnecessary allocation. - # When we do move, call ascontiguousarray immediately so the subsequent - # reshape is guaranteed zero-copy. - if axis == 0: - x_work = x if x.flags['C_CONTIGUOUS'] else np.ascontiguousarray(x) - else: - x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) + N = x.shape[axis] + # Bring axis of differentiation to front so each column of x_flat is one + # signal to differentiate. moveaxis returns a view with updated strides, + # so ascontiguousarray ensures the subsequent reshape is zero-copy. + x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) shape = x_work.shape - x_flat = x_work.reshape(N, -1) # (N, M) contiguous, no hidden copy + x_flat = x_work.reshape(N, -1) # (N, M) M = x_flat.shape[1] - if np.isscalar(dt_or_t): - grad_arg = dt_or_t - else: - if len(dt_or_t) != N: - raise ValueError( - "`dt_or_t` array must have the same length as x along `axis`." - ) - grad_arg = dt_or_t # np.gradient accepts a full coordinate array - - # Conservative level default avoids over-decomposing short signals - # (pywt default uses the maximum possible level). + # Conservative level cap: pywt's default uses the maximum possible level, + # which can over-decompose short signals and wash out meaningful detail. + # Capping at 5 keeps at least 2^5 = 32 samples in the coarsest subband, + # which is enough to represent a smooth approximation without artefacts. if level is None: max_level = pywt.dwt_max_level(N, wavelet) level = min(max_level, 5) - # Decompose all columns and stack coefficients into 2-D arrays of shape - # (coeff_len_i, M). Probing column 0 first lets us pre-allocate correctly; - # the probe result is reused for col 0 so we pay N+1 wavedec calls total. + # Decompose all columns; probe column 0 first to learn coefficient lengths + # and pre-allocate, reusing that result so we only pay N+1 wavedec calls. _probe = pywt.wavedec(x_flat[:, 0], wavelet, level=level, mode=mode) coeff_lengths = [len(c) for c in _probe] n_levels = len(_probe) @@ -221,10 +217,19 @@ def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mo coeffs_all[i][:, col] = c # Vectorised noise estimation and soft-thresholding over all columns at once. - # sigma: robust MAD estimator from finest detail level, shape (M,). - # thresh: per-column universal threshold, shape (M,). - # Approximation coefficients (index 0) are left untouched; only detail - # levels (indices 1..n_levels-1) are thresholded. + # + # Soft-thresholding achieves smoothing by shrinking wavelet detail + # coefficients toward zero: coefficients whose magnitude is below the + # threshold (mostly noise) are zeroed out, while large coefficients (true + # signal features) are kept but reduced by the threshold amount. Only detail + # levels (indices 1..n_levels-1) are thresholded; the coarse approximation + # coefficients (index 0) are left untouched. + # + # sigma: robust noise-level estimate via the median absolute deviation of + # the finest detail level. Dividing by 0.6745 converts MAD to an + # estimate of the Gaussian standard deviation. + # thresh: per-column Donoho & Johnstone (1994) universal threshold, + # sigma * sqrt(2 * log(N)), scaled by the user-supplied `threshold`. sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745 np.maximum(sigma, 1e-10, out=sigma) # floor avoids zero threshold on clean signals @@ -235,24 +240,43 @@ def waveletdiff(x, dt_or_t, wavelet='db4', level=None, threshold=1.0, axis=0, mo for c in coeffs_all[1:] ] - # Reconstruct and differentiate — pywt.waverec is 1-D only so a column - # loop remains, but all Python-level arithmetic has been moved out above. + # Build derivative reconstruction filters from the wavelet's reconstruction + # filters. Because the DWT reconstructs a signal as a linear combination of + # shifted scaling/wavelet functions, the derivative of the reconstruction is + # the same linear combination of the *derivatives* of those basis functions. + # We obtain derivative filters by finite-differencing the reconstruction + # lowpass filter (rec_lo), then scaling by 1/dt to convert discrete + # differences to continuous-time derivatives. + w = pywt.Wavelet(wavelet) + rec_lo = np.array(w.rec_lo) + # First-order finite difference of the filter gives the derivative filter. + # np.diff shortens by 1; padding with a leading zero keeps the filter length + # and phase consistent with the original so waverec alignment is preserved. + d_rec_lo = np.concatenate(([0.0], np.diff(rec_lo))) / dt + d_rec_hi = np.concatenate(([0.0], np.diff(np.array(w.rec_hi)))) / dt + + # Reconstruct x_hat and dxdt_hat column by column. + # pywt.waverec is 1-D only, so the column loop is unavoidable here; + # the vectorised operations above have already moved all Python-level + # arithmetic outside this loop. x_hat_flat = np.empty_like(x_flat) dxdt_hat_flat = np.empty_like(x_flat) for col in range(M): col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)] - x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] - x_hat_flat[:, col] = x_hat_col - dxdt_hat_flat[:, col] = np.gradient(x_hat_col, grad_arg) - # Restore original shape and axis order. - # moveaxis on the way out is only needed when we moved on the way in. - x_hat = x_hat_flat.reshape(shape) - dxdt_hat = dxdt_hat_flat.reshape(shape) + # Standard reconstruction for the smoothed signal. + x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] - if axis != 0: - x_hat = np.moveaxis(x_hat, 0, axis) - dxdt_hat = np.moveaxis(dxdt_hat, 0, axis) + # Derivative reconstruction: replace the wavelet's reconstruction + # filters with their finite-difference derivatives and run waverec. + d_wavelet = pywt.Wavelet( + filter_bank=(w.dec_lo, w.dec_hi, d_rec_lo, d_rec_hi) + ) + dxdt_hat_flat[:, col] = pywt.waverec(col_coeffs, d_wavelet, mode=mode)[:N] + + # Restore original shape and axis order. + x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis) + dxdt_hat = np.moveaxis(dxdt_hat_flat.reshape(shape), 0, axis) return x_hat, dxdt_hat diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index e06e8e4..ebe9c69 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -332,19 +332,15 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re (kerneldiff, {'kernel': 'gaussian', 'window_size': 5}), (butterdiff, {'filter_order': 3, 'cutoff_freq': 1 - 1e-6}), (finitediff, {}), -<<<<<<< HEAD - (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}), - (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}), -======= (polydiff, {'degree': 2, 'window_size': 5}), (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}), + (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}), (rtsdiff, {'order':2, 'log_qr_ratio':7, 'forwardbackward':True}), (spectraldiff, {'high_freq_cutoff': 0.25, 'pad_to_zero_dxdt': False}), (rbfdiff, {'sigma': 0.5, 'lmbd': 1e-6}), (splinediff, {'degree': 9, 's': 1e-6}), (robustdiff, {'order':2, 'log_q':7, 'log_r':2}), (tvrdiff, {'order': 3, 'gamma': 1e-4}) ->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a ] # Similar to the error_bounds table, index by method first. But then we test against only one 2D function, @@ -355,10 +351,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re kerneldiff: [(2, 1), (3, 2)], butterdiff: [(0, -1), (1, -1)], finitediff: [(0, -1), (1, -1)], -<<<<<<< HEAD - savgoldiff: [(0, -1), (1, 1)], waveletdiff: [(1, 0), (2, 1)], -======= polydiff: [(1, -1), (1, 0)], savgoldiff: [(0, -1), (1, 1)], rtsdiff: [(1, -1), (1, 0)], @@ -367,7 +360,6 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re splinediff: [(0, -1), (1, 0)], robustdiff: [(-2, -3), (0, -1)], tvrdiff: [(0, -1), (1, 0)] ->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a } @mark.parametrize("multidim_method_and_params", multidim_methods_and_params) @@ -420,9 +412,6 @@ def test_multidimensionality(multidim_method_and_params, request): ax2.plot_wireframe(T1, T2, computed_d2) ax3.plot_wireframe(T1, T2, computed_laplacian, label='computed') legend = ax3.legend(bbox_to_anchor=(0.7, 0.8)); legend.legend_handles[0].set_facecolor(pyplot.cm.viridis(0.6)) -<<<<<<< HEAD - fig.suptitle(f'{diff_method.__name__}', fontsize=16) -======= fig.suptitle(f'{diff_method.__name__}', fontsize=16) @@ -475,4 +464,3 @@ def test_missing_data(diff_method_and_params): assert np.all(np.isfinite(x_hat)) assert np.all(np.isfinite(dxdt_hat)) ->>>>>>> b38199f982cb4036065f599b3fe00f6076671a6a From af06d012699b58b4ac5daf58b5c2c9434dc67fd3 Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 1 May 2026 02:10:16 -0700 Subject: [PATCH 04/11] Add pywavelets as a dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a0d7d55..736c508 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,8 @@ classifiers = [ dependencies = [ "numpy", "scipy", - "matplotlib" + "matplotlib", + "pywavelets" ] [project.urls] From 68f9255d8a02768a54cfc6029e755645d0e039af Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 1 May 2026 02:16:40 -0700 Subject: [PATCH 05/11] Fix waveletdiff: use np.gradient on denoised signal for derivative --- pynumdiff/basis_fit.py | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index 6910b0a..5b46230 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -240,40 +240,24 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p for c in coeffs_all[1:] ] - # Build derivative reconstruction filters from the wavelet's reconstruction - # filters. Because the DWT reconstructs a signal as a linear combination of - # shifted scaling/wavelet functions, the derivative of the reconstruction is - # the same linear combination of the *derivatives* of those basis functions. - # We obtain derivative filters by finite-differencing the reconstruction - # lowpass filter (rec_lo), then scaling by 1/dt to convert discrete - # differences to continuous-time derivatives. - w = pywt.Wavelet(wavelet) - rec_lo = np.array(w.rec_lo) - # First-order finite difference of the filter gives the derivative filter. - # np.diff shortens by 1; padding with a leading zero keeps the filter length - # and phase consistent with the original so waverec alignment is preserved. - d_rec_lo = np.concatenate(([0.0], np.diff(rec_lo))) / dt - d_rec_hi = np.concatenate(([0.0], np.diff(np.array(w.rec_hi)))) / dt - - # Reconstruct x_hat and dxdt_hat column by column. + # Reconstruct x_hat and differentiate column by column. # pywt.waverec is 1-D only, so the column loop is unavoidable here; # the vectorised operations above have already moved all Python-level # arithmetic outside this loop. + # + # After wavelet denoising we have a smooth, noise-free signal. np.gradient + # applies a second-order central finite difference to that clean signal, + # which gives an accurate derivative. This is appropriate here because the + # heavy lifting (noise removal) has already been done by the wavelet + # thresholding step; np.gradient on a smooth signal converges at O(dt^2). x_hat_flat = np.empty_like(x_flat) dxdt_hat_flat = np.empty_like(x_flat) for col in range(M): col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)] - - # Standard reconstruction for the smoothed signal. - x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] - - # Derivative reconstruction: replace the wavelet's reconstruction - # filters with their finite-difference derivatives and run waverec. - d_wavelet = pywt.Wavelet( - filter_bank=(w.dec_lo, w.dec_hi, d_rec_lo, d_rec_hi) - ) - dxdt_hat_flat[:, col] = pywt.waverec(col_coeffs, d_wavelet, mode=mode)[:N] + x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] + x_hat_flat[:, col] = x_hat_col + dxdt_hat_flat[:, col] = np.gradient(x_hat_col, dt) # Restore original shape and axis order. x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis) From 5bcbe293e13635c0a91f4b8d6ddc9934f9eb144e Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 8 May 2026 11:36:50 -0700 Subject: [PATCH 06/11] Fix waveletdiff: use spectral differentiation of denoised signal --- pynumdiff/basis_fit.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index 5b46230..fbc4949 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -245,19 +245,25 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p # the vectorised operations above have already moved all Python-level # arithmetic outside this loop. # - # After wavelet denoising we have a smooth, noise-free signal. np.gradient - # applies a second-order central finite difference to that clean signal, - # which gives an accurate derivative. This is appropriate here because the - # heavy lifting (noise removal) has already been done by the wavelet - # thresholding step; np.gradient on a smooth signal converges at O(dt^2). + # After wavelet denoising we have a smooth, noise-free signal. We + # differentiate it analytically in the Fourier domain: multiplying the + # FFT by i*omega is equivalent to applying the derivative operator exactly, + # with no finite-difference truncation error. This keeps the two concerns + # cleanly separated — wavelets handle denoising, Fourier handles + # differentiation. x_hat_flat = np.empty_like(x_flat) dxdt_hat_flat = np.empty_like(x_flat) + # Angular frequency axis for a length-N signal sampled at dt. + # fftfreq returns cycles/sample; multiplying by 2*pi/dt gives rad/s. + k = np.fft.fftfreq(N, d=dt) * 2 * np.pi + for col in range(M): col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)] x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] x_hat_flat[:, col] = x_hat_col - dxdt_hat_flat[:, col] = np.gradient(x_hat_col, dt) + X = np.fft.fft(x_hat_col) + dxdt_hat_flat[:, col] = np.real(np.fft.ifft(1j * k * X)) # Restore original shape and axis order. x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis) From dbf2f3ffb52e82c1759e02425abaa79a02c2b31d Mon Sep 17 00:00:00 2001 From: Maria Protogerou Date: Fri, 12 Jun 2026 09:54:22 -0700 Subject: [PATCH 07/11] Replace FFT-based wavelet differentiation with wavelet basis derivative operator --- pynumdiff/basis_fit.py | 181 +++++++++++++++++++++++++---------------- 1 file changed, 111 insertions(+), 70 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index fbc4949..028b031 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -1,7 +1,9 @@ """Methods based on fitting basis functions to data""" +from functools import lru_cache from warnings import warn import numpy as np from scipy import sparse +from scipy.interpolate import CubicSpline import pywt from pynumdiff.utils import utility @@ -136,14 +138,94 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0): return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis) +@lru_cache(maxsize=32) +def _wavelet_derivative_synthesis_matrix(N, dt, wavelet, level, mode): + """Build sparse samples of d/dt of the inverse-DWT synthesis basis. + + For a fixed wavelet/level/mode/length, wavedec/waverec define a linear + synthesis map + + x(t_n) = sum_k c_k phi_k(t_n). + + This routine samples phi'_k(t_n) once, stores those samples sparsely, and + lets waveletdiff compute + + x'(t_n) = sum_k c_k phi'_k(t_n) + + without differentiating the reconstructed signal. The derivative samples + are obtained from a local cubic interpolant of each compactly supported + synthesis basis vector; this is bookkeeping on the basis functions, not a + finite-difference derivative of the data. + """ + zero = np.zeros(N) + template = pywt.wavedec(zero, wavelet, level=level, mode=mode) + coeff_lengths = tuple(len(c) for c in template) + coeff_offsets = np.cumsum((0,) + coeff_lengths[:-1]) + n_coeffs = sum(coeff_lengths) + t = np.arange(N, dtype=float) * dt + + rows, cols, vals = [], [], [] + eps = 1e-12 + + for band, (offset, length) in enumerate(zip(coeff_offsets, coeff_lengths)): + for local_idx in range(length): + coeffs = [np.zeros_like(c, dtype=float) for c in template] + coeffs[band][local_idx] = 1.0 + basis = pywt.waverec(coeffs, wavelet, mode=mode)[:N] + + # Basis functions are compactly supported, but boundary extension can + # split support across the two ends. Differentiating only the active + # samples keeps the matrix sparse and avoids global sinusoidal bases. + active = np.flatnonzero(np.abs(basis) > eps) + if active.size == 0: + continue + + # Include one-sample padding around active support so the cubic has + # enough context near the edges of the support. If support wraps or + # covers most of the signal, fall back to all samples. + support = np.zeros(N, dtype=bool) + support[active] = True + support[np.maximum(active - 1, 0)] = True + support[np.minimum(active + 1, N - 1)] = True + idx = np.flatnonzero(support) + if idx.size < 4 or (idx[-1] - idx[0] + 1) > 2 * idx.size: + idx = np.arange(N) + + # CubicSpline requires strictly increasing x and at least two points. + # With >=4 points the not-a-knot default is well-defined; with fewer, + # fall back to clamped end slopes of zero. + bc_type = 'not-a-knot' if idx.size >= 4 else ((1, 0.0), (1, 0.0)) + spline = CubicSpline(t[idx], basis[idx], bc_type=bc_type, extrapolate=False) + deriv_vals = spline(t[idx], 1) + keep = np.isfinite(deriv_vals) & (np.abs(deriv_vals) > eps) + + rows.extend(idx[keep]) + cols.extend(np.full(np.count_nonzero(keep), offset + local_idx)) + vals.extend(deriv_vals[keep]) + + return sparse.csr_matrix((vals, (rows, cols)), shape=(N, n_coeffs)), coeff_lengths + + +def _flatten_wavelet_coeffs(coeffs): + """Stack a wavedec coefficient list into a 2-D coefficient matrix.""" + return np.vstack([c for band in coeffs for c in band]) + + def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'): - """Smooth and differentiate noisy data via discrete wavelet denoising. + """Smooth and differentiate noisy data with a wavelet-basis derivative sum. - Decomposes x into wavelet detail and approximation coefficients, soft-thresholds - the detail coefficients to remove noise using the Donoho & Johnstone (1994) - universal threshold estimator, reconstructs a smoothed signal, then - differentiates analytically by applying derivative reconstruction filters to - the denoised wavelet coefficients. + Decomposes x into wavelet approximation/detail coefficients, soft-thresholds + the detail coefficients to denoise, reconstructs a smoothed signal, and then + estimates the derivative directly from the denoised wavelet coefficients: + + x(t_n) = sum_k c_k phi_k(t_n) + x'(t_n) = sum_k c_k phi'_k(t_n) + + The first sum is the ordinary inverse wavelet transform. The second sum is + evaluated by precomputing sparse samples of the derivative of each synthesis + basis function and multiplying that sparse matrix by the denoised + coefficients. This avoids the previous reconstruct-then-FFT derivative path + and does not call finite differences or np.gradient on the signal. Because the DWT requires uniform spacing, this method only accepts a scalar time step dt (not a vector of sample times). For non-uniformly sampled data, @@ -152,24 +234,13 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p :param np.array x: data to differentiate. May be multidimensional; see :code:`axis`. :param float dt: uniform time step between samples. :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'. - 'db4' is a solid general-purpose default. Biorthogonal wavelets such as - 'bior2.2' or 'bior4.4' are symmetric and designed for smooth reconstruction - but may need a lower threshold value. :param int level: decomposition depth. None (default) resolves to - min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short - signals. Increase for heavily oversampled data. + min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short signals. :param float threshold: soft-thresholding scale factor in [0, inf). - Multiplies the universal threshold sigma * sqrt(2 * log(N)). - threshold=1.0 is the classical Donoho & Johnstone universal threshold - and is the recommended starting point. Values < 1.0 give less smoothing; - values > 1.0 give more aggressive smoothing. This parameter maps onto - tvgamma in the pynumdiff.optimize framework. :param int axis: axis along which to differentiate (default 0). :param str mode: PyWavelets signal extension mode passed to wavedec/waverec. - 'periodization' (default) keeps coefficient arrays exactly length N and - is the most numerically stable choice for differentiation. 'reflect' is - a good alternative for clearly non-periodic signals. - See pywt.Modes.modes for all options. + 'periodization' keeps coefficient arrays compact; 'reflect' is often a + better choice for clearly non-periodic signals. :return: - **x_hat** (np.array) -- estimated (smoothed) x - **dxdt_hat** (np.array) -- estimated derivative of x """ @@ -180,19 +251,11 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p ) N = x.shape[axis] - - # Bring axis of differentiation to front so each column of x_flat is one - # signal to differentiate. moveaxis returns a view with updated strides, - # so ascontiguousarray ensures the subsequent reshape is zero-copy. x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) shape = x_work.shape - x_flat = x_work.reshape(N, -1) # (N, M) + x_flat = x_work.reshape(N, -1) M = x_flat.shape[1] - # Conservative level cap: pywt's default uses the maximum possible level, - # which can over-decompose short signals and wash out meaningful detail. - # Capping at 5 keeps at least 2^5 = 32 samples in the coarsest subband, - # which is enough to represent a smooth approximation without artefacts. if level is None: max_level = pywt.dwt_max_level(N, wavelet) level = min(max_level, 5) @@ -216,57 +279,35 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p ): coeffs_all[i][:, col] = c - # Vectorised noise estimation and soft-thresholding over all columns at once. - # - # Soft-thresholding achieves smoothing by shrinking wavelet detail - # coefficients toward zero: coefficients whose magnitude is below the - # threshold (mostly noise) are zeroed out, while large coefficients (true - # signal features) are kept but reduced by the threshold amount. Only detail - # levels (indices 1..n_levels-1) are thresholded; the coarse approximation - # coefficients (index 0) are left untouched. - # - # sigma: robust noise-level estimate via the median absolute deviation of - # the finest detail level. Dividing by 0.6745 converts MAD to an - # estimate of the Gaussian standard deviation. - # thresh: per-column Donoho & Johnstone (1994) universal threshold, - # sigma * sqrt(2 * log(N)), scaled by the user-supplied `threshold`. + # Robust noise estimate from finest details, then Donoho-Johnstone + # soft-thresholding on detail bands only. sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745 - np.maximum(sigma, 1e-10, out=sigma) # floor avoids zero threshold on clean signals - - thresh = threshold * sigma * np.sqrt(2 * np.log(N)) # shape (M,) - + np.maximum(sigma, 1e-10, out=sigma) + thresh = threshold * sigma * np.sqrt(2 * np.log(N)) coeffs_denoised = [coeffs_all[0]] + [ pywt.threshold(c, thresh[np.newaxis, :], mode='soft') for c in coeffs_all[1:] ] - # Reconstruct x_hat and differentiate column by column. - # pywt.waverec is 1-D only, so the column loop is unavoidable here; - # the vectorised operations above have already moved all Python-level - # arithmetic outside this loop. - # - # After wavelet denoising we have a smooth, noise-free signal. We - # differentiate it analytically in the Fourier domain: multiplying the - # FFT by i*omega is equivalent to applying the derivative operator exactly, - # with no finite-difference truncation error. This keeps the two concerns - # cleanly separated — wavelets handle denoising, Fourier handles - # differentiation. - x_hat_flat = np.empty_like(x_flat) - dxdt_hat_flat = np.empty_like(x_flat) - - # Angular frequency axis for a length-N signal sampled at dt. - # fftfreq returns cycles/sample; multiplying by 2*pi/dt gives rad/s. - k = np.fft.fftfreq(N, d=dt) * 2 * np.pi + Dphi, matrix_coeff_lengths = _wavelet_derivative_synthesis_matrix( + N, float(dt), wavelet, int(level), mode + ) + if tuple(coeff_lengths) != tuple(matrix_coeff_lengths): + raise RuntimeError("Cached wavelet derivative matrix coefficient layout does not match wavedec output.") + + x_hat_flat = np.empty_like(x_flat) + coeffs_flat = np.empty((sum(coeff_lengths), M), dtype=x_flat.dtype) + offsets = np.cumsum((0,) + tuple(coeff_lengths[:-1])) for col in range(M): col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)] - x_hat_col = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] - x_hat_flat[:, col] = x_hat_col - X = np.fft.fft(x_hat_col) - dxdt_hat_flat[:, col] = np.real(np.fft.ifft(1j * k * X)) + x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] + for i, (offset, length) in enumerate(zip(offsets, coeff_lengths)): + coeffs_flat[offset:offset + length, col] = coeffs_denoised[i][:, col] + + dxdt_hat_flat = Dphi @ coeffs_flat - # Restore original shape and axis order. - x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis) + x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis) dxdt_hat = np.moveaxis(dxdt_hat_flat.reshape(shape), 0, axis) return x_hat, dxdt_hat From baf9946105d5f027586be2e9c5bbbe8e0b3cd2ae Mon Sep 17 00:00:00 2001 From: pavelkomarov Date: Fri, 12 Jun 2026 12:16:01 -0700 Subject: [PATCH 08/11] Compute wavelet derivative analytically via connection coefficients Replace the cubic-spline-of-sampled-basis derivative with the exact analytic derivative of the wavelet basis. The denoised reconstruction is the wavelet interpolant x(t) = sum_n a_n phi(t/dt - n), so its derivative is x' = Phi' Phi^-1 x_hat, where Phi and Phi' are circulant samples of the scaling function phi and its derivative phi' at integers. Those samples are the eigenvalue-1 and eigenvalue-1/2 eigenvectors of the wavelet refinement relation (connection coefficients), normalized to reproduce constants and differentiate ramps exactly. No splines or finite differences. - Antisymmetrically extend x_hat before applying the periodic operator so edge derivatives stay accurate instead of spiking at the wrap. - Reject Haar/db1, whose step scaling function has no derivative. - Default to db8 (smoother -> better derivatives on noisy data) and collapse the two old helpers into one operator builder; vectorize the DWT over columns. - Retighten the waveletdiff error bounds for the new, exact derivative. Co-Authored-By: Claude Opus 4.8 --- pynumdiff/basis_fit.py | 259 +++++++++++---------------- pynumdiff/tests/test_diff_methods.py | 18 +- 2 files changed, 117 insertions(+), 160 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index 028b031..23e48b0 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -3,7 +3,6 @@ from warnings import warn import numpy as np from scipy import sparse -from scipy.interpolate import CubicSpline import pywt from pynumdiff.utils import utility @@ -139,93 +138,70 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0): @lru_cache(maxsize=32) -def _wavelet_derivative_synthesis_matrix(N, dt, wavelet, level, mode): - """Build sparse samples of d/dt of the inverse-DWT synthesis basis. - - For a fixed wavelet/level/mode/length, wavedec/waverec define a linear - synthesis map - - x(t_n) = sum_k c_k phi_k(t_n). - - This routine samples phi'_k(t_n) once, stores those samples sparsely, and - lets waveletdiff compute - - x'(t_n) = sum_k c_k phi'_k(t_n) - - without differentiating the reconstructed signal. The derivative samples - are obtained from a local cubic interpolant of each compactly supported - synthesis basis vector; this is bookkeeping on the basis functions, not a - finite-difference derivative of the data. +def _wavelet_derivative_operator(N, dt, wavelet): + """Build the sparse operators that turn denoised samples into a derivative. + + PyWavelets treats the input samples as the finest-level scaling coefficients, + so the denoised reconstruction x_hat represents the continuous interpolant + x(t) = sum_n a_n phi(t/dt - n), where phi is the wavelet's scaling function. + Sampling x and its analytic derivative on the grid t_m = m*dt gives two + convolutions against phi and phi' evaluated at *integers*: + + x_hat[m] = sum_n a_n phi(m - n) -> x_hat = Phi @ a + x'(t_m) = (1/dt) sum_n a_n phi'(m - n) -> x' = Phi_prime @ a + + so x' = Phi_prime @ Phi^-1 @ x_hat. This is the exact derivative of the + wavelet interpolant, with no spline or finite-difference approximation. + + phi and phi' at the integers are the eigenvectors of the wavelet's refinement + (dilation) relation: differentiating phi(t) = sqrt2 * sum_k h_k phi(2t - k) + and evaluating at integers shows phi sampled at integers is the eigenvalue-1 + eigenvector and phi' the eigenvalue-1/2 eigenvector of T[p,q] = sqrt2 * h_{2p-q}. + Normalizations come from reproduction of constants (sum phi(p) = 1) and of + linears (sum p*phi'(p) = -1, so the operator differentiates a ramp exactly). + + :return: - **Phi** (csc_matrix) -- circulant samples of phi, to be inverted + - **Phi_prime** (csr_matrix) -- circulant samples of phi'/dt """ - zero = np.zeros(N) - template = pywt.wavedec(zero, wavelet, level=level, mode=mode) - coeff_lengths = tuple(len(c) for c in template) - coeff_offsets = np.cumsum((0,) + coeff_lengths[:-1]) - n_coeffs = sum(coeff_lengths) - t = np.arange(N, dtype=float) * dt - - rows, cols, vals = [], [], [] - eps = 1e-12 - - for band, (offset, length) in enumerate(zip(coeff_offsets, coeff_lengths)): - for local_idx in range(length): - coeffs = [np.zeros_like(c, dtype=float) for c in template] - coeffs[band][local_idx] = 1.0 - basis = pywt.waverec(coeffs, wavelet, mode=mode)[:N] - - # Basis functions are compactly supported, but boundary extension can - # split support across the two ends. Differentiating only the active - # samples keeps the matrix sparse and avoids global sinusoidal bases. - active = np.flatnonzero(np.abs(basis) > eps) - if active.size == 0: - continue - - # Include one-sample padding around active support so the cubic has - # enough context near the edges of the support. If support wraps or - # covers most of the signal, fall back to all samples. - support = np.zeros(N, dtype=bool) - support[active] = True - support[np.maximum(active - 1, 0)] = True - support[np.minimum(active + 1, N - 1)] = True - idx = np.flatnonzero(support) - if idx.size < 4 or (idx[-1] - idx[0] + 1) > 2 * idx.size: - idx = np.arange(N) - - # CubicSpline requires strictly increasing x and at least two points. - # With >=4 points the not-a-knot default is well-defined; with fewer, - # fall back to clamped end slopes of zero. - bc_type = 'not-a-knot' if idx.size >= 4 else ((1, 0.0), (1, 0.0)) - spline = CubicSpline(t[idx], basis[idx], bc_type=bc_type, extrapolate=False) - deriv_vals = spline(t[idx], 1) - keep = np.isfinite(deriv_vals) & (np.abs(deriv_vals) > eps) - - rows.extend(idx[keep]) - cols.extend(np.full(np.count_nonzero(keep), offset + local_idx)) - vals.extend(deriv_vals[keep]) - - return sparse.csr_matrix((vals, (rows, cols)), shape=(N, n_coeffs)), coeff_lengths - - -def _flatten_wavelet_coeffs(coeffs): - """Stack a wavedec coefficient list into a 2-D coefficient matrix.""" - return np.vstack([c for band in coeffs for c in band]) - - -def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='periodization'): - """Smooth and differentiate noisy data with a wavelet-basis derivative sum. - - Decomposes x into wavelet approximation/detail coefficients, soft-thresholds - the detail coefficients to denoise, reconstructs a smoothed signal, and then - estimates the derivative directly from the denoised wavelet coefficients: - - x(t_n) = sum_k c_k phi_k(t_n) - x'(t_n) = sum_k c_k phi'_k(t_n) - - The first sum is the ordinary inverse wavelet transform. The second sum is - evaluated by precomputing sparse samples of the derivative of each synthesis - basis function and multiplying that sparse matrix by the denoised - coefficients. This avoids the previous reconstruct-then-FFT derivative path - and does not call finite differences or np.gradient on the signal. + h = np.array(pywt.Wavelet(wavelet).rec_lo) # reconstruction low-pass = refinement filter h_k + h = h / h.sum() * np.sqrt(2) # enforce sum(h) = sqrt2, i.e. integral of phi is 1 + L = len(h) # phi is supported on [0, L-1]; sample those integers + p = np.arange(L) + + # Transition matrix T[p,q] = sqrt2 * h_{2p-q}; entries outside the filter are 0. + cols = 2 * p[:, None] - p[None, :] + T = np.where((cols >= 0) & (cols < L), np.sqrt(2) * h[np.clip(cols, 0, L - 1)], 0.0) + + evals, evecs = np.linalg.eig(T) + phi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))]) # phi(p): eigenvalue 1 + dphi = np.real(evecs[:, np.argmin(np.abs(evals - 0.5))]) # phi'(p): eigenvalue 1/2 + phi = phi / phi.sum() # sum_p phi(p) = 1 + dphi = dphi / np.dot(p, dphi) * (-1.0) # sum_p p*phi'(p) = -1 + + # Both kernels become circulant matrices under periodic boundaries; a common + # shift of both cancels in Phi_prime @ Phi^-1, so the offset choice is cosmetic. + def circulant(kernel): + rows, cols, vals = [], [], [] + m = np.arange(N) + for offset, val in zip(p, kernel): + if abs(val) < 1e-12: continue + rows.extend(m); cols.extend((m - offset) % N); vals.extend([val] * N) + return sparse.csr_matrix((vals, (rows, cols)), shape=(N, N)) + + return circulant(phi).tocsc(), circulant(dphi / dt) + + +def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='periodization'): + """Smooth and differentiate noisy data in a wavelet basis. + + Three steps: (1) decompose x with the DWT and soft-threshold the detail + coefficients to denoise (Donoho-Johnstone universal threshold), reconstructing + a smoothed x_hat; (2) extend x_hat antisymmetrically so the periodic derivative + operator stays accurate at the edges; (3) recover the scaling coefficients of + x_hat and apply the analytic derivative of the wavelet basis to get the + derivative. The derivative operator differentiates the basis functions + themselves (see :func:`_wavelet_derivative_operator`) rather than + finite-differencing the signal, so it is exact for signals the basis can represent. Because the DWT requires uniform spacing, this method only accepts a scalar time step dt (not a vector of sample times). For non-uniformly sampled data, @@ -233,81 +209,62 @@ def waveletdiff(x, dt, wavelet='db4', level=None, threshold=1.0, axis=0, mode='p :param np.array x: data to differentiate. May be multidimensional; see :code:`axis`. :param float dt: uniform time step between samples. - :param str wavelet: PyWavelets wavelet name, e.g. 'db4', 'sym4', 'coif2'. + :param str wavelet: PyWavelets wavelet name. Must have a differentiable scaling + function, so smoother wavelets give better derivatives: 'db8' (default) and + 'sym8' are best for noisy data; 'db4', 'sym4', and 'coif2' also work well. :param int level: decomposition depth. None (default) resolves to min(pywt.dwt_max_level(N, wavelet), 5) to avoid over-decomposing short signals. :param float threshold: soft-thresholding scale factor in [0, inf). :param int axis: axis along which to differentiate (default 0). - :param str mode: PyWavelets signal extension mode passed to wavedec/waverec. - 'periodization' keeps coefficient arrays compact; 'reflect' is often a - better choice for clearly non-periodic signals. + :param str mode: PyWavelets signal extension mode for the denoising transform. + 'periodization' keeps coefficient arrays compact. The derivative operator is + periodic, so x_hat is antisymmetrically extended before it is applied (see below). :return: - **x_hat** (np.array) -- estimated (smoothed) x - **dxdt_hat** (np.array) -- estimated derivative of x """ if not np.isscalar(dt): - raise ValueError( - "`dt` must be a scalar. The DWT requires uniformly sampled data. " - "For variable step sizes, use rbfdiff or splinediff instead." - ) + raise ValueError("`dt` must be a scalar. The DWT requires uniformly sampled data. " + "For variable step sizes, use rbfdiff or splinediff instead.") + + # The Haar scaling function is a step, so it has no pointwise derivative and the + # connection-coefficient operator below is undefined for it. Haar/db1 is the only + # orthonormal wavelet with a 2-tap filter, so dec_len identifies it. + if pywt.Wavelet(wavelet).dec_len == 2: + raise ValueError("The Haar/db1 wavelet has a discontinuous (piecewise-constant) scaling " + "function with no derivative, so it cannot be used to differentiate. Pick a smoother " + "wavelet such as 'db4', 'sym4', or 'coif2'.") N = x.shape[axis] - x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) - shape = x_work.shape - x_flat = x_work.reshape(N, -1) - M = x_flat.shape[1] + x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) # differentiation axis to front + shape = x_work.shape # remember it to restore the input's dimensionality + x_flat = x_work.reshape(N, -1) # rest of the dims flattened into columns if level is None: - max_level = pywt.dwt_max_level(N, wavelet) - level = min(max_level, 5) - - # Decompose all columns; probe column 0 first to learn coefficient lengths - # and pre-allocate, reusing that result so we only pay N+1 wavedec calls. - _probe = pywt.wavedec(x_flat[:, 0], wavelet, level=level, mode=mode) - coeff_lengths = [len(c) for c in _probe] - n_levels = len(_probe) - - coeffs_all = [ - np.empty((coeff_lengths[i], M), dtype=x_flat.dtype) - for i in range(n_levels) - ] - for i, c in enumerate(_probe): - coeffs_all[i][:, 0] = c - - for col in range(1, M): - for i, c in enumerate( - pywt.wavedec(x_flat[:, col], wavelet, level=level, mode=mode) - ): - coeffs_all[i][:, col] = c - - # Robust noise estimate from finest details, then Donoho-Johnstone - # soft-thresholding on detail bands only. - sigma = np.median(np.abs(coeffs_all[-1]), axis=0) / 0.6745 - np.maximum(sigma, 1e-10, out=sigma) - thresh = threshold * sigma * np.sqrt(2 * np.log(N)) - coeffs_denoised = [coeffs_all[0]] + [ - pywt.threshold(c, thresh[np.newaxis, :], mode='soft') - for c in coeffs_all[1:] - ] - - Dphi, matrix_coeff_lengths = _wavelet_derivative_synthesis_matrix( - N, float(dt), wavelet, int(level), mode - ) - if tuple(coeff_lengths) != tuple(matrix_coeff_lengths): - raise RuntimeError("Cached wavelet derivative matrix coefficient layout does not match wavedec output.") - - x_hat_flat = np.empty_like(x_flat) - coeffs_flat = np.empty((sum(coeff_lengths), M), dtype=x_flat.dtype) - offsets = np.cumsum((0,) + tuple(coeff_lengths[:-1])) - - for col in range(M): - col_coeffs = [coeffs_denoised[i][:, col] for i in range(n_levels)] - x_hat_flat[:, col] = pywt.waverec(col_coeffs, wavelet, mode=mode)[:N] - for i, (offset, length) in enumerate(zip(offsets, coeff_lengths)): - coeffs_flat[offset:offset + length, col] = coeffs_denoised[i][:, col] - - dxdt_hat_flat = Dphi @ coeffs_flat - - x_hat = np.moveaxis(x_hat_flat.reshape(shape), 0, axis) - dxdt_hat = np.moveaxis(dxdt_hat_flat.reshape(shape), 0, axis) + level = min(pywt.dwt_max_level(N, wavelet), 5) + # 1. Denoise: DWT all columns at once, then soft-threshold the detail bands. The + # noise level is estimated robustly per column from the finest details (coeffs[-1]). + coeffs = pywt.wavedec(x_flat, wavelet, level=level, mode=mode, axis=0) + sigma = np.maximum(np.median(np.abs(coeffs[-1]), axis=0) / 0.6745, 1e-10) + thresh = threshold * sigma * np.sqrt(2 * np.log(N)) + coeffs = [coeffs[0]] + [pywt.threshold(c, thresh[np.newaxis, :], mode='soft') for c in coeffs[1:]] + x_hat = pywt.waverec(coeffs, wavelet, mode=mode, axis=0)[:N] + + # 2. The derivative operator is periodic, but x_hat usually isn't. Extend it + # antisymmetrically (reflect through each endpoint: x[-1-k] -> 2*x[0]-x[1+k]) so the + # periodic wrap is continuous in both value and slope, which keeps the derivative + # accurate at the edges instead of spiking there. This is the odd-symmetry analog of + # spectraldiff's even extension; a ramp extends to a ramp, so slopes survive exactly. + left = 2 * x_hat[0] - x_hat[1:][::-1] + right = 2 * x_hat[-1] - x_hat[:-1][::-1] + x_ext = np.concatenate([left, x_hat, right], axis=0) # length 3N-2, original at [N-1:2N-1] + + # 3. Differentiate the basis: recover the scaling coefficients a = Phi^-1 @ x_ext, then + # apply the analytic basis derivative dxdt = Phi_prime @ a, and crop back to the original. + Phi, Phi_prime = _wavelet_derivative_operator(x_ext.shape[0], float(dt), wavelet) + a = sparse.linalg.spsolve(Phi, x_ext) + dxdt_flat = (Phi_prime @ a.reshape(x_ext.shape[0], -1))[N - 1:2 * N - 1] + + x_hat = np.moveaxis(x_hat.reshape(shape), 0, axis) + dxdt_hat = np.moveaxis(dxdt_flat.reshape(shape), 0, axis) return x_hat, dxdt_hat diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index ebe9c69..cb9abce 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -51,7 +51,7 @@ def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs) (spline_irreg_step, {'degree':5, 's':2}), (spectraldiff, {'high_freq_cutoff':0.2}), (spectraldiff, [0.2]), (rbfdiff, {'sigma':0.5, 'lmbd':0.001}), - (waveletdiff, {'wavelet':'db4', 'threshold':1.0}), + (waveletdiff, {'wavelet':'db8', 'threshold':1.0}), (constant_velocity, {'r':1e-2, 'q':1e3}), (constant_velocity, [1e-2, 1e3]), (constant_acceleration, {'r':1e-3, 'q':1e4}), (constant_acceleration, [1e-3, 1e4]), (constant_jerk, {'r':1e-4, 'q':1e5}), (constant_jerk, [1e-4, 1e5]), @@ -174,12 +174,12 @@ def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs) [(-2, -2), (0, 0), (0, -1), (0, 0)], [(0, 0), (2, 2), (0, 0), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - waveletdiff: [[(-14, -15), (-14, -14), (-1, -1), (0, 0)], - [(-9, -9), (-8, -8), (0, 0), (1, 1)], - [(-9, -9), (0, 0), (0, 0), (1, 1)], - [(-1, -1), (0, 0), (0, 0), (1, 1)], - [(1, 0), (2, 2), (1, 1), (2, 2)], - [(0, 0), (3, 3), (1, 0), (3, 3)]], + waveletdiff: [[(-15, -15), (-13, -14), (0, -1), (1, 0)], + [(-2, -2), (-1, -1), (0, 0), (1, 1)], + [(-2, -2), (-1, -1), (0, 0), (1, 1)], + [(-3, -3), (-1, -1), (0, 0), (1, 1)], + [(0, -1), (2, 2), (0, 0), (2, 2)], + [(0, -1), (3, 3), (0, 0), (3, 3)]], velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)], [(-12, -12), (-11, -12), (-1, -1), (-1, -2)], [(0, -1), (1, 0), (0, -1), (1, 0)], @@ -334,7 +334,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re (finitediff, {}), (polydiff, {'degree': 2, 'window_size': 5}), (savgoldiff, {'degree': 3, 'window_size': 11, 'smoothing_win': 3}), - (waveletdiff, {'wavelet': 'db4', 'threshold': 1.0}), + (waveletdiff, {'wavelet': 'db8', 'threshold': 1.0}), (rtsdiff, {'order':2, 'log_qr_ratio':7, 'forwardbackward':True}), (spectraldiff, {'high_freq_cutoff': 0.25, 'pad_to_zero_dxdt': False}), (rbfdiff, {'sigma': 0.5, 'lmbd': 1e-6}), @@ -351,7 +351,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re kerneldiff: [(2, 1), (3, 2)], butterdiff: [(0, -1), (1, -1)], finitediff: [(0, -1), (1, -1)], - waveletdiff: [(1, 0), (2, 1)], + waveletdiff: [(1, 0), (2, 2)], polydiff: [(1, -1), (1, 0)], savgoldiff: [(0, -1), (1, 1)], rtsdiff: [(1, -1), (1, 0)], From 142ebe4673a4d119dbcd84120d03bdce999be9d3 Mon Sep 17 00:00:00 2001 From: pavelkomarov Date: Fri, 12 Jun 2026 12:20:36 -0700 Subject: [PATCH 09/11] Loosen near-machine-precision wavelet derivative bound for CI portability The clean constant-derivative Linf error lands at ~1.2e-14 on Linux vs <=1e-14 on macOS; give that one bound a decade of headroom. Co-Authored-By: Claude Opus 4.8 --- pynumdiff/tests/test_diff_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index cb9abce..29470df 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -174,7 +174,7 @@ def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs) [(-2, -2), (0, 0), (0, -1), (0, 0)], [(0, 0), (2, 2), (0, 0), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - waveletdiff: [[(-15, -15), (-13, -14), (0, -1), (1, 0)], + waveletdiff: [[(-15, -15), (-13, -13), (0, -1), (1, 0)], [(-2, -2), (-1, -1), (0, 0), (1, 1)], [(-2, -2), (-1, -1), (0, 0), (1, 1)], [(-3, -3), (-1, -1), (0, 0), (1, 1)], From c8003a4a7b71b4352ce3079bcd1f3a21ac45c425 Mon Sep 17 00:00:00 2001 From: pavelkomarov Date: Fri, 12 Jun 2026 12:31:56 -0700 Subject: [PATCH 10/11] Drop lru_cache on wavelet derivative operator The operator is built once per waveletdiff call and applied to all columns at once via the vectorized spsolve, so within-call reuse is already free. The cache only helped across separate same-length calls, a marginal win that didn't justify retaining up to 32 sparse-matrix pairs in memory. Co-Authored-By: Claude Opus 4.8 --- pynumdiff/basis_fit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index 23e48b0..5c06357 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -1,5 +1,4 @@ """Methods based on fitting basis functions to data""" -from functools import lru_cache from warnings import warn import numpy as np from scipy import sparse @@ -137,10 +136,12 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0): return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis) -@lru_cache(maxsize=32) def _wavelet_derivative_operator(N, dt, wavelet): """Build the sparse operators that turn denoised samples into a derivative. + Depends only on the grid (N, dt) and the wavelet, not on the data, and is + built once per waveletdiff call then applied to every column at once. + PyWavelets treats the input samples as the finest-level scaling coefficients, so the denoised reconstruction x_hat represents the continuous interpolant x(t) = sum_n a_n phi(t/dt - n), where phi is the wavelet's scaling function. From 8e8d46e1c1893726aedfbd857c15c1fdc33c91ee Mon Sep 17 00:00:00 2001 From: pavelkomarov Date: Fri, 12 Jun 2026 12:35:44 -0700 Subject: [PATCH 11/11] Inline the wavelet derivative operator into waveletdiff Fold the private _wavelet_derivative_operator into waveletdiff as a standalone block, mirroring how rbfdiff builds its basis and derivative matrices in one loop. No other caller needs it, and the connection-coefficient math now lives in waveletdiff's own docstring. Co-Authored-By: Claude Opus 4.8 --- pynumdiff/basis_fit.py | 99 +++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 63 deletions(-) diff --git a/pynumdiff/basis_fit.py b/pynumdiff/basis_fit.py index 5c06357..f4764f4 100644 --- a/pynumdiff/basis_fit.py +++ b/pynumdiff/basis_fit.py @@ -136,73 +136,27 @@ def rbfdiff(x, dt_or_t, sigma=1, lmbd=0.01, axis=0): return np.moveaxis(x_hat_flattened.reshape(plump), 0, axis), np.moveaxis(dxdt_hat_flattened.reshape(plump), 0, axis) -def _wavelet_derivative_operator(N, dt, wavelet): - """Build the sparse operators that turn denoised samples into a derivative. - - Depends only on the grid (N, dt) and the wavelet, not on the data, and is - built once per waveletdiff call then applied to every column at once. - - PyWavelets treats the input samples as the finest-level scaling coefficients, - so the denoised reconstruction x_hat represents the continuous interpolant - x(t) = sum_n a_n phi(t/dt - n), where phi is the wavelet's scaling function. - Sampling x and its analytic derivative on the grid t_m = m*dt gives two - convolutions against phi and phi' evaluated at *integers*: - - x_hat[m] = sum_n a_n phi(m - n) -> x_hat = Phi @ a - x'(t_m) = (1/dt) sum_n a_n phi'(m - n) -> x' = Phi_prime @ a - - so x' = Phi_prime @ Phi^-1 @ x_hat. This is the exact derivative of the - wavelet interpolant, with no spline or finite-difference approximation. - - phi and phi' at the integers are the eigenvectors of the wavelet's refinement - (dilation) relation: differentiating phi(t) = sqrt2 * sum_k h_k phi(2t - k) - and evaluating at integers shows phi sampled at integers is the eigenvalue-1 - eigenvector and phi' the eigenvalue-1/2 eigenvector of T[p,q] = sqrt2 * h_{2p-q}. - Normalizations come from reproduction of constants (sum phi(p) = 1) and of - linears (sum p*phi'(p) = -1, so the operator differentiates a ramp exactly). - - :return: - **Phi** (csc_matrix) -- circulant samples of phi, to be inverted - - **Phi_prime** (csr_matrix) -- circulant samples of phi'/dt - """ - h = np.array(pywt.Wavelet(wavelet).rec_lo) # reconstruction low-pass = refinement filter h_k - h = h / h.sum() * np.sqrt(2) # enforce sum(h) = sqrt2, i.e. integral of phi is 1 - L = len(h) # phi is supported on [0, L-1]; sample those integers - p = np.arange(L) - - # Transition matrix T[p,q] = sqrt2 * h_{2p-q}; entries outside the filter are 0. - cols = 2 * p[:, None] - p[None, :] - T = np.where((cols >= 0) & (cols < L), np.sqrt(2) * h[np.clip(cols, 0, L - 1)], 0.0) - - evals, evecs = np.linalg.eig(T) - phi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))]) # phi(p): eigenvalue 1 - dphi = np.real(evecs[:, np.argmin(np.abs(evals - 0.5))]) # phi'(p): eigenvalue 1/2 - phi = phi / phi.sum() # sum_p phi(p) = 1 - dphi = dphi / np.dot(p, dphi) * (-1.0) # sum_p p*phi'(p) = -1 - - # Both kernels become circulant matrices under periodic boundaries; a common - # shift of both cancels in Phi_prime @ Phi^-1, so the offset choice is cosmetic. - def circulant(kernel): - rows, cols, vals = [], [], [] - m = np.arange(N) - for offset, val in zip(p, kernel): - if abs(val) < 1e-12: continue - rows.extend(m); cols.extend((m - offset) % N); vals.extend([val] * N) - return sparse.csr_matrix((vals, (rows, cols)), shape=(N, N)) - - return circulant(phi).tocsc(), circulant(dphi / dt) - - def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='periodization'): """Smooth and differentiate noisy data in a wavelet basis. Three steps: (1) decompose x with the DWT and soft-threshold the detail coefficients to denoise (Donoho-Johnstone universal threshold), reconstructing a smoothed x_hat; (2) extend x_hat antisymmetrically so the periodic derivative - operator stays accurate at the edges; (3) recover the scaling coefficients of - x_hat and apply the analytic derivative of the wavelet basis to get the - derivative. The derivative operator differentiates the basis functions - themselves (see :func:`_wavelet_derivative_operator`) rather than - finite-differencing the signal, so it is exact for signals the basis can represent. + operator stays accurate at the edges; (3) recover the wavelet scaling + coefficients of x_hat and apply the analytic derivative of the wavelet basis. + + The derivative differentiates the basis functions themselves rather than + finite-differencing the signal. PyWavelets treats the samples as finest-level + scaling coefficients, so x_hat is the interpolant x(t) = sum_n a_n phi(t/dt - n) + for the scaling function phi. Sampling x and its analytic derivative on the grid + gives two convolutions against phi and phi' evaluated at *integers*, + + x_hat = Phi @ a and x' = Phi_prime @ a, + + so x' = Phi_prime @ Phi^-1 @ x_hat, exact for signals the basis can represent. + The integer samples phi(p), phi'(p) are the eigenvalue-1 and eigenvalue-1/2 + eigenvectors of the refinement relation phi(t) = sqrt2 sum_k h_k phi(2t - k) + (the "connection coefficients"), normalized to reproduce constants and ramps. Because the DWT requires uniform spacing, this method only accepts a scalar time step dt (not a vector of sample times). For non-uniformly sampled data, @@ -239,6 +193,26 @@ def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='p x_work = np.ascontiguousarray(np.moveaxis(x, axis, 0)) # differentiation axis to front shape = x_work.shape # remember it to restore the input's dimensionality x_flat = x_work.reshape(N, -1) # rest of the dims flattened into columns + Ne = 3 * N - 2 # length after the antisymmetric extension in step 2 + + # Build the wavelet-basis derivative operator (depends only on the grid and wavelet). + # Sampling the refinement relation phi(t) = sqrt2 sum_k h_k phi(2t - k) at integers makes + # phi(p) the eigenvalue-1 and phi'(p) the eigenvalue-1/2 eigenvector of T[p,q] = sqrt2 h_{2p-q}. + h = np.array(pywt.Wavelet(wavelet).rec_lo); h = h / h.sum() * np.sqrt(2) # refinement filter, integral of phi = 1 + L = len(h); p = np.arange(L) # phi is supported on the integers [0, L-1] + shift = 2 * p[:, None] - p[None, :] + T = np.where((shift >= 0) & (shift < L), np.sqrt(2) * h[np.clip(shift, 0, L - 1)], 0.0) + evals, evecs = np.linalg.eig(T) + phi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))]); phi /= phi.sum() # sum_p phi(p) = 1 + dphi = np.real(evecs[:, np.argmin(np.abs(evals - 0.5))]); dphi /= np.dot(p, dphi)*-1 # sum_p p*phi'(p) = -1 + # Phi and Phi_prime hold circulant samples of phi and phi'/dt on the extended grid; both + # share a common shift that cancels in Phi_prime @ Phi^-1, so the offset choice is cosmetic. + rows, cols, phi_vals, dphi_vals = [], [], [], [] + m = np.arange(Ne) + for offset, phi_p, dphi_p in zip(p, phi, dphi / dt): + rows.extend(m); cols.extend((m - offset) % Ne); phi_vals.extend([phi_p]*Ne); dphi_vals.extend([dphi_p]*Ne) + Phi = sparse.csr_matrix((phi_vals, (rows, cols)), shape=(Ne, Ne)).tocsc() # to invert + Phi_prime = sparse.csr_matrix((dphi_vals, (rows, cols)), shape=(Ne, Ne)) # to apply if level is None: level = min(pywt.dwt_max_level(N, wavelet), 5) @@ -262,9 +236,8 @@ def waveletdiff(x, dt, wavelet='db8', level=None, threshold=1.0, axis=0, mode='p # 3. Differentiate the basis: recover the scaling coefficients a = Phi^-1 @ x_ext, then # apply the analytic basis derivative dxdt = Phi_prime @ a, and crop back to the original. - Phi, Phi_prime = _wavelet_derivative_operator(x_ext.shape[0], float(dt), wavelet) a = sparse.linalg.spsolve(Phi, x_ext) - dxdt_flat = (Phi_prime @ a.reshape(x_ext.shape[0], -1))[N - 1:2 * N - 1] + dxdt_flat = (Phi_prime @ a.reshape(Ne, -1))[N - 1:2 * N - 1] x_hat = np.moveaxis(x_hat.reshape(shape), 0, axis) dxdt_hat = np.moveaxis(dxdt_flat.reshape(shape), 0, axis)