Skip to content

Commit 4477454

Browse files
authored
Merge pull request #132 from florisvb/autometamethod
Merging this as a first draft, especially because I did so many other random things in this one. We can revisit to iteratively improve the method and more fully address #70, should we desire to.
2 parents 4b36391 + 159fd76 commit 4477454

12 files changed

Lines changed: 345 additions & 84 deletions

examples/1_basic_tutorial.ipynb

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 3,
81+
"execution_count": null,
8282
"metadata": {},
8383
"outputs": [],
8484
"source": [
@@ -89,15 +89,8 @@
8989
"\n",
9090
"# time step size and time series length in TIME\n",
9191
"dt = 0.01\n",
92-
"duration = 4"
93-
]
94-
},
95-
{
96-
"cell_type": "code",
97-
"execution_count": 4,
98-
"metadata": {},
99-
"outputs": [],
100-
"source": [
92+
"duration = 4\n",
93+
"\n",
10194
"x, x_truth, dxdt_truth, _ = simulate.pi_control(duration=duration, dt=dt, \n",
10295
" noise_type=noise_type, noise_parameters=noise_parameters)"
10396
]

examples/2a_optimizing_parameters_with_dxdt_known.ipynb

Lines changed: 7 additions & 14 deletions
Large diffs are not rendered by default.

examples/2b_optimizing_parameters_with_dxdt_unknown.ipynb

Lines changed: 36 additions & 43 deletions
Large diffs are not rendered by default.

examples/3_automatic_method_suggestion.ipynb

Lines changed: 217 additions & 0 deletions
Large diffs are not rendered by default.

pynumdiff/finite_difference/_finite_difference.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ def _finite_difference(x, dt, num_iterations, order):
2727
dxdt_hat[-1] = x_hat[-1] - x_hat[-2] # use first-order endpoint formulas so as not to amplify noise. See #104
2828
elif order == 4:
2929
dxdt_hat[2:-2] = (8*(x_hat[3:-1] - x_hat[1:-3]) - x_hat[4:] + x_hat[:-4])/12 # fourth-order center-difference
30-
dxdt_hat[:2] = np.diff(x_hat[:3])
31-
dxdt_hat[-2:] = np.diff(x_hat[-3:])
30+
dxdt_hat[1] = (x_hat[2] - x_hat[0])/2
31+
dxdt_hat[-2] = (x_hat[-1] - x_hat[-3])/2 # use second-order formula for next-to-endpoints so as not to amplify noise
32+
dxdt_hat[0] = x_hat[1] - x_hat[0]
33+
dxdt_hat[-1] = x_hat[-1] - x_hat[-2] # use first-order endpoint formulas so as not to amplify noise. See #104
3234

3335
x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt=1) # estimate new x_hat by integrating derivative
3436
# We can skip dividing by dt here and pass dt=1, because the integration multiplies dt back in.

pynumdiff/linear_model/_linear_model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def savgoldiff(x, dt, params=None, options=None, poly_order=None, window_size=No
3838
raise ValueError("`poly_order`, `window_size`, and `smoothing_win` must be given.")
3939

4040
window_size = np.clip(window_size, poly_order + 1, len(x)-1)
41-
if window_size % 2 == 0: window_size += 1 # window_size needs to be odd
41+
if window_size % 2 == 0:
42+
window_size += 1 # window_size needs to be odd
43+
warn("Kernel window size should be odd. Added 1 to length.")
4244
smoothing_win = min(smoothing_win, len(x)-1)
4345

4446
dxdt_hat = scipy.signal.savgol_filter(x, window_size, poly_order, deriv=1)/dt

pynumdiff/optimize/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@
88
"Some functions in the `total_variation_regularization` and `linear_model` modules require " +
99
"CVXPY to be installed. You can still pynumdiff.optimize for other functions.")
1010

11-
from ._optimize import optimize
11+
from ._optimize import optimize, suggest_method
1212

13-
__all__ = ['optimize'] # So these get treated as direct members of the module by sphinx
13+
__all__ = ['optimize', 'suggest_method'] # So these get treated as direct members of the module by sphinx

pynumdiff/optimize/_optimize.py

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
import numpy as np
44
from itertools import product
55
from functools import partial
6-
from warnings import filterwarnings
6+
from warnings import filterwarnings, warn
77
from multiprocessing import Pool
8+
from tqdm import tqdm
89

910
from ..utils import evaluate
1011
from ..finite_difference import first_order, second_order, fourth_order
@@ -68,7 +69,7 @@
6869
smooth_acceleration: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
6970
'window_size': [3, 10, 30, 50, 90, 130]},
7071
{'gamma': (1e-4, 1e7),
71-
'window_size': (1, 1000)}),
72+
'window_size': (3, 1000)}),
7273
constant_velocity: ({'forwardbackward': [True, False],
7374
'q': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8],
7475
'r': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8]},
@@ -86,7 +87,7 @@
8687
method_params_and_bounds[method] = method_params_and_bounds[constant_velocity]
8788

8889

89-
# This function has to be at the top level for multiprocessing
90+
# This function has to be at the top level for multiprocessing but is only used by optimize.
9091
def _objective_function(point, func, x, dt, singleton_params, search_space_types, dxdt_truth, metric,
9192
tvgamma, padding):
9293
"""Function minimized by scipy.optimize.minimize, needs to have the form: (point, *args) -> float
@@ -123,7 +124,7 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
123124
"""Find the optimal parameters for a given differentiation method.
124125
125126
:param function func: differentiation method to optimize parameters for, e.g. linear_model.savgoldiff
126-
:param np.array[float]: data to differentiate
127+
:param np.array[float] x: data to differentiate
127128
:param float dt: step size
128129
:param dict search_space: function parameter settings to use as initial starting points in optimization,
129130
structured as :code:`{param1:[values], param2:[values], param3:value, ...}`. The search space
@@ -155,7 +156,7 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
155156
singleton_params = {k:v for k,v in params.items() if not isinstance(v, list)}
156157

157158
# The search space is the Cartesian product of all dimensions where multiple options are given
158-
search_space_types = {k:type(v[0]) for k,v in params.items() if isinstance(v, list)} # map param name -> type for converting to and from point
159+
search_space_types = {k:type(v[0]) for k,v in params.items() if isinstance(v, list)} # map param name -> type, for converting to and from point
159160
if any(v not in [float, int, bool] for v in search_space_types.values()):
160161
raise ValueError("Optimization over categorical strings currently not supported")
161162
# If excluding string type, I can just cast ints and bools to floats, and we're good to go
@@ -183,3 +184,58 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
183184
opt_params.update(singleton_params)
184185

185186
return opt_params, results[opt_idx].fun
187+
188+
189+
def suggest_method(x, dt, dxdt_truth=None, cutoff_frequency=None):
190+
"""This is meant as an easy-to-use, automatic way for users with some time on their hands to determine
191+
a good method and settings for their data. It calls the optimizer over (almost) all methods in the repo
192+
using default search spaces defined at the top of the :code:`pynumdiff/optimize/_optimize.py` file.
193+
This routine will take a few minutes to run.
194+
195+
Excluded:
196+
- ``first_order``, because iterating causes drift
197+
- ``lineardiff``, ``iterative_velocity``, and ``jerk_sliding``, because they either take too long,
198+
can be fragile, or tend not to do best
199+
- all ``cvxpy``-based methods if it is not installed
200+
- ``velocity`` because it tends to not be best but dominates the optimization process by directly
201+
optimizing the second term of the metric :math:`L = \\text{RMSE} \\Big( \\text{trapz}(\\mathbf{
202+
\\hat{\\dot{x}}}(\\Phi)) + \\mu, \\mathbf{y} \\Big) + \\gamma \\Big({TV}\\big(\\mathbf{\\hat{
203+
\\dot{x}}}(\\Phi)\\big)\\Big)`
204+
205+
:param np.array[float] x: data to differentiate
206+
:param float dt: step size, because most methods are not designed to work with variable step sizes
207+
:param np.array[float] dxdt_truth: if known, you can pass true derivative values; otherwise you must use
208+
:code:`cutoff_frequency`
209+
:param float cutoff_frequency: in Hz, the highest dominant frequency of interest in the signal,
210+
used to find parameter :math:`\\gamma` for regularization of the optimization process
211+
in the absence of ground truth. See https://ieeexplore.ieee.org/document/9241009.
212+
Estimate by (a) counting real number of peaks per second in the data, (b) looking at
213+
power spectrum and choosing a cutoff, or (c) making an educated guess.
214+
215+
:return: tuple[callable, dict] of\n
216+
- **method** -- a reference to the function handle of the differentiation method that worked best
217+
- **opt_params** -- optimal parameter settings for the differentiation method
218+
"""
219+
tvgamma = None
220+
if dxdt_truth is None: # parameter checking
221+
if cutoff_frequency is None:
222+
raise ValueError('Either dxdt_truth or cutoff_frequency must be provided.')
223+
tvgamma = np.exp(-1.6*np.log(cutoff_frequency) -0.71*np.log(dt) - 5.1) # See https://ieeexplore.ieee.org/document/9241009
224+
225+
methods = [second_order, fourth_order, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff,
226+
splinediff, spectraldiff, polydiff, savgoldiff, constant_velocity, constant_acceleration, constant_jerk]
227+
try: # optionally skip some methods
228+
import cvxpy
229+
methods += [acceleration, jerk, smooth_acceleration]
230+
except ImportError:
231+
warn("CVXPY not installed, skipping acceleration, jerk, and smooth_acceleration")
232+
233+
best_value = float('inf') # core loop
234+
for func in tqdm(methods):
235+
p, v = optimize(func, x, dt, dxdt_truth=dxdt_truth, tvgamma=tvgamma)
236+
if v < best_value:
237+
method = func
238+
best_value = v
239+
opt_params = p
240+
241+
return method, opt_params

pynumdiff/tests/test_diff_methods.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ def iterated_first_order(*args, **kwargs): return first_order(*args, **kwargs)
5656
# All the testing methodology follows the exact same pattern; the only thing that changes is the
5757
# closeness to the right answer various methods achieve with the given parameterizations. So index a
5858
# big ol' table by the method, then the test function, then the pair of quantities we're comparing.
59+
# The tuples are order of magnitude of (L2,Linf) distances for pairs
60+
# (x,x_hat), (dxdt,dxdt_hat), (x,x_hat_noisy), (dxdt,dxdt_hat_noisy).
5961
error_bounds = {
6062
first_order: [[(-25, -25), (-25, -25), (0, 0), (1, 1)],
6163
[(-25, -25), (-13, -13), (0, 0), (1, 1)],

pynumdiff/total_variation_regularization/_total_variation_regularization.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,13 @@ def jerk_sliding(x, dt, params=None, options=None, gamma=None, solver=None, wind
253253
elif gamma == None:
254254
raise ValueError("`gamma` must be given.")
255255

256-
if len(x) < window_size:
257-
warn("len(x) <= window_size, calling standard jerk() without sliding")
256+
if len(x) < window_size or window_size < 15:
257+
warn("len(x) should be > window_size >= 15, calling standard jerk() without sliding")
258258
return _total_variation_regularized_derivative(x, dt, 3, gamma, solver=solver)
259259

260+
if window_size % 2 == 0:
261+
window_size += 1 # has to be odd
262+
warn("Kernel window size should be odd. Added 1 to length.")
260263
ramp = window_size//5
261-
kernel = np.hstack((np.arange(1, ramp+1)/ramp, np.ones(window_size - 2*ramp), (np.arange(1, ramp+1)/ramp)[::-1]))
264+
kernel = np.hstack((np.arange(1, ramp+1)/ramp, np.ones(window_size - 2*ramp), np.arange(ramp, 0, -1)/ramp))
262265
return utility.slide_function(_total_variation_regularized_derivative, x, dt, kernel, 3, gamma, stride=ramp, solver=solver)

0 commit comments

Comments
 (0)