Skip to content

Commit 4477454

Browse files
authored
Merge pull request #132 from florisvb/autometamethod
Merging this as a first draft, especially because I did so many other random things in this one. We can revisit to iteratively improve the method and more fully address #70, should we desire to.
2 parents 4b36391 + 159fd76 commit 4477454

12 files changed

Lines changed: 345 additions & 84 deletions

examples/1_basic_tutorial.ipynb

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 3,
81+
"execution_count": null,
8282
"metadata": {},
8383
"outputs": [],
8484
"source": [
@@ -89,15 +89,8 @@
8989
"\n",
9090
"# time step size and time series length in TIME\n",
9191
"dt = 0.01\n",
92-
"duration = 4"
93-
]
94-
},
95-
{
96-
"cell_type": "code",
97-
"execution_count": 4,
98-
"metadata": {},
99-
"outputs": [],
100-
"source": [
92+
"duration = 4\n",
93+
"\n",
10194
"x, x_truth, dxdt_truth, _ = simulate.pi_control(duration=duration, dt=dt, \n",
10295
" noise_type=noise_type, noise_parameters=noise_parameters)"
10396
]

examples/2a_optimizing_parameters_with_dxdt_known.ipynb

Lines changed: 7 additions & 14 deletions
Large diffs are not rendered by default.

examples/2b_optimizing_parameters_with_dxdt_unknown.ipynb

Lines changed: 36 additions & 43 deletions
Large diffs are not rendered by default.

examples/3_automatic_method_suggestion.ipynb

Lines changed: 217 additions & 0 deletions
Large diffs are not rendered by default.

pynumdiff/finite_difference/_finite_difference.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ def _finite_difference(x, dt, num_iterations, order):
2727
dxdt_hat[-1] = x_hat[-1] - x_hat[-2] # use first-order endpoint formulas so as not to amplify noise. See #104
2828
elif order == 4:
2929
dxdt_hat[2:-2] = (8*(x_hat[3:-1] - x_hat[1:-3]) - x_hat[4:] + x_hat[:-4])/12 # fourth-order center-difference
30-
dxdt_hat[:2] = np.diff(x_hat[:3])
31-
dxdt_hat[-2:] = np.diff(x_hat[-3:])
30+
dxdt_hat[1] = (x_hat[2] - x_hat[0])/2
31+
dxdt_hat[-2] = (x_hat[-1] - x_hat[-3])/2 # use second-order formula for next-to-endpoints so as not to amplify noise
32+
dxdt_hat[0] = x_hat[1] - x_hat[0]
33+
dxdt_hat[-1] = x_hat[-1] - x_hat[-2] # use first-order endpoint formulas so as not to amplify noise. See #104
3234

3335
x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt=1) # estimate new x_hat by integrating derivative
3436
# We can skip dividing by dt here and pass dt=1, because the integration multiplies dt back in.

pynumdiff/linear_model/_linear_model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def savgoldiff(x, dt, params=None, options=None, poly_order=None, window_size=No
3838
raise ValueError("`poly_order`, `window_size`, and `smoothing_win` must be given.")
3939

4040
window_size = np.clip(window_size, poly_order + 1, len(x)-1)
41-
if window_size % 2 == 0: window_size += 1 # window_size needs to be odd
41+
if window_size % 2 == 0:
42+
window_size += 1 # window_size needs to be odd
43+
warn("Kernel window size should be odd. Added 1 to length.")
4244
smoothing_win = min(smoothing_win, len(x)-1)
4345

4446
dxdt_hat = scipy.signal.savgol_filter(x, window_size, poly_order, deriv=1)/dt

pynumdiff/optimize/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@
88
"Some functions in the `total_variation_regularization` and `linear_model` modules require " +
99
"CVXPY to be installed. You can still pynumdiff.optimize for other functions.")
1010

11-
from ._optimize import optimize
11+
from ._optimize import optimize, suggest_method
1212

13-
__all__ = ['optimize'] # So these get treated as direct members of the module by sphinx
13+
__all__ = ['optimize', 'suggest_method'] # So these get treated as direct members of the module by sphinx

pynumdiff/optimize/_optimize.py

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
import numpy as np
44
from itertools import product
55
from functools import partial
6-
from warnings import filterwarnings
6+
from warnings import filterwarnings, warn
77
from multiprocessing import Pool
8+
from tqdm import tqdm
89

910
from ..utils import evaluate
1011
from ..finite_difference import first_order, second_order, fourth_order
@@ -68,7 +69,7 @@
6869
smooth_acceleration: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
6970
'window_size': [3, 10, 30, 50, 90, 130]},
7071
{'gamma': (1e-4, 1e7),
71-
'window_size': (1, 1000)}),
72+
'window_size': (3, 1000)}),
7273
constant_velocity: ({'forwardbackward': [True, False],
7374
'q': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8],
7475
'r': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8]},
@@ -86,7 +87,7 @@
8687
method_params_and_bounds[method] = method_params_and_bounds[constant_velocity]
8788

8889

89-
# This function has to be at the top level for multiprocessing
90+
# This function has to be at the top level for multiprocessing but is only used by optimize.
9091
def _objective_function(point, func, x, dt, singleton_params, search_space_types, dxdt_truth, metric,
9192
tvgamma, padding):
9293
"""Function minimized by scipy.optimize.minimize, needs to have the form: (point, *args) -> float
@@ -123,7 +124,7 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
123124
"""Find the optimal parameters for a given differentiation method.
124125
125126
:param function func: differentiation method to optimize parameters for, e.g. linear_model.savgoldiff
126-
:param np.array[float]: data to differentiate
127+
:param np.array[float] x: data to differentiate
127128
:param float dt: step size
128129
:param dict search_space: function parameter settings to use as initial starting points in optimization,
129130
structured as :code:`{param1:[values], param2:[values], param3:value, ...}`. The search space
@@ -155,7 +156,7 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
155156
singleton_params = {k:v for k,v in params.items() if not isinstance(v, list)}
156157

157158
# The search space is the Cartesian product of all dimensions where multiple options are given
158-
search_space_types = {k:type(v[0]) for k,v in params.items() if isinstance(v, list)} # map param name -> type for converting to and from point
159+
search_space_types = {k:type(v[0]) for k,v in params.items() if isinstance(v, list)} # map param name -> type, for converting to and from point
159160
if any(v not in [float, int, bool] for v in search_space_types.values()):
160161
raise ValueError("Optimization over categorical strings currently not supported")
161162
# If excluding string type, I can just cast ints and bools to floats, and we're good to go
@@ -183,3 +184,58 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
183184
opt_params.update(singleton_params)
184185

185186
return opt_params, results[opt_idx].fun
187+
188+
189+
def suggest_method(x, dt, dxdt_truth=None, cutoff_frequency=None):
190+
"""This is meant as an easy-to-use, automatic way for users with some time on their hands to determine
191+
a good method and settings for their data. It calls the optimizer over (almost) all methods in the repo
192+
using default search spaces defined at the top of the :code:`pynumdiff/optimize/_optimize.py` file.
193+
This routine will take a few minutes to run.
194+
195+
Excluded:
196+
- ``first_order``, because iterating causes drift
197+
- ``lineardiff``, ``iterative_velocity``, and ``jerk_sliding``, because they either take too long,
198+
can be fragile, or tend not to do best
199+
- all ``cvxpy``-based methods if it is not installed
200+
- ``velocity`` because it tends to not be best but dominates the optimization process by directly
201+
optimizing the second term of the metric :math:`L = \\text{RMSE} \\Big( \\text{trapz}(\\mathbf{
202+
\\hat{\\dot{x}}}(\\Phi)) + \\mu, \\mathbf{y} \\Big) + \\gamma \\Big({TV}\\big(\\mathbf{\\hat{
203+
\\dot{x}}}(\\Phi)\\big)\\Big)`
204+
205+
:param np.array[float] x: data to differentiate
206+
:param float dt: step size, because most methods are not designed to work with variable step sizes
207+
:param np.array[float] dxdt_truth: if known, you can pass true derivative values; otherwise you must use
208+
:code:`cutoff_frequency`
209+
:param float cutoff_frequency: in Hz, the highest dominant frequency of interest in the signal,
210+
used to find parameter :math:`\\gamma` for regularization of the optimization process
211+
in the absence of ground truth. See https://ieeexplore.ieee.org/document/9241009.
212+
Estimate by (a) counting real number of peaks per second in the data, (b) looking at
213+
power spectrum and choosing a cutoff, or (c) making an educated guess.
214+
215+
:return: tuple[callable, dict] of\n
216+
- **method** -- a reference to the function handle of the differentiation method that worked best
217+
- **opt_params** -- optimal parameter settings for the differentiation method
218+
"""
219+
tvgamma = None
220+
if dxdt_truth is None: # parameter checking
221+
if cutoff_frequency is None:
222+
raise ValueError('Either dxdt_truth or cutoff_frequency must be provided.')
223+
tvgamma = np.exp(-1.6*np.log(cutoff_frequency) -0.71*np.log(dt) - 5.1) # See https://ieeexplore.ieee.org/document/9241009
224+
225+
methods = [second_order, fourth_order, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff,
226+
splinediff, spectraldiff, polydiff, savgoldiff, constant_velocity, constant_acceleration, constant_jerk]
227+
try: # optionally skip some methods
228+
import cvxpy
229+
methods += [acceleration, jerk, smooth_acceleration]
230+
except ImportError:
231+
warn("CVXPY not installed, skipping acceleration, jerk, and smooth_acceleration")
232+
233+
best_value = float('inf') # core loop
234+
for func in tqdm(methods):
235+
p, v = optimize(func, x, dt, dxdt_truth=dxdt_truth, tvgamma=tvgamma)
236+
if v < best_value:
237+
method = func
238+
best_value = v
239+
opt_params = p
240+
241+
return method, opt_params

pynumdiff/tests/test_diff_methods.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ def iterated_first_order(*args, **kwargs): return first_order(*args, **kwargs)
5656
# All the testing methodology follows the exact same pattern; the only thing that changes is the
5757
# closeness to the right answer various methods achieve with the given parameterizations. So index a
5858
# big ol' table by the method, then the test function, then the pair of quantities we're comparing.
59+
# The tuples are order of magnitude of (L2,Linf) distances for pairs
60+
# (x,x_hat), (dxdt,dxdt_hat), (x,x_hat_noisy), (dxdt,dxdt_hat_noisy).
5961
error_bounds = {
6062
first_order: [[(-25, -25), (-25, -25), (0, 0), (1, 1)],
6163
[(-25, -25), (-13, -13), (0, 0), (1, 1)],

pynumdiff/total_variation_regularization/_total_variation_regularization.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,13 @@ def jerk_sliding(x, dt, params=None, options=None, gamma=None, solver=None, wind
253253
elif gamma == None:
254254
raise ValueError("`gamma` must be given.")
255255

256-
if len(x) < window_size:
257-
warn("len(x) <= window_size, calling standard jerk() without sliding")
256+
if len(x) < window_size or window_size < 15:
257+
warn("len(x) should be > window_size >= 15, calling standard jerk() without sliding")
258258
return _total_variation_regularized_derivative(x, dt, 3, gamma, solver=solver)
259259

260+
if window_size % 2 == 0:
261+
window_size += 1 # has to be odd
262+
warn("Kernel window size should be odd. Added 1 to length.")
260263
ramp = window_size//5
261-
kernel = np.hstack((np.arange(1, ramp+1)/ramp, np.ones(window_size - 2*ramp), (np.arange(1, ramp+1)/ramp)[::-1]))
264+
kernel = np.hstack((np.arange(1, ramp+1)/ramp, np.ones(window_size - 2*ramp), np.arange(ramp, 0, -1)/ramp))
262265
return utility.slide_function(_total_variation_regularized_derivative, x, dt, kernel, 3, gamma, stride=ramp, solver=solver)

0 commit comments

Comments
 (0)