Parcels/src/parcels/_core/particleset.py at fdd667441bbffb8a4fca7d114ba0842747efb6fb · Parcels-code/Parcels · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
from __future__ import annotations

import datetime
import sys
import types
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Literal

import numpy as np
import xarray as xr
from tqdm import tqdm

from parcels._core.kernel import Kernel
from parcels._core.particle import Particle, create_particle_data
from parcels._core.particlesetview import ParticleSetView
from parcels._core.statuscodes import StatusCode
from parcels._core.utils.time import (
    TimeInterval,
    float_to_datelike,
    timedelta_to_float,
)
from parcels._core.warnings import ParticleSetWarning
from parcels._logger import logger
from parcels._reprs import _format_zarr_output_location, particleset_repr

if TYPE_CHECKING:
    from parcels import FieldSet, ParticleClass, ParticleFile

__all__ = ["ParticleSet"]


class ParticleSet:
    """Class for storing particles and executing kernel over them.

    Please note that this currently only supports fixed size particle sets, meaning that the particle set only
    holds the particles defined on construction. Individual particles can neither be added nor deleted individually,
    and individual particles can only be deleted as a set procedurally (i.e. by changing their state to 'StatusCode.Delete'
    during kernel execution).

    Parameters
    ----------
    fieldset :
        mod:`parcels.fieldset.FieldSet` object from which to sample velocity.
    pclass : parcels.particle.Particle
        Optional object that inherits from :mod:`parcels.particle.Particle` object that defines custom particle
    lon :
        List of initial longitude values for particles
    lat :
        List of initial latitude values for particles
    z :
        Optional list of initial z values for particles. Default is 0m
    time :
        Optional list of initial time values for particles. Default is fieldset.U.grid.time[0]
    repeatdt : datetime.timedelta or float, optional
        Optional interval on which to repeat the release of the ParticleSet. Either timedelta object, or float in seconds.
    trajectory_ids :
        Optional list of "trajectory" values (integers) for the particle IDs
    partition_function :
        Function to use for partitioning particles over processors. Default is to use kMeans

        Other Variables can be initialised using further arguments (e.g. v=... for a Variable named 'v')
    """

    def __init__(
        self,
        fieldset: FieldSet,
        pclass: ParticleClass = Particle,
        lon: np.array[float] = None,
        lat: np.array[float] = None,
        z=None,
        time=None,
        trajectory_ids=None,
        **kwargs,
    ):
        self._data = None
        self._kernel = None

        self.fieldset = fieldset
        lon = np.empty(shape=0) if lon is None else np.array(lon).flatten()
        lat = np.empty(shape=0) if lat is None else np.array(lat).flatten()
        time = np.empty(shape=0) if time is None else np.array(time).flatten()

        if trajectory_ids is None:
            trajectory_ids = np.arange(lon.size)

        if z is None:
            minz = 0
            for field in self.fieldset.fields.values():
                if field.grid.depth is not None:
                    minz = min(minz, field.grid.depth[0])
            z = np.ones(lon.size) * minz
        else:
            z = np.array(z).flatten()
        assert lon.size == lat.size and lon.size == z.size, "lon, lat, z don't all have the same lenghts"

        if time is None or len(time) == 0:
            # do not set a time yet (because sign_dt not known)
            time = np.array(np.nan)
        elif isinstance(time[0], np.datetime64) and self.fieldset.time_interval:
            time = timedelta_to_float(time - self.fieldset.time_interval.left)
        elif isinstance(time[0], np.timedelta64):
            time = timedelta_to_float(time)
        else:
            raise TypeError("particle time must be a datetime, timedelta, or date object")
        time = np.repeat(time, lon.size) if time.size == 1 else time

        assert lon.size == time.size, "time and positions (lon, lat, z) do not have the same lengths."

        if fieldset.time_interval:
            _warn_particle_times_outside_fieldset_time_bounds(time, fieldset.time_interval)

        for kwvar in kwargs:
            if kwvar not in ["partition_function"]:
                kwargs[kwvar] = np.array(kwargs[kwvar]).flatten()
                assert lon.size == kwargs[kwvar].size, (
                    f"{kwvar} and positions (lon, lat, z) don't have the same lengths."
                )

        self._data = create_particle_data(
            pclass=pclass,
            nparticles=lon.size,
            ngrids=len(fieldset.gridset),
            time_interval=fieldset.time_interval,
            initial=dict(
                lon=lon,
                lat=lat,
                z=z,
                time=time,
                trajectory=trajectory_ids,
            ),
        )
        self._ptype = pclass

        # update initial values provided on ParticleSet creation # TODO: Wrap this into create_particle_data
        particle_variables = [v.name for v in pclass.variables]
        for kwvar, kwval in kwargs.items():
            if kwvar not in particle_variables:
                raise RuntimeError(f"Particle class does not have Variable {kwvar}")
            self._data[kwvar][:] = kwval

        self._kernel = None

    def __del__(self):
        if self._data is not None and isinstance(self._data, xr.Dataset):
            del self._data
        self._data = None

    def __iter__(self):
        self._index = 0
        return self

    def __next__(self):
        if self._index < len(self):
            p = self.__getitem__(self._index)
            self._index += 1
            return p
        raise StopIteration

    def __getattr__(self, name):
        """
        Access a single property of all particles.

        Parameters
        ----------
        name : str
            Name of the property
        """
        return self._data[name]

    def __getitem__(self, index):
        """Get a single particle by index."""
        return ParticleSetView(self._data, index=index, ptype=self._ptype)

    def __setattr__(self, name, value):
        if name in ["_data"]:
            object.__setattr__(self, name, value)
        elif isinstance(self._data, dict) and name in self._data.keys():
            self._data[name][:] = value
        else:
            object.__setattr__(self, name, value)

    @property
    def size(self):
        return len(self)

    def __repr__(self):
        return particleset_repr(self)

    def __len__(self):
        return len(self._data["trajectory"])

    def add(self, particles):
        """Add particles to the ParticleSet. Note that this is an
        incremental add, the particles will be added to the ParticleSet
        on which this function is called.

        Parameters
        ----------
        particles :
            Another ParticleSet containing particles to add to this one.

        Returns
        -------
        type
            The current ParticleSet

        """
        assert particles is not None, (
            f"Trying to add another {type(self)} to this one, but the other one is None - invalid operation."
        )
        assert type(particles) is type(self)

        if len(particles) == 0:
            return

        if len(self) == 0:
            self._data = particles._data
            return

        if isinstance(particles, type(self)):
            if len(self._data["trajectory"]) > 0:
                offset = self._data["trajectory"].max() + 1
            else:
                offset = 0
            particles._data["trajectory"] = particles._data["trajectory"] + offset

        for d in self._data:
            self._data[d] = np.concatenate((self._data[d], particles._data[d]))

        return self

    def __iadd__(self, particles):
        """Add particles to the ParticleSet.

        Note that this is an incremental add, the particles will be added to the ParticleSet
        on which this function is called.

        Parameters
        ----------
        particles :
            Another ParticleSet containing particles to add to this one.

        Returns
        -------
        type
            The current ParticleSet
        """
        self.add(particles)
        return self

    def remove_indices(self, indices):
        """Method to remove particles from the ParticleSet, based on their `indices`."""
        for d in self._data:
            self._data[d] = np.delete(self._data[d], indices, axis=0)

    def populate_indices(self):
        """Pre-populate guesses of particle ei (element id) indices"""
        for i, grid in enumerate(self.fieldset.gridset):
            grid_positions = grid.search(self.z, self.lat, self.lon)
            self._data["ei"][:, i] = grid.ravel_index(
                {
                    "X": grid_positions["X"]["index"],
                    "Y": grid_positions["Y"]["index"],
                    "Z": grid_positions["Z"]["index"],
                }
            )

    @classmethod
    def from_particlefile(cls, fieldset, pclass, filename, restart=True, restarttime=None, **kwargs):
        """Initialise the ParticleSet from a zarr ParticleFile.
        This creates a new ParticleSet based on locations of all particles written
        in a zarr ParticleFile at a certain time. Particle IDs are preserved if restart=True

        Parameters
        ----------
        fieldset : parcels.fieldset.FieldSet
            mod:`parcels.fieldset.FieldSet` object from which to sample velocity
        pclass :
            Particle class. May be a parcels.particle.Particle class as defined in parcels, or a subclass defining a custom particle.
        filename : str
            Name of the particlefile from which to read initial conditions
        restart : bool
            BSignal if pset is used for a restart (default is True).
            In that case, Particle IDs are preserved.
        restarttime :
            time at which the Particles will be restarted. Default is the last time written.
            Alternatively, restarttime could be a time value (including np.datetime64) or
            a callable function such as np.nanmin. The last is useful when running with dt < 0.
        repeatdt : datetime.timedelta or float, optional
            Optional interval on which to repeat the release of the ParticleSet. Either timedelta object, or float in seconds.
        **kwargs :
            Keyword arguments passed to the particleset constructor.
        """
        raise NotImplementedError(
            "ParticleSet.from_particlefile is not yet implemented in v4."
        )  # TODO implement this when ParticleFile is implemented in v4

    def Kernel(self, pyfunc):
        """Wrapper method to convert a `pyfunc` into a :class:`parcels.kernel.Kernel` object.

        Conversion is based on `fieldset` and `ptype` of the ParticleSet.

        Parameters
        ----------
        pyfunc : function or list of functions
            Python function to convert into kernel. If a list of functions is provided,
            the functions will be converted to kernels and combined into a single kernel.
        """
        if isinstance(pyfunc, list):
            return Kernel.from_list(
                self.fieldset,
                self._ptype,
                pyfunc,
            )
        return Kernel(
            self.fieldset,
            self._ptype,
            pyfuncs=[pyfunc],
        )

    def data_indices(self, variable_name, compare_values, invert=False):
        """Get the indices of all particles where the value of `variable_name` equals (one of) `compare_values`.

        Parameters
        ----------
        variable_name : str
            Name of the variable to check.
        compare_values :
            Value or list of values to compare to.
        invert :
            Whether to invert the selection. I.e., when True,
            return all indices that do not equal (one of)
            `compare_values`. (Default value = False)

        Returns
        -------
        np.ndarray
            Numpy array of indices that satisfy the test.

        """
        compare_values = (
            np.array([compare_values]) if type(compare_values) not in [list, dict, np.ndarray] else compare_values
        )
        return np.where(np.isin(self._data[variable_name], compare_values, invert=invert))[
            0
        ]  # TODO check if this can be faster with xarray indexing?

    @property
    def _error_particles(self):
        """Get indices of all particles that are in an error state.

        Returns
        -------
        indices
            Indices of error particles.
        """
        return self.data_indices("state", [StatusCode.Success, StatusCode.Evaluate], invert=True)

    @property
    def _num_error_particles(self):
        """Get the number of particles that are in an error state.

        Returns
        -------
        int
            Number of error particles.
        """
        return np.sum(np.isin(self._data["state"], [StatusCode.Success, StatusCode.Evaluate], invert=True))

    def set_variable_write_status(self, var, write_status):
        """Method to set the write status of a Variable.

        Parameters
        ----------
        var :
            Name of the variable (string)
        write_status :
            Write status of the variable (True, False or 'once')
        """
        self._data[var].set_variable_write_status(write_status)

    def execute(
        self,
        pyfunc: types.FunctionType | Kernel,
        dt: datetime.timedelta | np.timedelta64 | float,
        endtime: np.timedelta64 | np.datetime64 | None = None,
        runtime: datetime.timedelta | np.timedelta64 | float | None = None,
        output_file: ParticleFile = None,
        verbose_progress: bool = True,
    ):
        """Execute a given kernel function over the particle set for multiple timesteps.

        Optionally also provide sub-timestepping
        for particle output.

        Parameters
        ----------
        pyfunc :
            Kernel function to execute. This can be the name of a
            defined Python function or a :class:`parcels.kernel.Kernel` object.
            Kernels can be concatenated using the + operator.
        dt (np.timedelta64 or float):
            Timestep interval (as a np.timedelta64 object of float in seconds) to be passed to the kernel.
            Use a negative value for a backward-in-time simulation.
        endtime (np.datetime64 or np.timedelta64): :
            End time for the timestepping loop. If a np.timedelta64 is provided, it is interpreted as the total simulation time. In this case,
            the absolute end time is the start of the fieldset's time interval plus the np.timedelta64.
            If a datetime is provided, it is interpreted as the absolute end time of the simulation.
        runtime (np.timedelta64 or float):
            The duration of the simulation execution. Must be a float (in seconds) or a np.timedelta64 object and is required to be set when the `fieldset.time_interval` is not defined.
            If the `fieldset.time_interval` is defined and the runtime is provided, the end time will be the start of the fieldset's time interval plus the runtime.
        output_file :
            mod:`parcels.particlefile.ParticleFile` object for particle output (Default value = None)
        verbose_progress : bool
            Boolean for providing a progress bar for the kernel execution loop. (Default value = True)

        Notes
        -----
        ``ParticleSet.execute()`` acts as the main entrypoint for simulations, and provides the simulation time-loop. This method encapsulates the logic controlling the switching between kernel execution, output file writing, reading in fields for new timesteps, adding new particles to the simulation domain, stopping the simulation, and executing custom functions (``postIterationCallbacks`` provided by the user).
        """
        # check if particleset is empty. If so, return immediately
        if len(self) == 0:
            return

        if not isinstance(pyfunc, Kernel):
            pyfunc = self.Kernel(pyfunc)

        self._kernel = pyfunc

        if output_file is not None:
            output_file.set_metadata(self.fieldset.gridset[0]._mesh)
            output_file.metadata["parcels_kernels"] = self._kernel.funcname

        dt, sign_dt = _convert_dt_to_float(dt)
        self._data["dt"][:] = dt

        runtime = _convert_runtime_to_float(runtime)

        start_time, end_time = _get_simulation_start_and_end_times(
            self.fieldset.time_interval, self._data["time"], runtime, endtime, sign_dt
        )

        # Set the time of the particles if it hadn't been set on initialisation
        if np.isnan(self._data["time"]).any():
            self._data["time"][:] = start_time

        outputdt = output_file.outputdt if output_file else None
        _warn_outputdt_release_desync(outputdt, start_time, self._data["time"][:])

        # Set up pbar
        if output_file:
            logger.info(f"Output files are stored in {_format_zarr_output_location(output_file.store)}")

        if verbose_progress:
            pbar = tqdm(total=end_time - start_time, file=sys.stdout)
            pbar.set_description("Integration time: " + str(start_time))

        next_output = start_time if output_file else None

        time = start_time
        while sign_dt * (time - end_time) < 0:
            if next_output is not None:
                f = min if sign_dt > 0 else max
                next_time = f(next_output, end_time)
            else:
                next_time = end_time

            self._kernel.execute(self, endtime=next_time, dt=dt)

            if next_output is not None:
                if np.abs(next_time - next_output) < 0.001:
                    if output_file:
                        output_file.write(self, next_output)
                    if np.isfinite(outputdt):
                        next_output += outputdt * sign_dt

            if verbose_progress:
                pbar.set_description("Integration time: " + str(float_to_datelike(time, self.fieldset.time_interval)))
                pbar.update(next_time - time)

            time = next_time

        if verbose_progress:
            pbar.close()


def _warn_outputdt_release_desync(outputdt: float, starttime: float, release_times: Iterable[float]):
    """Gives the user a warning if the release time isn't a multiple of outputdt."""
    if outputdt and any((np.isfinite(t) and (t - starttime) % outputdt != 0) for t in release_times):
        warnings.warn(
            "Some of the particles have a start time difference that is not a multiple of outputdt. "
            "This could cause the first output of some of the particles that start later "
            "in the simulation to be at a different time than expected.",
            ParticleSetWarning,
            stacklevel=2,
        )


def _warn_particle_times_outside_fieldset_time_bounds(release_times: np.ndarray, time: TimeInterval):
    if np.isnan(release_times).all():
        return

    if np.any(release_times < 0) or np.any(release_times > timedelta_to_float(time.right - time.left)):
        warnings.warn(
            "Some particles are set to be released outside the FieldSet's executable time domain.",
            ParticleSetWarning,
            stacklevel=2,
        )


def _convert_dt_to_float(dt):
    try:
        dt = timedelta_to_float(dt)
        assert dt is not None
        sign_dt = np.sign(dt).astype(int)
        assert sign_dt in [-1, 1]
    except (ValueError, TypeError, AssertionError) as e:
        raise ValueError(f"dt must be a non-zero datetime.timedelta or np.timedelta64 object, got {dt=!r}") from e

    return dt, sign_dt


def _convert_runtime_to_float(runtime):
    if runtime is not None:
        try:
            runtime = timedelta_to_float(runtime)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f"The runtime must be a datetime.timedelta, np.timedelta64 or float object. Got {type(runtime)}"
            ) from e
        if runtime < 0:
            raise ValueError(f"The runtime must be a non-negative timedelta or float. Got {runtime=!r}")

    return runtime


def _get_simulation_start_and_end_times(
    time_interval: TimeInterval,
    particle_release_times: np.ndarray,
    runtime: np.timedelta64 | None,
    endtime: np.datetime64 | None,
    sign_dt: Literal[-1, 1],
) -> tuple[np.datetime64, np.datetime64]:
    if runtime is not None and endtime is not None:
        raise ValueError(
            f"runtime and endtime are mutually exclusive - provide one or the other. Got {runtime=!r}, {endtime=!r}"
        )

    if runtime is None and time_interval is None:
        raise ValueError("The runtime must be provided when the time_interval is not defined for a fieldset.")

    if runtime is None and endtime is None:
        raise ValueError("Either runtime or endtime must be provided.")

    if sign_dt == 1:
        first_release_time = particle_release_times.min()
    else:
        first_release_time = particle_release_times.max()

    if (time_interval is not None) and (endtime is not None):
        if type(endtime) != type(time_interval.left):  # noqa: E721
            raise ValueError(
                f"The endtime must be of the same type as the fieldset.time_interval start time. Got {endtime=!r} with {time_interval=!r}"
            )
        if endtime not in time_interval:
            msg = (
                f"Calculated/provided end time of {endtime!r} is not in fieldset time interval {time_interval!r}. Either reduce your runtime, modify your "
                "provided endtime, or change your release timing."
                "Important info:\n"
                f"    First particle release: {first_release_time!r}\n"
                f"    runtime: {runtime!r}\n"
                f"    (calculated) endtime: {endtime!r}"
            )
            raise ValueError(msg)

        endtime = timedelta_to_float(endtime - time_interval.left)

    start_time = _get_start_time(first_release_time, time_interval, sign_dt, runtime)

    if isinstance(runtime, np.timedelta64):
        runtime = timedelta_to_float(runtime)

    if endtime is None:
        endtime = start_time + sign_dt * runtime

    return start_time, endtime


def _get_start_time(first_release_time, time_interval, sign_dt, runtime):
    if time_interval is None:
        time_interval = TimeInterval(np.timedelta64(0, "s"), right=np.timedelta64(int(runtime * 1e9), "ns"))

    if sign_dt == 1:
        fieldset_start = 0.0
    else:
        fieldset_start = timedelta_to_float(time_interval.right - time_interval.left)

    start_time = first_release_time if not np.isnan(first_release_time) else fieldset_start
    return start_time