"""Functions related to prior handling."""
from __future__ import annotations
import copy
from typing import Literal
import numpy as np
import pandas as pd
from ..v1.C import PREEQUILIBRATION_CONDITION_ID
from . import (
ESTIMATE,
LAPLACE,
LIN,
LOG,
LOG10,
LOG_LAPLACE,
LOG_NORMAL,
MEASUREMENT,
NOISE_DISTRIBUTION,
NOISE_FORMULA,
NOISE_PARAMETERS,
NORMAL,
OBJECTIVE_PRIOR_PARAMETERS,
OBJECTIVE_PRIOR_TYPE,
OBSERVABLE_FORMULA,
OBSERVABLE_ID,
OBSERVABLE_TRANSFORMATION,
PARAMETER_SCALE,
PARAMETER_SCALE_LAPLACE,
PARAMETER_SCALE_NORMAL,
PARAMETER_SEPARATOR,
SIMULATION_CONDITION_ID,
TIME,
C,
Problem,
)
from .distributions import *
from .parameters import scale, unscale
__all__ = ["priors_to_measurements"]


class Prior:
    """A PEtab parameter prior.

    Different from the general :class:`Distribution`, this class is used to
    represent the prior distribution of a PEtab parameter using the
    PEtab-specific options like `parameterScale`, `*PriorType`,
    `*PriorParameters`, and `lowerBound` / `upperBound`.

    :param type_: The type of the distribution.
    :param parameters: The parameters of the distribution (unaffected by
        `transformation`; in the case of `parameterScale*` distribution
        types, the parameters are assumed to be given on the
        `parameterScale` scale).
    :param bounds: The untransformed bounds of the sample (lower, upper).
    :param transformation: The transformation (`parameterScale`) of the
        distribution, applied to the sample if requested.
    :param _bounds_truncate: **deprecated**
        Whether the generated prior will be truncated at the bounds.
        If ``True``, the probability density is rescaled accordingly and
        samples are generated from the truncated distribution.
        If ``False``, the probability density is not rescaled, but samples
        are still generated from the truncated distribution.
    """
    def __init__(
        self,
        type_: str,
        parameters: tuple,
        bounds: tuple = None,
        transformation: str = C.LIN,
        _bounds_truncate: bool = True,
    ):
        if transformation not in C.PARAMETER_SCALES:
            raise ValueError(
                f"Unknown parameter transformation: {transformation}"
            )
        if len(parameters) != 2:
            raise ValueError(
                f"Expected two parameters, got {len(parameters)}: {parameters}"
            )
        if bounds is not None and len(bounds) != 2:
            raise ValueError(
                "Expected (lowerBound, upperBound), got "
                f"{len(bounds)}: {bounds}"
            )

        self._type = type_
        self._parameters = parameters
        self._bounds = bounds
        self._transformation = transformation
        self._bounds_truncate = _bounds_truncate

        truncation = bounds
        if truncation is not None:
            # for uniform, we don't want to implement truncation and just
            # adapt the distribution parameters
            if type_ == C.PARAMETER_SCALE_UNIFORM:
                parameters = (
                    max(parameters[0], scale(truncation[0], transformation)),
                    min(parameters[1], scale(truncation[1], transformation)),
                )
            elif type_ == C.UNIFORM:
                parameters = (
                    max(parameters[0], truncation[0]),
                    min(parameters[1], truncation[1]),
                )

        # create the underlying distribution
        match type_, transformation:
            case (C.UNIFORM, _) | (C.PARAMETER_SCALE_UNIFORM, C.LIN):
                self.distribution = Uniform(*parameters)
            case (C.NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LIN):
                self.distribution = Normal(*parameters, trunc=truncation)
            case (C.LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LIN):
                self.distribution = Laplace(*parameters, trunc=truncation)
            case (C.PARAMETER_SCALE_UNIFORM, C.LOG):
                self.distribution = Uniform(*parameters, log=True)
            case (C.LOG_NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LOG):
                self.distribution = Normal(
                    *parameters, log=True, trunc=truncation
                )
            case (C.LOG_LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LOG):
                self.distribution = Laplace(
                    *parameters, log=True, trunc=truncation
                )
            case (C.PARAMETER_SCALE_UNIFORM, C.LOG10):
                self.distribution = Uniform(*parameters, log=10)
            case (C.PARAMETER_SCALE_NORMAL, C.LOG10):
                self.distribution = Normal(
                    *parameters, log=10, trunc=truncation
                )
            case (C.PARAMETER_SCALE_LAPLACE, C.LOG10):
                self.distribution = Laplace(
                    *parameters, log=10, trunc=truncation
                )
            case _:
                raise ValueError(
                    "Unsupported distribution type / transformation: "
                    f"{type_} / {transformation}"
                )
    def __repr__(self):
        return (
            f"{self.__class__.__name__}("
            f"{self.type!r}, {self.parameters!r},"
            f" bounds={self.bounds!r}, transformation={self.transformation!r}"
            ")"
        )

    @property
    def type(self) -> str:
        """The type of the distribution."""
        return self._type

    @property
    def parameters(self) -> tuple:
        """The parameters of the distribution."""
        return self._parameters

    @property
    def bounds(self) -> tuple[float, float] | None:
        """The non-scaled bounds of the distribution."""
        return self._bounds

    @property
    def transformation(self) -> str:
        """The `parameterScale`."""
        return self._transformation
    def sample(self, shape=None, x_scaled=False) -> np.ndarray | float:
        """Sample from the distribution.

        :param shape: The shape of the sample.
        :param x_scaled: Whether the sample should be on the parameter scale.
        :return: A sample from the distribution.
        """
        raw_sample = self.distribution.sample(shape)
        if x_scaled:
            return self._scale_sample(raw_sample)
        else:
            return raw_sample

    def _scale_sample(self, sample):
        """Scale the sample to the parameter space."""
        # we also need to scale parameterScale* distributions, because
        # internally, they are handled as (unscaled) log-distributions
        return scale(sample, self.transformation)

    @property
    def lb_scaled(self) -> float:
        """The lower bound on the parameter scale."""
        return scale(self.bounds[0], self.transformation)

    @property
    def ub_scaled(self) -> float:
        """The upper bound on the parameter scale."""
        return scale(self.bounds[1], self.transformation)
    def _chain_rule_coeff(self, x) -> np.ndarray | float:
        """The chain rule coefficient for the transformation at x."""
        x = unscale(x, self.transformation)

        # scale the PDF to the parameter scale:
        # for x_scaled = log10(x), we have x = 10 ** x_scaled and
        # dx/dx_scaled = x * ln(10); for x_scaled = ln(x), dx/dx_scaled = x.
        # The density on the parameter scale is the unscaled density times
        # this coefficient.
        if self.transformation == C.LIN:
            coeff = 1
        elif self.transformation == C.LOG10:
            coeff = x * np.log(10)
        elif self.transformation == C.LOG:
            coeff = x
        else:
            raise ValueError(f"Unknown transformation: {self.transformation}")

        return coeff
    def pdf(
        self, x, x_scaled: bool = False, rescale=False
    ) -> np.ndarray | float:
        """Probability density function at x.

        This accounts for truncation, independent of the `_bounds_truncate`
        parameter.

        :param x: The value at which to evaluate the PDF.
        :param x_scaled: Whether ``x`` is on the parameter scale.
        :param rescale: Whether to rescale the PDF to integrate to 1 on the
            parameter scale. Only used if ``x_scaled`` is ``True``.
        :return: The value of the PDF at ``x``.
        """
        if x_scaled:
            coeff = self._chain_rule_coeff(x) if rescale else 1
            x = unscale(x, self.transformation)
            return self.distribution.pdf(x) * coeff

        return self.distribution.pdf(x)
    def neglogprior(
        self, x: np.ndarray | float, x_scaled: bool = False
    ) -> np.ndarray | float:
        """Negative log-prior at x.

        :param x: The value at which to evaluate the negative log-prior.
        :param x_scaled: Whether ``x`` is on the parameter scale.
            Note that the prior is always evaluated on the non-scaled
            parameters.
        :return: The negative log-prior at ``x``.
        """
        if self._bounds_truncate:
            # the truncation is handled by the distribution
            # the prior is always evaluated on the non-scaled parameters
            return -np.log(self.pdf(x, x_scaled=x_scaled, rescale=False))

        # we want to evaluate the prior on the untruncated distribution
        if x_scaled:
            x = unscale(x, self.transformation)
        return -np.log(self.distribution._pdf_untruncated(x))
    @staticmethod
    def from_par_dict(
        d,
        type_: Literal["initialization", "objective"],
        _bounds_truncate: bool = True,
    ) -> Prior:
        """Create a distribution from a row of the parameter table.

        :param d: A dictionary representing a row of the parameter table.
        :param type_: The type of the prior (``initialization`` or
            ``objective``).
        :param _bounds_truncate: Whether the generated prior will be truncated
            at the bounds. **deprecated**
        :return: A distribution object.
        """
        dist_type = C.PARAMETER_SCALE_UNIFORM
        if (_table_dist_type := d.get(f"{type_}PriorType")) and (
            isinstance(_table_dist_type, str) or not np.isnan(_table_dist_type)
        ):
            dist_type = _table_dist_type

        pscale = d.get(C.PARAMETER_SCALE, C.LIN)
        params = d.get(f"{type_}PriorParameters", None)
        if pd.isna(params) and dist_type == C.PARAMETER_SCALE_UNIFORM:
            params = (
                scale(d[C.LOWER_BOUND], pscale),
                scale(d[C.UPPER_BOUND], pscale),
            )
        else:
            params = tuple(
                map(
                    float,
                    params.split(C.PARAMETER_SEPARATOR),
                )
            )
        return Prior(
            type_=dist_type,
            parameters=params,
            bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]),
            transformation=pscale,
            _bounds_truncate=_bounds_truncate,
        )
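

# A hypothetical example of constructing a prior from a parameter-table row
# (the column values are made up for illustration and not taken from any
# real problem):
#
#     row = {
#         C.PARAMETER_SCALE: C.LOG10,
#         C.LOWER_BOUND: 1e-5,
#         C.UPPER_BOUND: 1e3,
#         "objectivePriorType": C.NORMAL,
#         "objectivePriorParameters": f"0.0{C.PARAMETER_SEPARATOR}1.0",
#     }
#     prior = Prior.from_par_dict(row, type_="objective")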


def priors_to_measurements(problem: Problem):
    """Convert priors to measurements.

    Reformulate the given problem such that the objective priors are
    converted to measurements. This is done by adding a new observable
    ``prior_{parameter_id}`` for each estimated parameter that has an
    objective prior, and adding a corresponding measurement to the
    measurement table. The location and scale of the prior are encoded in
    the measurement value and the noise parameter of the new measurement,
    so the resulting optimization problem is equivalent to the original
    problem. This is meant to be used for tools that do not support priors.

    The conversion involves the probability density function (PDF) of the
    prior, the parameters (e.g., location and scale) of that prior PDF, and
    the scale and value of the estimated parameter. Currently, `uniform`
    priors are not supported by this method. This method creates observables
    with:

    - `observableFormula`: the parameter value on the `parameterScale`
    - `observableTransformation`: `log` for `logNormal`/`logLaplace`
      distributions, `lin` otherwise

    and measurements with:

    - `measurement`: the PDF location
    - `noiseFormula`: the PDF scale

    .. warning::

        This function does not account for the truncation of the prior by
        the bounds in the parameter table. The resulting observable will
        not be truncated, and the PDF will not be rescaled.

    Arguments
    ---------
    problem:
        The problem to be converted.

    Returns
    -------
    The new problem with the priors converted to measurements.
    """
    new_problem = copy.deepcopy(problem)

    # we only need to consider parameters that are estimated
    par_df_tmp = problem.parameter_df.loc[problem.parameter_df[ESTIMATE] == 1]

    if (
        OBJECTIVE_PRIOR_TYPE not in par_df_tmp
        or par_df_tmp.get(OBJECTIVE_PRIOR_TYPE).isna().all()
        or OBJECTIVE_PRIOR_PARAMETERS not in par_df_tmp
        or par_df_tmp.get(OBJECTIVE_PRIOR_PARAMETERS).isna().all()
    ):
        # nothing to do
        return new_problem

    def scaled_observable_formula(parameter_id, parameter_scale):
        # The location parameter of the prior is given on the parameter
        # scale, so the observable formula maps the parameter value to
        # that scale.
        if parameter_scale == LIN:
            return parameter_id
        if parameter_scale == LOG:
            return f"ln({parameter_id})"
        if parameter_scale == LOG10:
            return f"log10({parameter_id})"
        raise ValueError(f"Unknown parameter scale {parameter_scale}.")
    new_measurement_dicts = []
    new_observable_dicts = []
    for _, row in par_df_tmp.iterrows():
        prior_type = row[OBJECTIVE_PRIOR_TYPE]
        parameter_scale = row.get(PARAMETER_SCALE, LIN)
        if pd.isna(prior_type):
            if not pd.isna(row[OBJECTIVE_PRIOR_PARAMETERS]):
                raise AssertionError(
                    "Objective prior parameters are set, but prior type is "
                    "not specified."
                )
            continue

        if "uniform" in prior_type.lower():
            # for measurements, "uniform" is not supported yet
            # if necessary, this could still be implemented by adding another
            # observable/measurement that will produce a constant objective
            # offset
            raise NotImplementedError("Uniform priors are not supported.")

        if prior_type not in (C.NORMAL, C.LAPLACE):
            # we can't (easily) handle parameterScale* priors or log*-priors
            raise NotImplementedError(
                f"Objective prior type {prior_type} is not implemented."
            )

        parameter_id = row.name
        prior_parameters = tuple(
            map(
                float,
                row[OBJECTIVE_PRIOR_PARAMETERS].split(PARAMETER_SEPARATOR),
            )
        )
        if len(prior_parameters) != 2:
            raise AssertionError(
                "Expected two objective prior parameters for parameter "
                f"{parameter_id}, but got {prior_parameters}."
            )

        # create new observable
        new_obs_id = f"prior_{parameter_id}"
        if new_obs_id in new_problem.observable_df.index:
            raise ValueError(
                f"Observable ID {new_obs_id}, which is to be "
                "created, already exists."
            )
        new_observable = {
            OBSERVABLE_ID: new_obs_id,
            OBSERVABLE_FORMULA: scaled_observable_formula(
                parameter_id,
                parameter_scale
                if prior_type in C.PARAMETER_SCALE_PRIOR_TYPES
                else LIN,
            ),
            NOISE_FORMULA: f"noiseParameter1_{new_obs_id}",
        }
        if prior_type in (LOG_NORMAL, LOG_LAPLACE):
            new_observable[OBSERVABLE_TRANSFORMATION] = LOG
        elif OBSERVABLE_TRANSFORMATION in new_problem.observable_df:
            # only set default if the column is already present
            new_observable[OBSERVABLE_TRANSFORMATION] = LIN

        # type of the underlying distribution
        if prior_type in (NORMAL, PARAMETER_SCALE_NORMAL, LOG_NORMAL):
            new_observable[NOISE_DISTRIBUTION] = NORMAL
        elif prior_type in (LAPLACE, PARAMETER_SCALE_LAPLACE, LOG_LAPLACE):
            new_observable[NOISE_DISTRIBUTION] = LAPLACE
        else:
            # we can't (easily) handle uniform priors in PEtab v1
            raise NotImplementedError(
                f"Objective prior type {prior_type} is not implemented."
            )
        new_observable_dicts.append(new_observable)
        # add measurement
        # we could just use any condition and time point since the parameter
        # value is constant. however, using an existing timepoint and
        # (preequilibrationConditionId+)simulationConditionId will avoid
        # requiring extra simulations and solver stops in tools that do not
        # check for time dependency of the observable. we use the first
        # condition/timepoint from the measurement table
        new_measurement = {
            OBSERVABLE_ID: new_obs_id,
            TIME: problem.measurement_df[TIME].iloc[0],
            MEASUREMENT: prior_parameters[0],
            NOISE_PARAMETERS: prior_parameters[1],
            SIMULATION_CONDITION_ID: new_problem.measurement_df[
                SIMULATION_CONDITION_ID
            ].iloc[0],
        }
        if PREEQUILIBRATION_CONDITION_ID in new_problem.measurement_df:
            new_measurement[PREEQUILIBRATION_CONDITION_ID] = (
                new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[
                    0
                ]
            )
        new_measurement_dicts.append(new_measurement)

        # remove prior from parameter table
        new_problem.parameter_df.loc[parameter_id, OBJECTIVE_PRIOR_TYPE] = (
            np.nan
        )
        new_problem.parameter_df.loc[
            parameter_id, OBJECTIVE_PRIOR_PARAMETERS
        ] = np.nan

    new_problem.observable_df = pd.concat(
        [
            new_problem.observable_df,
            pd.DataFrame(new_observable_dicts).set_index(OBSERVABLE_ID),
        ]
    )
    new_problem.measurement_df = pd.concat(
        [
            new_problem.measurement_df,
            pd.DataFrame(new_measurement_dicts),
        ],
        ignore_index=True,
    )
    return new_problem
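

# A sketch of the intended usage (the YAML path is a placeholder; this
# assumes the imported ``Problem`` class provides a ``from_yaml``
# constructor, as current PEtab does):
#
#     problem = Problem.from_yaml("path/to/problem.yaml")
#     converted = priors_to_measurements(problem)
#
# ``converted`` contains one additional observable ``prior_<parameterId>``
# and one additional measurement per estimated parameter with an objective
# prior; the corresponding objectivePriorType / objectivePriorParameters
# entries are cleared in the parameter table.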