# policyengine-core: policyengine_core/commons/formulas.py
import logging
from typing import Any, Callable, Dict, List, Sequence, Type, TypeVar, Union
from warnings import warn

import numpy
import numpy as np
import pandas as pd
from numpy import maximum as max_
from numpy import minimum as min_
from numpy import select

from policyengine_core.parameters.parameter_node import ParameterNode
from policyengine_core.periods.period_ import Period
from policyengine_core.populations.population import Population
from policyengine_core.types import ArrayLike, ArrayType
from policyengine_core.variables.variable import Variable

# Generic placeholder for the type of the replacement values used in the
# annotations of `switch`.
T = TypeVar("T")


def apply_thresholds(
    input: ArrayType[float],
    thresholds: ArrayLike[float],
    choices: ArrayLike[float],
) -> ArrayType[float]:
    """Picks, for every input, the choice of the first threshold it fits under.

    Each element of ``input`` is compared (``<=``) against the ``thresholds``
    in the order given; the choice paired with the first threshold that is
    satisfied is returned for that element. When exactly one more choice than
    thresholds is supplied, that extra last choice is used for inputs that
    exceed every threshold.

    Args:
        input: The values to classify.
        thresholds: The upper bounds, tested in the order given.
        choices: The values to return, one per threshold, optionally followed
            by one catch-all value.

    Returns:
        :obj:`numpy.ndarray` of :obj:`float`:
        The chosen value for each input, as produced by ``numpy.select``.

    Raises:
        :exc:`AssertionError`: When the number of ``thresholds`` (t) and the
            number of ``choices`` (c) satisfy neither t == c nor t == c - 1.

    Examples:
        >>> input = numpy.array([4, 5, 6, 7, 8])
        >>> thresholds = [5, 7]
        >>> choices = [10, 15, 20]
        >>> apply_thresholds(input, thresholds, choices)
        array([10, 10, 15, 15, 20])

    """

    conditions = []
    for threshold in thresholds:
        conditions.append(input <= threshold)

    if len(choices) - len(conditions) == 1:
        # The extra choice is the fallback for inputs above the highest
        # threshold: an always-true condition makes it selectable.
        conditions.append(True)

    assert len(conditions) == len(choices), (
        "'apply_thresholds' must be called with the same number of "
        "thresholds than choices, or one more choice."
    )

    return numpy.select(conditions, choices)


def concat(this: ArrayLike[str], that: ArrayLike[str]) -> ArrayType[str]:
    """Concatenates the values of two arrays element-wise, as strings.

    Operands that do not already hold text are converted to their string
    representation first. This applies to NumPy arrays as well as to plain
    sequences or scalars (e.g. ``[1, 2]``), which previously made
    ``numpy.char.add`` fail.

    Args:
        this: An array to concatenate.
        that: Another array to concatenate.

    Returns:
        :obj:`numpy.ndarray` of :obj:`str`:
        An array with the concatenated values.

    Raises:
        :exc:`TypeError`: When either argument is a tuple.

    Examples:
        >>> this = ["this", "that"]
        >>> that = numpy.array([1, 2.5])
        >>> concat(this, that)
        array(['this1.0', 'that2.5']...)

    """
    if isinstance(this, tuple):
        raise TypeError("First argument must not be a tuple.")

    if isinstance(that, tuple):
        raise TypeError("Second argument must not be a tuple.")

    def as_text(values):
        # Arrays keep the historical rule: anything that is not a unicode
        # string dtype is cast to ``str``.
        if isinstance(values, numpy.ndarray):
            if numpy.issubdtype(values.dtype, numpy.str_):
                return values
            return values.astype("str")

        # Other inputs (lists, scalars, ...) are passed through untouched
        # when they already hold text (str or bytes), so existing callers see
        # no change; everything else is stringified instead of crashing.
        array = numpy.asarray(values)
        if array.dtype.kind in "US":
            return values
        return array.astype("str")

    return numpy.char.add(as_text(this), as_text(that))


def switch(
    conditions: ArrayType[Any],
    value_by_condition: Dict[float, T],
) -> ArrayType[T]:
    """Vectorised equivalent of a switch statement.

    Every item of ``conditions`` is compared for equality against the keys of
    ``value_by_condition`` and replaced by the value stored under the first
    matching key. Items matching no key take ``numpy.select``'s default (0).

    Args:
        conditions: An array of conditions.
        value_by_condition: Mapping from a condition to its replacement value.

    Returns:
        :obj:`numpy.ndarray`:
        An array of the same size as ``conditions`` with the replaced values.

    Raises:
        :exc:`AssertionError`: When ``value_by_condition`` is empty.

    Examples:
        >>> conditions = numpy.array([1, 1, 1, 2])
        >>> value_by_condition = {1: 80, 2: 90}
        >>> switch(conditions, value_by_condition)
        array([80, 80, 80, 90])

    """

    assert len(value_by_condition) > 0, (
        "'switch' must be called with at least one value."
    )

    masks = []
    replacements = []
    for condition, replacement in value_by_condition.items():
        masks.append(conditions == condition)
        replacements.append(replacement)

    return numpy.select(masks, replacements)


def for_each_variable(
    entity: Population,
    period: Period,
    variables: List[str],
    agg_func: str = "add",
    group_agg_func: str = "add",
    options: List[str] = None,
) -> ArrayLike:
    """Computes several variables and folds their values into one result.

    Each variable is brought to the level of ``entity`` and the per-variable
    results are then combined pairwise, in list order, with ``agg_func``.

    Args:
        entity (Population): The entity population, as passed in formulas.
        period (Period): The period, as passed in formulas.
        variables (List[str]): A list of variable names.
        agg_func (str, optional): How to combine the per-variable results:
            "add", "multiply", "max" or "min". Defaults to "add".
        group_agg_func (str, optional): How person-level values are reduced
            to a group ``entity``: "add", "all", "max" or "min". Only looked
            up when ``entity`` is not the person entity. Defaults to "add".
        options (List[str], optional): Options to pass to the
            `entity(variable, period)` call. Defaults to None.

    Raises:
        KeyError: If ``agg_func`` (or, for a group entity, ``group_agg_func``)
            is not one of the supported names.
        ValueError: If ``entity`` is the person entity and a variable is
            defined for a group entity.

    Returns:
        ArrayLike: The combined result, or None when ``variables`` is empty.
    """
    combiners = {
        "add": lambda x, y: x + y,
        "multiply": lambda x, y: x * y,
        "max": max_,
        "min": min_,
    }
    combine = combiners[agg_func]

    target = entity.entity
    if not target.is_person:
        # Group reducers only exist on group populations, so they are looked
        # up only in that case.
        reducers = {
            "add": entity.sum,
            "all": entity.all,
            "max": entity.max,
            "min": entity.min,
        }
        group_agg_func = reducers[group_agg_func]

    result = None
    for variable in variables:
        variable_entity = target.get_variable(variable).entity

        if variable_entity.key == target.key:
            # Already defined at the requested level.
            values = entity(variable, period, options=options)
        elif variable_entity.is_person:
            # Person-level variable reduced to the group.
            member_values = entity.members(variable, period, options=options)
            values = group_agg_func(member_values)
        elif target.is_person:
            raise ValueError(
                f"You requested to aggregate {variable} (defined for {variable_entity.plural}) to {entity.entity.plural}, but this is not yet implemented."
            )
        else:
            # Group-to-group aggregation: each person gets the value of their
            # source group divided by that group's `nb_persons()`, and these
            # shares are summed over the target group.
            source = entity.simulation.populations[variable_entity.key]
            projected_values = source.project(source(variable, period))
            projected_sizes = source.project(source.nb_persons())
            person_shares = projected_values / projected_sizes
            values = entity.sum(person_shares)

        result = values if result is None else combine(result, values)

    return result


def add(
    entity: Population,
    period: Period,
    variables: List[str],
    options: List[str] = None,
):
    """Adds up the values of a list of variables.

    Thin wrapper around `for_each_variable` with ``agg_func="add"``.

    Args:
        entity (Population): The entity population, as passed in formulas.
        period (Period): The period, as passed in formulas.
        variables (List[str]): A list of variable names.
        options (List[str], optional): Options to pass to the
            `entity(variable, period)` call. Defaults to None.

    Raises:
        ValueError: Propagated from `for_each_variable` when a variable
            cannot be brought to the level of ``entity``.

    Returns:
        ArrayLike: The sum of the variables' values.
    """
    return for_each_variable(
        entity=entity,
        period=period,
        variables=variables,
        agg_func="add",
        options=options,
    )


def and_(
    entity: Population,
    period: Period,
    variables: List[str],
    options: List[str] = None,
):
    """Combines a list of variables with a logical "and".

    Implemented as an element-wise product: thin wrapper around
    `for_each_variable` with ``agg_func="multiply"``.

    Args:
        entity (Population): The entity population, as passed in formulas.
        period (Period): The period, as passed in formulas.
        variables (List[str]): A list of variable names.
        options (List[str], optional): Options to pass to the
            `entity(variable, period)` call. Defaults to None.

    Raises:
        ValueError: Propagated from `for_each_variable` when a variable
            cannot be brought to the level of ``entity``.

    Returns:
        ArrayLike: The product of the variables' values.
    """
    return for_each_variable(
        entity=entity,
        period=period,
        variables=variables,
        agg_func="multiply",
        options=options,
    )


# `or_` and `any_` are aliases of `add`; `multiply` is an alias of `and_`.
or_ = add
any_ = or_
multiply = and_

# Binds `select` to `numpy.select` (the same function already imported at the
# top of the file via `from numpy import select`).
select = np.select


# Further NumPy shortcuts exposed under this module's namespace.
clip = np.clip
inf = np.inf

# Calendar conversion constants.
WEEKS_IN_YEAR = 52
MONTHS_IN_YEAR = 12


def amount_over(amount: ArrayLike, threshold: float) -> ArrayLike:
    """Returns the part of each amount that exceeds a threshold.

    Logs a debug-level deprecation notice on every call.

    Args:
        amount (ArrayLike): The amount to calculate for.
        threshold (float): The threshold.

    Returns:
        ArrayLike: ``amount - threshold`` where positive, 0 elsewhere.
    """
    logging.debug("amount_over(x, y) is deprecated, use max_(x - y, 0) instead.")
    excess = amount - threshold
    return max_(0, excess)


def amount_between(
    amount: ArrayLike, threshold_1: float, threshold_2: float
) -> ArrayLike:
    """Returns the part of each amount lying between two thresholds.

    The amount is clipped to ``[threshold_1, threshold_2]`` and the lower
    threshold is then subtracted.

    Args:
        amount (ArrayLike): The amount to calculate for.
        threshold_1 (float): The lower threshold.
        threshold_2 (float): The upper threshold.

    Returns:
        ArrayLike: The amounts between the thresholds.
    """
    bounded = clip(amount, threshold_1, threshold_2)
    return bounded - threshold_1


def random(population):
    """
    Generate a deterministic pseudo-random value for each entity.

    The values are derived from the entity IDs and from a counter of calls
    stored on the simulation (``count_random_calls``), which this function
    increments on every call.

    Args:
        population: The population object containing simulation data.

    Returns:
        np.ndarray: Array of float64 values in [0, 1), one per entity ID.
    """
    simulation = population.simulation

    # Bump the per-simulation call counter, starting from 0 on first use.
    simulation.count_random_calls = getattr(simulation, "count_random_calls", 0) + 1

    # Read the IDs for the first known period of the ID variable, or for the
    # default calculation period when none is known.
    known_periods = simulation.get_holder(
        f"{population.entity.key}_id"
    ).get_known_periods()
    if known_periods:
        period = known_periods[0]
    else:
        period = simulation.default_calculation_period

    entity_ids = population(f"{population.entity.key}_id", period)

    # One unsigned 64-bit seed per entity, varying with the call count.
    seeds = np.abs(entity_ids * 100 + simulation.count_random_calls).astype(
        np.uint64
    )

    # Two multiply / xor-shift rounds to scramble the seed bits.
    shift = np.uint64(33)
    mixed = seeds * np.uint64(0x5851F42D4C957F2D)
    mixed = mixed ^ (mixed >> shift)
    mixed = mixed * np.uint64(0xC4CEB9FE1A85EC53)
    mixed = mixed ^ (mixed >> shift)

    # Keep the upper 53 bits and scale them into [0, 1).
    top_bits = mixed >> np.uint64(11)
    return top_bits.astype(np.float64) / (2**53)


def is_in(values: ArrayLike, *targets: list) -> ArrayLike:
    """Tests element-wise whether values equal any of the targets.

    The targets may be given either as separate positional arguments or as a
    single list.

    Args:
        values (ArrayLike): The values to test.
        *targets: The accepted values, or one list holding them.

    Returns:
        ArrayLike: True where the value equals at least one target.
    """
    given_as_list = len(targets) == 1 and isinstance(targets[0], list)
    candidates = targets[0] if given_as_list else targets

    matches = []
    for candidate in candidates:
        matches.append(values == candidate)
    return np.any(matches, axis=0)


def between(
    values: ArrayLike, lower: float, upper: float, inclusive: str = "both"
) -> ArrayLike:
    """Tests element-wise whether values lie between two bounds.

    Delegates to ``pandas.Series.between``.

    Args:
        values (ArrayLike): The input array.
        lower (float): The lower bound.
        upper (float): The upper bound.
        inclusive (str, optional): Which bounds are included, forwarded to
            ``pandas.Series.between``. Defaults to "both".

    Returns:
        ArrayLike: A boolean ``pandas.Series``, True where the value is
        within the bounds.
    """
    series = pd.Series(values)
    return series.between(lower, upper, inclusive=inclusive)


def uprated(by: str = None, start_year: int = 2015) -> Callable:
    """Attaches a formula applying an uprating factor to input variables.

    The decorated variable class receives a ``formula_{start_year}`` method
    that derives the value for a period from the value of the previous year.
    A class that already defines ``formula_{start_year}`` is returned
    untouched.

    Args:
        by (str, optional): Dotted path of the uprating parameter, resolved
            from the root of the parameter tree passed to the formula.
            Defaults to None (values are carried over without uprating).
        start_year (int, optional): The year used in the name of the
            generated formula. Defaults to 2015.

    Returns:
        Callable: A class decorator.
    """

    def uprater(variable: Type[Variable]) -> type:
        formula_name = f"formula_{start_year}"
        if hasattr(variable, formula_name):
            return variable

        # The variable's own undated formula, if any, used as a fallback.
        formula = getattr(variable, "formula", None)

        # NOTE: this replaces any `metadata` already set on the class.
        variable.metadata = {
            "uprating": by,
        }

        def formula_start_year(entity, period, parameters):
            if by is None:
                return entity(variable.__name__, period.last_year)

            current_parameter = parameters(period)
            last_year_parameter = parameters(period.last_year)
            for name in by.split("."):
                current_parameter = getattr(current_parameter, name)
                last_year_parameter = getattr(last_year_parameter, name)
            uprating = current_parameter / last_year_parameter

            old = entity(variable.__name__, period.last_year)
            # The previous check, `all(old) == 0`, was true as soon as a
            # single value was zero. The fallback is only meant for the case
            # where every value is zero.
            if (formula is not None) and np.all(np.asarray(old) == 0):
                # If no values have been inputted, don't uprate and
                # instead use the previous formula on the current period.
                return formula(entity, period, parameters)
            return uprating * old

        formula_start_year.__name__ = formula_name
        setattr(variable, formula_name, formula_start_year)
        return variable

    return uprater


def carried_over(variable: type) -> type:
    """Decorates ``variable`` with `uprated` called with its default arguments.

    Args:
        variable (type): The variable class to decorate.

    Returns:
        type: Whatever the decorator built by ``uprated()`` returns.
    """
    decorate = uprated()
    return decorate(variable)


def sum_of_variables(variables: Union[List[str], str]) -> Callable:
    """Builds a formula that sums the values of a list of variables.

    Deprecated: a warning is emitted each time this factory is called.

    Args:
        variables (Union[List[str], str]): Either a list of variable names,
            or the dotted path of a parameter from which the list is read
            when the formula runs.

    Returns:
        Callable: A formula ``(entity, period, parameters)`` returning the
        sum of the variables, computed with `add`.
    """

    warn(
        "Sum-of-variables formulas are deprecated- please use `adds` or `subtracts` instead."
    )

    def sum_of_variables(entity, period, parameters):
        if not isinstance(variables, str):
            return add(entity, period, variables)

        # A parameter path was given: walk down the parameter tree for this
        # period to fetch the variable names.
        node = parameters(period)
        for attribute in variables.split("."):
            node = getattr(node, attribute)
        return add(entity, period, node)

    return sum_of_variables


# `any_of_variables` is an alias of `sum_of_variables`.
any_of_variables = sum_of_variables


def index_(
    into: ParameterNode,
    indices: Union[ArrayLike, List[ArrayLike]],
    where: ArrayLike,
    fill: float = 0,
) -> ArrayLike:
    """Indexes into an object, but only where a condition is true.

    Only the entries of ``indices`` selected by ``where`` are looked up, which
    avoids indexing every value and filtering afterwards (as a plain
    ``np.where`` would require).

    Args:
        into (Parameter): The parameter to index into.
        indices (Union[ArrayLike, List[ArrayLike]]): The full, un-filtered
            index array. Can be a list of arrays for sequential indexing
            (the result of one lookup is indexed by the next array).
        where (ArrayLike): A boolean filter for indexing.
        fill (float, optional): The value to use where ``where`` is False.
            Defaults to 0.

    Returns:
        ArrayLike: A float array shaped like ``where`` holding the looked-up
        values where ``where`` is True and ``fill`` elsewhere.
    """
    where = np.asarray(where, dtype=bool)

    # Allocate the output as float from the start. It used to be allocated
    # with the dtype of the index array, which truncated non-integer values
    # (and `fill`) before the final conversion to float.
    result = np.full(where.shape, fill, dtype=float)
    if not where.any():
        return result

    index_chain = indices if isinstance(indices, list) else [indices]
    selected = into
    for index in index_chain:
        selected = selected[index[where]]
    result[where] = selected
    return result