Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/structural-mortgage-interest.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Convert imputed deductible mortgage interest into structural mortgage balance, interest, and origination-year inputs when the installed `policyengine-us` supports modeling the federal mortgage interest deduction (MID) cap, while preserving total current-law interest deductions via residual investment interest inputs.
34 changes: 30 additions & 4 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,27 @@
import pandas as pd
from policyengine_core.data import Dataset

from policyengine_us_data.datasets.cps.cps import * # noqa: F403
from policyengine_us_data.datasets.puf import * # noqa: F403
from policyengine_us_data.datasets.cps.cps import CPS, CPS_2024, CPS_2024_Full
from policyengine_us_data.datasets.puf import PUF, PUF_2024
from policyengine_us_data.storage import STORAGE_FOLDER
from policyengine_us_data.utils.mortgage_interest import (
STRUCTURAL_MORTGAGE_VARIABLES,
convert_mortgage_interest_to_structural_inputs,
impute_tax_unit_mortgage_balance_hints,
)
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
from policyengine_us_data.utils.retirement_limits import (
get_retirement_limits,
get_se_pension_limits,
)

logger = logging.getLogger(__name__)


def _supports_structural_mortgage_inputs() -> bool:
    """Check whether the structural mortgage inputs can be used.

    Delegates to ``has_policyengine_us_variables``, passing every name in
    ``STRUCTURAL_MORTGAGE_VARIABLES`` as a positional argument, and returns
    its answer unchanged.

    NOTE(review): presumably this is true only when the installed
    ``policyengine-us`` defines all of those variables -- confirm against
    the helper's implementation.
    """
    required_variables = STRUCTURAL_MORTGAGE_VARIABLES
    return has_policyengine_us_variables(*required_variables)


# CPS-only variables that should be QRF-imputed for the PUF clone half
# instead of naively duplicated from the CPS donor. These are
# income-correlated variables that exist only in the CPS; demographics,
Expand Down Expand Up @@ -445,6 +456,15 @@ def generate(self):
)

new_data = self._rename_imputed_to_inputs(new_data)
if _supports_structural_mortgage_inputs():
new_data = impute_tax_unit_mortgage_balance_hints(
new_data,
self.time_period,
)
new_data = convert_mortgage_interest_to_structural_inputs(
new_data,
self.time_period,
)
new_data = self._drop_formula_variables(new_data)
self.save_dataset(new_data)

Expand Down Expand Up @@ -472,11 +492,17 @@ def _rename_imputed_to_inputs(cls, data):
# due to entity shape mismatch.
_KEEP_FORMULA_VARS = {
"person_id",
"interest_deduction",
"self_employed_pension_contribution_ald",
"self_employed_health_insurance_ald",
}

@classmethod
def _keep_formula_vars(cls):
    """Return the formula variables that ``_drop_formula_variables`` keeps.

    The result is always a fresh ``set`` copied from
    ``cls._KEEP_FORMULA_VARS``, so callers may mutate it freely.  When
    ``_supports_structural_mortgage_inputs()`` is false,
    ``"interest_deduction"`` is included as well.
    """
    # Copy first so the class-level collection is never modified.
    retained = {*cls._KEEP_FORMULA_VARS}
    if _supports_structural_mortgage_inputs():
        return retained
    # Structural mortgage inputs are unavailable: also keep the
    # interest_deduction formula variable in the saved dataset.
    return retained | {"interest_deduction"}

# QRF imputes formula-level variables (e.g. taxable_pension_income)
# but we must store them under leaf input names so
# _drop_formula_variables doesn't discard them. The engine then
Expand Down Expand Up @@ -526,7 +552,7 @@ def _drop_formula_variables(cls, data):
if (hasattr(var, "formulas") and len(var.formulas) > 0)
or getattr(var, "adds", None)
or getattr(var, "subtracts", None)
} - cls._KEEP_FORMULA_VARS
} - cls._keep_formula_vars()
dropped = sorted(set(data.keys()) & formula_vars)
if dropped:
logger.info(
Expand Down
17 changes: 17 additions & 0 deletions policyengine_us_data/datasets/puf/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
from policyengine_us_data.datasets.puf.disaggregate_puf import (
disaggregate_aggregate_records,
)
from policyengine_us_data.utils.mortgage_interest import (
STRUCTURAL_MORTGAGE_VARIABLES,
convert_mortgage_interest_to_structural_inputs,
)
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
from policyengine_us_data.utils.uprating import (
create_policyengine_uprating_factors_table,
)
Expand Down Expand Up @@ -643,6 +648,18 @@ def generate(self):
self.holder[key] = np.array(self.holder[key]).astype(float)
assert not np.isnan(self.holder[key]).any(), f"{key} has NaNs."

holder_tp = {
variable: {self.time_period: values}
for variable, values in self.holder.items()
}
if has_policyengine_us_variables(*STRUCTURAL_MORTGAGE_VARIABLES):
holder_tp = convert_mortgage_interest_to_structural_inputs(
holder_tp,
self.time_period,
)
self.holder = {
variable: values[self.time_period] for variable, values in holder_tp.items()
}
self.save_dataset(self.holder)

def add_tax_unit(self, row, tax_unit_id):
Expand Down
Loading
Loading