From 320dfc044fc60ce37b73594cb90d4438164a6b25 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 26 Mar 2026 11:06:18 -0400
Subject: [PATCH 1/3] Add tax_unit_itemizes constraint for itemized deduction
 targets

SOI targets for SALT, real estate taxes, and medical expense deduction
are reported only for the ~10% of filers who itemize, but the existing
`variable > 0` constraint captures everyone with economic exposure
(~80-90% of filers). This mismatch causes massive count and dollar
overestimates. Adding `tax_unit_itemizes == 1` fixes the population
alignment.

Changes:
- etl_irs_soi.py: For salt, real_estate_taxes, and
  medical_expense_deduction, append a `tax_unit_itemizes == 1`
  constraint to child strata in the generic target loop.
- etl_national_targets.py: Split JCT itemized deduction targets
  (salt_deduction, medical_expense_deduction, charitable_deduction,
  interest_deduction) into a separate itemizer_targets list loaded
  into a new "United States - Itemizing Tax Filers" stratum with
  both filer and itemizer constraints. QBI deduction remains in the
  plain filer stratum (it is claimable without itemizing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 policyengine_us_data/db/etl_irs_soi.py        |  16 ++-
 .../db/etl_national_targets.py                | 108 +++++++++++++++---
 2 files changed, 110 insertions(+), 14 deletions(-)

diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py
index f6bda07bc..33f08cef0 100644
--- a/policyengine_us_data/db/etl_irs_soi.py
+++ b/policyengine_us_data/db/etl_irs_soi.py
@@ -29,6 +29,7 @@
 
 logger = logging.getLogger(__name__)
 
+ITEMIZED_DEDUCTION_VARIABLES = {"salt", "real_estate_taxes", "medical_expense_deduction"}
 
 # IRS SOI data is typically available ~2 years after the tax year
 IRS_SOI_LAG_YEARS = 2
@@ -661,7 +662,11 @@ def load_soi_data(long_dfs, year):
 
             # Create child stratum with constraint for this IRS variable
             # Note: This stratum will have the constraint that amount_variable > 0
-            note = f"{geo_description} filers with {amount_variable_name} > 0"
+            is_itemized = amount_variable_name in ITEMIZED_DEDUCTION_VARIABLES
+            if is_itemized:
+                note = f"{geo_description} itemizing filers with {amount_variable_name} > 0"
+            else:
+                note = f"{geo_description} filers with {amount_variable_name} > 0"
 
             # Check if child stratum already exists
             existing_stratum = (
@@ -698,6 +703,15 @@ def load_soi_data(long_dfs, year):
                     ]
                 )
 
+                if is_itemized:
+                    child_stratum.constraints_rel.append(
+                        StratumConstraint(
+                            constraint_variable="tax_unit_itemizes",
+                            operation="==",
+                            value="1",
+                        )
+                    )
+
                 # Add geographic constraints if applicable
                 if geo_info["type"] == "state":
                     child_stratum.constraints_rel.append(
diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
index 0e87aa84a..a5e208687 100644
--- a/policyengine_us_data/db/etl_national_targets.py
+++ b/policyengine_us_data/db/etl_national_targets.py
@@ -57,6 +57,17 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
 
     # Separate tax-related targets that need filer constraint
     tax_filer_targets = [
+        {
+            "variable": "qualified_business_income_deduction",
+            "value": 63.1e9,
+            "source": "Joint Committee on Taxation",
+            "notes": "QBI deduction tax expenditure",
+            "year": HARDCODED_YEAR,
+        },
+    ]
+
+    # Itemized deduction targets need both filer and itemizer constraints
+    itemizer_targets = [
         {
             "variable": "salt_deduction",
             "value": 21.247e9,
@@ -85,13 +96,6 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
             "notes": "Mortgage interest deduction tax expenditure",
             "year": HARDCODED_YEAR,
         },
-        {
-            "variable": "qualified_business_income_deduction",
-            "value": 63.1e9,
-            "source": "Joint Committee on Taxation",
-            "notes": "QBI deduction tax expenditure",
-            "year": HARDCODED_YEAR,
-        },
     ]
 
     direct_sum_targets = [
@@ -394,6 +398,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
     return {
         "direct_sum_targets": direct_sum_targets,
         "tax_filer_targets": tax_filer_targets,
+        "itemizer_targets": itemizer_targets,
         "conditional_count_targets": conditional_count_targets,
         "cbo_targets": cbo_targets,
         "treasury_targets": treasury_targets,
@@ -413,9 +418,10 @@ def transform_national_targets(raw_targets):
     Returns
     -------
     tuple
-        (direct_targets_df, tax_filer_df, conditional_targets)
+        (direct_targets_df, tax_filer_df, itemizer_df, conditional_targets)
         - direct_targets_df: DataFrame with direct sum targets
         - tax_filer_df: DataFrame with tax-related targets needing filer constraint
+        - itemizer_df: DataFrame with itemized deduction targets needing filer + itemizer constraints
         - conditional_targets: List of conditional count targets
     """
 
@@ -444,14 +450,19 @@ def transform_national_targets(raw_targets):
     tax_filer_df = (
         pd.DataFrame(all_tax_filer_targets) if all_tax_filer_targets else pd.DataFrame()
     )
+    itemizer_df = (
+        pd.DataFrame(raw_targets["itemizer_targets"])
+        if raw_targets["itemizer_targets"]
+        else pd.DataFrame()
+    )
 
     # Conditional targets stay as list for special processing
     conditional_targets = raw_targets["conditional_count_targets"]
 
-    return direct_df, tax_filer_df, conditional_targets
+    return direct_df, tax_filer_df, itemizer_df, conditional_targets
 
 
-def load_national_targets(direct_targets_df, tax_filer_df, conditional_targets):
+def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditional_targets):
     """
     Load national targets into the database.
 
@@ -461,6 +472,8 @@ def load_national_targets(direct_targets_df, tax_filer_df, conditional_targets):
         DataFrame with direct sum target data
     tax_filer_df : pd.DataFrame
         DataFrame with tax-related targets needing filer constraint
+    itemizer_df : pd.DataFrame
+        DataFrame with itemized deduction targets needing filer + itemizer constraints
     conditional_targets : list
         List of conditional count targets requiring strata
     """
@@ -590,6 +603,74 @@ def load_national_targets(direct_targets_df, tax_filer_df, conditional_targets):
                     session.add(target)
                     print(f"Added filer target: {target_data['variable']}")
 
+        # Process itemized deduction targets that need filer + itemizer constraints
+        if not itemizer_df.empty:
+            national_itemizer_stratum = (
+                session.query(Stratum)
+                .filter(
+                    Stratum.parent_stratum_id == us_stratum.stratum_id,
+                    Stratum.notes == "United States - Itemizing Tax Filers",
+                )
+                .first()
+            )
+
+            if not national_itemizer_stratum:
+                national_itemizer_stratum = Stratum(
+                    parent_stratum_id=us_stratum.stratum_id,
+                    notes="United States - Itemizing Tax Filers",
+                )
+                national_itemizer_stratum.constraints_rel = [
+                    StratumConstraint(
+                        constraint_variable="tax_unit_is_filer",
+                        operation="==",
+                        value="1",
+                    ),
+                    StratumConstraint(
+                        constraint_variable="tax_unit_itemizes",
+                        operation="==",
+                        value="1",
+                    ),
+                ]
+                session.add(national_itemizer_stratum)
+                session.flush()
+                print("Created national itemizer stratum")
+
+            for _, target_data in itemizer_df.iterrows():
+                target_year = target_data["year"]
+                existing_target = (
+                    session.query(Target)
+                    .filter(
+                        Target.stratum_id == national_itemizer_stratum.stratum_id,
+                        Target.variable == target_data["variable"],
+                        Target.period == target_year,
+                    )
+                    .first()
+                )
+
+                notes_parts = []
+                if pd.notna(target_data.get("notes")):
+                    notes_parts.append(target_data["notes"])
+                notes_parts.append(f"Source: {target_data.get('source', 'Unknown')}")
+                combined_notes = " | ".join(notes_parts)
+
+                if existing_target:
+                    existing_target.value = target_data["value"]
+                    existing_target.notes = combined_notes
+                    existing_target.source = "PolicyEngine"
+                    print(f"Updated itemizer target: {target_data['variable']}")
+                else:
+                    target = Target(
+                        stratum_id=national_itemizer_stratum.stratum_id,
+                        variable=target_data["variable"],
+                        period=target_year,
+                        value=target_data["value"],
+                        active=True,
+                        source="PolicyEngine",
+                        notes=combined_notes,
+                    )
+                    session.add(target)
+                    print(f"Added itemizer target: {target_data['variable']}")
+
         # Process conditional count targets (enrollment counts)
         for cond_target in conditional_targets:
             constraint_var = cond_target["constraint_variable"]
@@ -686,11 +767,12 @@ def load_national_targets(direct_targets_df, tax_filer_df, conditional_targets):
         session.commit()
 
         total_targets = (
-            len(direct_targets_df) + len(tax_filer_df) + len(conditional_targets)
+            len(direct_targets_df) + len(tax_filer_df) + len(itemizer_df) + len(conditional_targets)
         )
         print(f"\nSuccessfully loaded {total_targets} national targets")
         print(f"  - {len(direct_targets_df)} direct sum targets")
         print(f"  - {len(tax_filer_df)} tax filer targets")
+        print(f"  - {len(itemizer_df)} itemizer targets")
         print(f"  - {len(conditional_targets)} enrollment count targets (as strata)")
 
 
@@ -706,13 +788,13 @@ def main():
 
     # Transform
     print("Transforming targets...")
-    direct_targets_df, tax_filer_df, conditional_targets = transform_national_targets(
+    direct_targets_df, tax_filer_df, itemizer_df, conditional_targets = transform_national_targets(
         raw_targets
     )
 
     # Load
     print("Loading targets into database...")
-    load_national_targets(direct_targets_df, tax_filer_df, conditional_targets)
+    load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditional_targets)
 
     print("\nETL pipeline complete!")
 

From 089000276e4d6d7c29a65081b9a67dbfe62e452b Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Thu, 26 Mar 2026 14:35:58 -0400
Subject: [PATCH 2/3] Fix JCT tax expenditure target handling

---
 .../calibration/unified_matrix_builder.py     | 196 ++++++++++++++++--
 .../calibration/validate_staging.py           |  86 +++++++-
 .../db/create_database_tables.py              |   3 +-
 .../db/etl_national_targets.py                | 146 ++++++++-----
 .../test_unified_matrix_builder.py            |  71 +++++--
 .../tests/test_schema_views_and_lookups.py    |  28 +++
 6 files changed, 431 insertions(+), 99 deletions(-)

diff --git a/policyengine_us_data/calibration/unified_matrix_builder.py b/policyengine_us_data/calibration/unified_matrix_builder.py
index 0e7a1188f..09c121935 100644
--- a/policyengine_us_data/calibration/unified_matrix_builder.py
+++ b/policyengine_us_data/calibration/unified_matrix_builder.py
@@ -40,6 +40,59 @@
 }
 
 
+def _make_neutralize_variable_reform(variable_name: str):
+    from policyengine_core.reforms import Reform
+
+    class NeutralizeVariable(Reform):
+        def apply(self):
+            self.neutralize_variable(variable_name)
+
+    NeutralizeVariable.__name__ = f"Neutralize_{variable_name}"
+    return NeutralizeVariable
+
+
+def _compute_reform_household_values(
+    dataset_path: str,
+    time_period: int,
+    state: int,
+    n_hh: int,
+    reform_vars: list,
+    baseline_income_tax: np.ndarray,
+) -> dict:
+    """Compute repeal-based household income tax deltas for target vars."""
+    from policyengine_us import Microsimulation
+
+    reform_hh = {}
+    if not reform_vars:
+        return reform_hh
+
+    state_input = np.full(n_hh, state, dtype=np.int32)
+    for var in reform_vars:
+        try:
+            reform_sim = Microsimulation(
+                dataset=dataset_path,
+                reform=_make_neutralize_variable_reform(var),
+            )
+            reform_sim.set_input("state_fips", time_period, state_input)
+            for calc_var in get_calculated_variables(reform_sim):
+                reform_sim.delete_arrays(calc_var)
+            reform_income_tax = reform_sim.calculate(
+                "income_tax",
+                time_period,
+                map_to="household",
+            ).values.astype(np.float32)
+            reform_hh[var] = reform_income_tax - baseline_income_tax
+        except Exception as exc:
+            logger.warning(
+                "Cannot calculate tax expenditure '%s' for state %d: %s",
+                var,
+                state,
+                exc,
+            )
+
+    return reform_hh
+
+
 def _compute_single_state(
     dataset_path: str,
     time_period: int,
@@ -47,6 +100,7 @@ def _compute_single_state(
     n_hh: int,
     target_vars: list,
     constraint_vars: list,
+    reform_vars: list,
     rerandomize_takeup: bool,
     affected_targets: dict,
 ):
@@ -118,6 +172,23 @@ def _compute_single_state(
                 exc,
             )
 
+    baseline_income_tax = None
+    reform_hh = {}
+    if reform_vars:
+        baseline_income_tax = state_sim.calculate(
+            "income_tax",
+            time_period,
+            map_to="household",
+        ).values.astype(np.float32)
+        reform_hh = _compute_reform_household_values(
+            dataset_path,
+            time_period,
+            state,
+            n_hh,
+            reform_vars,
+            baseline_income_tax,
+        )
+
     if rerandomize_takeup:
         for spec in SIMPLE_TAKEUP_VARS:
             entity = spec["entity"]
@@ -177,6 +248,7 @@ def _compute_single_state(
         {
             "hh": hh,
             "person": person,
+            "reform_hh": reform_hh,
             "entity": entity_vals,
             "entity_wf_false": entity_wf_false,
         },
@@ -347,6 +419,7 @@ def _assemble_clone_values_standalone(
     person_hh_indices: np.ndarray,
     target_vars: set,
     constraint_vars: set,
+    reform_vars: set = None,
     county_values: dict = None,
     clone_counties: np.ndarray = None,
     county_dependent_vars: set = None,
@@ -409,7 +482,23 @@ def _assemble_clone_values_standalone(
             arr[mask] = state_values[int(state)]["person"][var][mask]
         person_vars[var] = arr
 
-    return hh_vars, person_vars
+    reform_hh_vars: dict = {}
+    for var in reform_vars or set():
+        if not any(
+            var in state_values[int(state)].get("reform_hh", {})
+            for state in unique_clone_states
+        ):
+            continue
+        arr = np.zeros(n_records, dtype=np.float32)
+        for state in unique_clone_states:
+            mask = state_masks[int(state)]
+            state_reform = state_values[int(state)].get("reform_hh", {})
+            # State arrays span all households: subset by mask; gaps stay 0.
+            if var in state_reform:
+                arr[mask] = state_reform[var][mask]
+        reform_hh_vars[var] = arr
+
+    return hh_vars, person_vars, reform_hh_vars
 
 
 def _evaluate_constraints_standalone(
@@ -452,10 +541,12 @@ def _calculate_target_values_standalone(
     non_geo_constraints: list,
     n_households: int,
     hh_vars: dict,
+    reform_hh_vars: dict,
     person_vars: dict,
     entity_rel: pd.DataFrame,
     household_ids: np.ndarray,
     variable_entity_map: dict,
+    reform_id: int = 0,
 ) -> np.ndarray:
     """Standalone target-value calculation (no class instance).
 
@@ -472,7 +563,8 @@ def _calculate_target_values_standalone(
             household_ids,
             n_households,
         )
-        vals = hh_vars.get(target_variable)
+        source_vars = reform_hh_vars if reform_id > 0 else hh_vars
+        vals = source_vars.get(target_variable)
         if vals is None:
             return np.zeros(n_households, dtype=np.float32)
         return (vals * mask).astype(np.float32)
@@ -559,8 +651,10 @@ def _process_single_clone(
     unique_constraint_vars = sd["unique_constraint_vars"]
     county_dep_targets = sd["county_dep_targets"]
     target_variables = sd["target_variables"]
+    target_reform_ids = sd["target_reform_ids"]
     target_geo_info = sd["target_geo_info"]
     non_geo_constraints_list = sd["non_geo_constraints_list"]
+    reform_vars = sd["reform_vars"]
     n_records = sd["n_records"]
     n_total = sd["n_total"]
     n_targets = sd["n_targets"]
@@ -580,12 +674,13 @@ def _process_single_clone(
     clone_counties = geo_counties[col_start:col_end]
 
     # Assemble hh/person values from precomputed state/county
-    hh_vars, person_vars = _assemble_clone_values_standalone(
+    hh_vars, person_vars, reform_hh_vars = _assemble_clone_values_standalone(
         state_values,
         clone_states,
         person_hh_indices,
         unique_variables,
         unique_constraint_vars,
+        reform_vars=reform_vars,
         county_values=county_values,
         clone_counties=clone_counties,
         county_dependent_vars=county_dep_targets,
@@ -715,6 +810,7 @@ def _process_single_clone(
 
     for row_idx in range(n_targets):
         variable = target_variables[row_idx]
+        reform_id = target_reform_ids[row_idx]
         geo_level, geo_id = target_geo_info[row_idx]
         non_geo = non_geo_constraints_list[row_idx]
 
@@ -758,6 +854,7 @@ def _process_single_clone(
                     non_geo,
                     n_records,
                     hh_vars,
+                    reform_hh_vars,
                     person_vars,
                     entity_rel,
                     household_ids,
@@ -765,7 +862,8 @@ def _process_single_clone(
                 )
             values = count_cache[vkey]
         else:
-            if variable not in hh_vars:
+            source_vars = reform_hh_vars if reform_id > 0 else hh_vars
+            if variable not in source_vars:
                 continue
             if constraint_key not in mask_cache:
                 mask_cache[constraint_key] = _evaluate_constraints_standalone(
@@ -776,7 +874,7 @@ def _process_single_clone(
                     n_records,
                 )
             mask = mask_cache[constraint_key]
-            values = hh_vars[variable] * mask
+            values = source_vars[variable] * mask
 
         vals = values[rec_indices]
         nonzero = vals != 0
@@ -857,6 +955,7 @@ def _build_state_values(
         sim,
         target_vars: set,
         constraint_vars: set,
+        reform_vars: set,
         geography,
         rerandomize_takeup: bool = True,
         workers: int = 1,
@@ -919,6 +1018,7 @@ def _build_state_values(
         # Convert sets to sorted lists for deterministic iteration
         target_vars_list = sorted(target_vars)
         constraint_vars_list = sorted(constraint_vars)
+        reform_vars_list = sorted(reform_vars)
 
         state_values = {}
 
@@ -942,6 +1042,7 @@ def _build_state_values(
                         n_hh,
                         target_vars_list,
                         constraint_vars_list,
+                        reform_vars_list,
                         rerandomize_takeup,
                         affected_targets,
                     ): st
@@ -1015,6 +1116,22 @@ def _build_state_values(
                             exc,
                         )
 
+                reform_hh = {}
+                if reform_vars_list:
+                    baseline_income_tax = state_sim.calculate(
+                        "income_tax",
+                        self.time_period,
+                        map_to="household",
+                    ).values.astype(np.float32)
+                    reform_hh = _compute_reform_household_values(
+                        self.dataset_path,
+                        self.time_period,
+                        state,
+                        n_hh,
+                        reform_vars_list,
+                        baseline_income_tax,
+                    )
+
                 if rerandomize_takeup:
                     for spec in SIMPLE_TAKEUP_VARS:
                         entity = spec["entity"]
@@ -1085,6 +1202,7 @@ def _build_state_values(
                 state_values[state] = {
                     "hh": hh,
                     "person": person,
+                    "reform_hh": reform_hh,
                     "entity": entity_vals,
                     "entity_wf_false": entity_wf_false,
                 }
@@ -1272,6 +1390,7 @@ def _assemble_clone_values(
         person_hh_indices: np.ndarray,
         target_vars: set,
         constraint_vars: set,
+        reform_vars: set = None,
         county_values: dict = None,
         clone_counties: np.ndarray = None,
         county_dependent_vars: set = None,
@@ -1296,9 +1415,11 @@ def _assemble_clone_values(
                 be looked up by county instead of state.
 
         Returns:
-            (hh_vars, person_vars) where hh_vars maps variable
-            name to household-level float32 array and person_vars
-            maps constraint variable name to person-level array.
+            (hh_vars, person_vars, reform_hh_vars) where hh_vars maps
+            baseline variables to household-level float32 arrays,
+            person_vars maps constraint variables to person-level arrays,
+            and reform_hh_vars maps repeal-based expenditure targets to
+            household-level arrays.
         """
         n_records = len(clone_states)
         n_persons = len(person_hh_indices)
@@ -1353,7 +1474,23 @@ def _assemble_clone_values(
                 arr[mask] = state_values[int(state)]["person"][var][mask]
             person_vars[var] = arr
 
-        return hh_vars, person_vars
+        reform_hh_vars = {}
+        for var in reform_vars or set():
+            if not any(
+                var in state_values[int(state)].get("reform_hh", {})
+                for state in unique_clone_states
+            ):
+                continue
+            arr = np.zeros(n_records, dtype=np.float32)
+            for state in unique_clone_states:
+                mask = state_masks[int(state)]
+                state_reform = state_values[int(state)].get("reform_hh", {})
+                # State arrays span all households: subset by mask; gaps stay 0.
+                if var in state_reform:
+                    arr[mask] = state_reform[var][mask]
+            reform_hh_vars[var] = arr
+
+        return hh_vars, person_vars, reform_hh_vars
 
     # ---------------------------------------------------------------
     # Database queries
@@ -1402,14 +1539,15 @@ def _query_targets(self, target_filter: dict) -> pd.DataFrame:
 
         query = f"""
         WITH filtered_targets AS (
-            SELECT tv.target_id, tv.stratum_id, tv.variable,
+            SELECT tv.target_id, tv.stratum_id, tv.variable, tv.reform_id,
                    tv.value, tv.period, tv.geo_level,
                    tv.geographic_id, tv.domain_variable
             FROM target_overview tv
-            WHERE {where_clause}
+            WHERE tv.active = 1
+              AND ({where_clause})
         ),
         best_periods AS (
-            SELECT stratum_id, variable,
+            SELECT stratum_id, variable, reform_id,
                 CASE
                     WHEN MAX(CASE WHEN period <= :time_period
                              THEN period END) IS NOT NULL
@@ -1418,13 +1556,14 @@ def _query_targets(self, target_filter: dict) -> pd.DataFrame:
                     ELSE MIN(period)
                 END as best_period
             FROM filtered_targets
-            GROUP BY stratum_id, variable
+            GROUP BY stratum_id, variable, reform_id
         )
         SELECT ft.*
         FROM filtered_targets ft
         JOIN best_periods bp
             ON ft.stratum_id = bp.stratum_id
             AND ft.variable = bp.variable
+            AND ft.reform_id = bp.reform_id
             AND ft.period = bp.best_period
         ORDER BY ft.target_id
         """
@@ -1821,7 +1960,9 @@ def build_matrix(
 
         # 2. Sort targets by geographic level
         targets_df["_geo_level"] = targets_df["geographic_id"].apply(get_geo_level)
-        targets_df = targets_df.sort_values(["_geo_level", "variable", "geographic_id"])
+        targets_df = targets_df.sort_values(
+            ["_geo_level", "variable", "reform_id", "geographic_id"]
+        )
         targets_df = targets_df.drop(columns=["_geo_level"]).reset_index(drop=True)
 
         # 3. Build column index structures from geography
@@ -1838,6 +1979,7 @@ def build_matrix(
         target_geo_info: List[Tuple[str, str]] = []
         target_names: List[str] = []
         non_geo_constraints_list: List[List[dict]] = []
+        target_reform_ids: List[int] = []
 
         for _, row in targets_df.iterrows():
             sid = int(row["stratum_id"])
@@ -1851,12 +1993,23 @@ def build_matrix(
 
             non_geo = [c for c in constraints if c["variable"] not in _GEO_VARS]
             non_geo_constraints_list.append(non_geo)
+            reform_id = int(row.get("reform_id", 0))
+            target_reform_ids.append(reform_id)
 
             target_names.append(
-                self._make_target_name(str(row["variable"]), constraints)
+                self._make_target_name(
+                    str(row["variable"]),
+                    constraints,
+                    reform_id=reform_id,
+                )
             )
 
         unique_variables = set(targets_df["variable"].values)
+        reform_variables = {
+            str(row["variable"])
+            for _, row in targets_df.iterrows()
+            if int(row.get("reform_id", 0)) > 0
+        }
 
         # 5a. Collect unique constraint variables
         unique_constraint_vars = set()
@@ -1870,6 +2023,7 @@ def build_matrix(
             sim,
             unique_variables,
             unique_constraint_vars,
+            reform_variables,
             geography,
             rerandomize_takeup=rerandomize_takeup,
             workers=workers,
@@ -2003,8 +2157,10 @@ def build_matrix(
                 "person_hh_indices": person_hh_indices,
                 "unique_variables": unique_variables,
                 "unique_constraint_vars": unique_constraint_vars,
+                "reform_vars": reform_variables,
                 "county_dep_targets": county_dep_targets,
                 "target_variables": target_variables,
+                "target_reform_ids": target_reform_ids,
                 "target_geo_info": target_geo_info,
                 "non_geo_constraints_list": (non_geo_constraints_list),
                 "n_records": n_records,
@@ -2103,12 +2259,13 @@ def build_matrix(
                         len(np.unique(clone_states)),
                     )
 
-                hh_vars, person_vars = self._assemble_clone_values(
+                hh_vars, person_vars, reform_hh_vars = self._assemble_clone_values(
                     state_values,
                     clone_states,
                     person_hh_indices,
                     unique_variables,
                     unique_constraint_vars,
+                    reform_vars=reform_variables,
                     county_values=county_values,
                     clone_counties=clone_counties,
                     county_dependent_vars=(county_dep_targets),
@@ -2245,6 +2402,7 @@ def build_matrix(
 
                 for row_idx in range(n_targets):
                     variable = str(targets_df.iloc[row_idx]["variable"])
+                    reform_id = target_reform_ids[row_idx]
                     geo_level, geo_id = target_geo_info[row_idx]
                     non_geo = non_geo_constraints_list[row_idx]
 
@@ -2291,6 +2449,7 @@ def build_matrix(
                                 non_geo_constraints=non_geo,
                                 n_households=n_records,
                                 hh_vars=hh_vars,
+                                reform_hh_vars=reform_hh_vars,
                                 person_vars=person_vars,
                                 entity_rel=entity_rel,
                                 household_ids=household_ids,
@@ -2298,7 +2457,8 @@ def build_matrix(
                             )
                         values = count_cache[vkey]
                     else:
-                        if variable not in hh_vars:
+                        source_vars = reform_hh_vars if reform_id > 0 else hh_vars
+                        if variable not in source_vars:
                             continue
                         if constraint_key not in mask_cache:
                             mask_cache[constraint_key] = (
@@ -2311,7 +2471,7 @@ def build_matrix(
                                 )
                             )
                         mask = mask_cache[constraint_key]
-                        values = hh_vars[variable] * mask
+                        values = source_vars[variable] * mask
 
                     vals = values[rec_indices]
                     nonzero = vals != 0
diff --git a/policyengine_us_data/calibration/validate_staging.py b/policyengine_us_data/calibration/validate_staging.py
index eb46287f4..f13f441ad 100644
--- a/policyengine_us_data/calibration/validate_staging.py
+++ b/policyengine_us_data/calibration/validate_staging.py
@@ -33,6 +33,7 @@
     UnifiedMatrixBuilder,
     _calculate_target_values_standalone,
     _GEO_VARS,
+    _make_neutralize_variable_reform,
 )
 from policyengine_us_data.calibration.calibration_utils import (
     STATE_CODES,
@@ -122,7 +123,7 @@ def _run_sanity_check(
 def _query_all_active_targets(engine, period: int) -> pd.DataFrame:
     query = """
     WITH best_periods AS (
-        SELECT stratum_id, variable,
+        SELECT stratum_id, variable, reform_id,
             CASE
                 WHEN MAX(CASE WHEN period <= :period
                          THEN period END) IS NOT NULL
@@ -132,15 +133,16 @@ def _query_all_active_targets(engine, period: int) -> pd.DataFrame:
             END as best_period
         FROM target_overview
         WHERE active = 1
-        GROUP BY stratum_id, variable
+        GROUP BY stratum_id, variable, reform_id
     )
-    SELECT tv.target_id, tv.stratum_id, tv.variable,
+    SELECT tv.target_id, tv.stratum_id, tv.variable, tv.reform_id,
            tv.value, tv.period, tv.geo_level,
            tv.geographic_id, tv.domain_variable
     FROM target_overview tv
     JOIN best_periods bp
         ON tv.stratum_id = bp.stratum_id
         AND tv.variable = bp.variable
+        AND tv.reform_id = bp.reform_id
         AND tv.period = bp.best_period
     WHERE tv.active = 1
     ORDER BY tv.target_id
@@ -268,6 +270,29 @@ def _build_entity_rel(sim) -> pd.DataFrame:
     )
 
 
+def _get_reform_household_values(  # household income_tax under a per-variable reform
+    dataset_path: str,  # passed to Microsimulation(dataset=...)
+    period: int,  # passed to calculate(period=...)
+    variable: str,  # reform input and cache key
+    reform_hh_cache: dict,  # caller-owned cache, mutated in place
+) -> np.ndarray:
+    if variable in reform_hh_cache:  # cache hit: skip the simulation entirely
+        return reform_hh_cache[variable]
+
+    from policyengine_us import Microsimulation  # only imported on a cache miss
+
+    reform_sim = Microsimulation(
+        dataset=dataset_path,
+        reform=_make_neutralize_variable_reform(variable),
+    )
+    reform_hh_cache[variable] = reform_sim.calculate(
+        "income_tax",
+        map_to="household",
+        period=period,
+    ).values  # NOTE(review): raw reform tax; callers here overwrite it with reform - baseline
+    return reform_hh_cache[variable]
+
+
 def validate_area(
     sim,
     targets_df: pd.DataFrame,
@@ -275,6 +300,7 @@ def validate_area(
     area_type: str,
     area_id: str,
     display_id: str,
+    dataset_path: str,
     period: int,
     training_mask: np.ndarray,
     variable_entity_map: dict,
@@ -291,6 +317,7 @@ def validate_area(
     ).values.astype(np.float64)
 
     hh_vars_cache = {}
+    reform_hh_cache = {}
     person_vars_cache = {}
 
     training_arr = np.asarray(training_mask, dtype=bool)
@@ -300,6 +327,7 @@ def validate_area(
     results = []
     for i, (idx, row) in enumerate(targets_df.iterrows()):
         variable = row["variable"]
+        reform_id = int(row.get("reform_id", 0))
         target_value = float(row["value"])
         stratum_id = int(row["stratum_id"])
 
@@ -336,15 +364,32 @@ def validate_area(
                 except Exception:
                     pass
 
+        if reform_id > 0 and "income_tax" not in hh_vars_cache:
+            hh_vars_cache["income_tax"] = sim.calculate(
+                "income_tax",
+                map_to="household",
+                period=period,
+            ).values
+        if reform_id > 0 and variable not in reform_hh_cache:
+            reform_income_tax = _get_reform_household_values(
+                dataset_path,
+                period,
+                variable,
+                reform_hh_cache,
+            )
+            reform_hh_cache[variable] = reform_income_tax - hh_vars_cache["income_tax"]
+
         per_hh = _calculate_target_values_standalone(
             target_variable=variable,
             non_geo_constraints=non_geo,
             n_households=n_households,
             hh_vars=hh_vars_cache,
+            reform_hh_vars=reform_hh_cache,
             person_vars=person_vars_cache,
             entity_rel=entity_rel,
             household_ids=household_ids,
             variable_entity_map=variable_entity_map,
+            reform_id=reform_id,
         )
 
         sim_value = float(np.dot(per_hh, hh_weight))
@@ -361,6 +406,7 @@ def validate_area(
         target_name = UnifiedMatrixBuilder._make_target_name(
             variable,
             constraints,
+            reform_id=reform_id,
         )
 
         sanity_check, sanity_reason = _run_sanity_check(
@@ -526,6 +572,7 @@ def _validate_single_area(
         area_type=area_type,
         area_id=area_id,
         display_id=display_id,
+        dataset_path=h5_path,
         period=period,
         training_mask=area_training,
         variable_entity_map=variable_entity_map,
@@ -580,11 +627,13 @@ def _compute_district_contributions(
     ).values.astype(np.float64)
 
     hh_vars_cache = {}
+    reform_hh_cache = {}
     person_vars_cache = {}
 
     results = []
     for i, (idx, row) in enumerate(state_targets_df.iterrows()):
         variable = row["variable"]
+        reform_id = int(row.get("reform_id", 0))
         stratum_id = int(row["stratum_id"])
 
         constraints = constraints_map.get(stratum_id, [])
@@ -615,15 +664,32 @@ def _compute_district_contributions(
                 except Exception:
                     pass
 
+        if reform_id > 0 and "income_tax" not in hh_vars_cache:
+            hh_vars_cache["income_tax"] = sim.calculate(
+                "income_tax",
+                map_to="household",
+                period=period,
+            ).values
+        if reform_id > 0 and variable not in reform_hh_cache:
+            reform_income_tax = _get_reform_household_values(
+                district_h5_path,
+                period,
+                variable,
+                reform_hh_cache,
+            )
+            reform_hh_cache[variable] = reform_income_tax - hh_vars_cache["income_tax"]
+
         per_hh = _calculate_target_values_standalone(
             target_variable=variable,
             non_geo_constraints=non_geo,
             n_households=n_households,
             hh_vars=hh_vars_cache,
+            reform_hh_vars=reform_hh_cache,
             person_vars=person_vars_cache,
             entity_rel=entity_rel,
             household_ids=household_ids,
             variable_entity_map=variable_entity_map,
+            reform_id=reform_id,
         )
 
         sim_value = float(np.dot(per_hh, hh_weight))
@@ -709,9 +775,14 @@ def _run_state_via_districts(
             row_data = state_targets.iloc[tidx]
             target_value = float(row_data["value"])
             variable = row_data["variable"]
+            reform_id = int(row_data.get("reform_id", 0))
             stratum_id = int(row_data["stratum_id"])
             constraints = constraints_map.get(stratum_id, [])
-            target_name = UnifiedMatrixBuilder._make_target_name(variable, constraints)
+            target_name = UnifiedMatrixBuilder._make_target_name(
+                variable,
+                constraints,
+                reform_id=reform_id,
+            )
 
             per_district_rows.append(
                 {
@@ -737,12 +808,17 @@ def _run_state_via_districts(
     for i in range(n_targets):
         row_data = state_targets.iloc[i]
         variable = row_data["variable"]
+        reform_id = int(row_data.get("reform_id", 0))
         target_value = float(row_data["value"])
         sim_value = float(aggregated[i])
         stratum_id = int(row_data["stratum_id"])
 
         constraints = constraints_map.get(stratum_id, [])
-        target_name = UnifiedMatrixBuilder._make_target_name(variable, constraints)
+        target_name = UnifiedMatrixBuilder._make_target_name(
+            variable,
+            constraints,
+            reform_id=reform_id,
+        )
 
         error = sim_value - target_value
         abs_error = abs(error)
diff --git a/policyengine_us_data/db/create_database_tables.py b/policyengine_us_data/db/create_database_tables.py
index 4999a6f7f..86121f1d7 100644
--- a/policyengine_us_data/db/create_database_tables.py
+++ b/policyengine_us_data/db/create_database_tables.py
@@ -309,6 +309,7 @@ def validate_parent_child_constraints(mapper, connection, target: Stratum):
     t.target_id,
     t.stratum_id,
     t.variable,
+    t.reform_id,
     t.value,
     t.period,
     t.active,
@@ -348,7 +349,7 @@ def validate_parent_child_constraints(mapper, connection, target: Stratum):
 FROM targets t
 LEFT JOIN stratum_constraints sc ON t.stratum_id = sc.stratum_id
 GROUP BY t.target_id, t.stratum_id, t.variable,
-         t.value, t.period, t.active;
+         t.reform_id, t.value, t.period, t.active;
 """
 
 
diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
index a5e208687..12ec523bb 100644
--- a/policyengine_us_data/db/etl_national_targets.py
+++ b/policyengine_us_data/db/etl_national_targets.py
@@ -14,6 +14,8 @@
     etl_argparser,
 )
 
+TAX_EXPENDITURE_REFORM_ID = 1  # reform_id set on tax expenditure Target rows
+
 
 def extract_national_targets(dataset: str = DEFAULT_DATASET):
     """
@@ -31,6 +33,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
         Dictionary containing:
         - direct_sum_targets: Variables that can be summed directly
         - tax_filer_targets: Tax-related variables requiring filer constraint
+        - tax_expenditure_targets: Variables targeted via repeal-based tax expenditures
         - conditional_count_targets: Enrollment counts requiring constraints
         - cbo_targets: List of CBO projection targets
         - treasury_targets: List of Treasury/JCT targets
@@ -56,18 +59,12 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
         )
 
     # Separate tax-related targets that need filer constraint
-    tax_filer_targets = [
-        {
-            "variable": "qualified_business_income_deduction",
-            "value": 63.1e9,
-            "source": "Joint Committee on Taxation",
-            "notes": "QBI deduction tax expenditure",
-            "year": HARDCODED_YEAR,
-        },
-    ]
+    tax_filer_targets = []
 
-    # Itemized deduction targets need both filer and itemizer constraints
-    itemizer_targets = [
+    # These JCT values are tax expenditures, not baseline deduction totals.
+    # They must be matched against repeal-based income tax deltas in the
+    # unified calibration path.
+    tax_expenditure_targets = [
         {
             "variable": "salt_deduction",
             "value": 21.247e9,
@@ -96,6 +93,13 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
             "notes": "Mortgage interest deduction tax expenditure",
             "year": HARDCODED_YEAR,
         },
+        {
+            "variable": "qualified_business_income_deduction",
+            "value": 63.1e9,
+            "source": "Joint Committee on Taxation",
+            "notes": "QBI deduction tax expenditure",
+            "year": HARDCODED_YEAR,
+        },
     ]
 
     direct_sum_targets = [
@@ -398,7 +402,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET):
     return {
         "direct_sum_targets": direct_sum_targets,
         "tax_filer_targets": tax_filer_targets,
-        "itemizer_targets": itemizer_targets,
+        "tax_expenditure_targets": tax_expenditure_targets,
         "conditional_count_targets": conditional_count_targets,
         "cbo_targets": cbo_targets,
         "treasury_targets": treasury_targets,
@@ -418,10 +422,10 @@ def transform_national_targets(raw_targets):
     Returns
     -------
     tuple
-        (direct_targets_df, tax_filer_df, itemizer_df, conditional_targets)
+        (direct_targets_df, tax_filer_df, tax_expenditure_df, conditional_targets)
         - direct_targets_df: DataFrame with direct sum targets
         - tax_filer_df: DataFrame with tax-related targets needing filer constraint
-        - itemizer_df: DataFrame with itemized deduction targets needing filer + itemizer constraints
+        - tax_expenditure_df: DataFrame with reform-based tax expenditure targets
         - conditional_targets: List of conditional count targets
     """
 
@@ -450,19 +454,24 @@ def transform_national_targets(raw_targets):
     tax_filer_df = (
         pd.DataFrame(all_tax_filer_targets) if all_tax_filer_targets else pd.DataFrame()
     )
-    itemizer_df = (
-        pd.DataFrame(raw_targets["itemizer_targets"])
-        if raw_targets["itemizer_targets"]
+    tax_expenditure_df = (
+        pd.DataFrame(raw_targets["tax_expenditure_targets"])
+        if raw_targets["tax_expenditure_targets"]
         else pd.DataFrame()
     )
 
     # Conditional targets stay as list for special processing
     conditional_targets = raw_targets["conditional_count_targets"]
 
-    return direct_df, tax_filer_df, itemizer_df, conditional_targets
+    return direct_df, tax_filer_df, tax_expenditure_df, conditional_targets
 
 
-def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditional_targets):
+def load_national_targets(
+    direct_targets_df,
+    tax_filer_df,
+    tax_expenditure_df,
+    conditional_targets,
+):
     """
     Load national targets into the database.
 
@@ -472,8 +481,8 @@ def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditio
         DataFrame with direct sum target data
     tax_filer_df : pd.DataFrame
         DataFrame with tax-related targets needing filer constraint
-    itemizer_df : pd.DataFrame
-        DataFrame with itemized deduction targets needing filer + itemizer constraints
+    tax_expenditure_df : pd.DataFrame
+        DataFrame with reform-based tax expenditure targets
     conditional_targets : list
         List of conditional count targets requiring strata
     """
@@ -603,46 +612,49 @@ def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditio
                     session.add(target)
                     print(f"Added filer target: {target_data['variable']}")
 
-        # Process itemized deduction targets that need filer + itemizer constraints
-        if not itemizer_df.empty:
-            national_itemizer_stratum = (
+        # Process reform-based tax expenditure targets.
+        if not tax_expenditure_df.empty:
+            migrated_strata = (
                 session.query(Stratum)
                 .filter(
                     Stratum.parent_stratum_id == us_stratum.stratum_id,
-                    Stratum.notes == "United States - Itemizing Tax Filers",
+                    Stratum.notes.in_(
+                        [
+                            "United States - Tax Filers",
+                            "United States - Itemizing Tax Filers",
+                        ]
+                    ),
                 )
-                .first()
+                .all()
             )
+            migrated_stratum_ids = [s.stratum_id for s in migrated_strata]
 
-            if not national_itemizer_stratum:
-                national_itemizer_stratum = Stratum(
-                    parent_stratum_id=us_stratum.stratum_id,
-                    notes="United States - Itemizing Tax Filers",
-                )
-                national_itemizer_stratum.constraints_rel = [
-                    StratumConstraint(
-                        constraint_variable="tax_unit_is_filer",
-                        operation="==",
-                        value="1",
-                    ),
-                    StratumConstraint(
-                        constraint_variable="tax_unit_itemizes",
-                        operation="==",
-                        value="1",
-                    ),
-                ]
-                session.add(national_itemizer_stratum)
-                session.flush()
-                print("Created national itemizer stratum")
-
-            for _, target_data in itemizer_df.iterrows():
+            for _, target_data in tax_expenditure_df.iterrows():
                 target_year = target_data["year"]
+
+                # Clean up incorrectly scoped baseline rows from older DBs.
+                if migrated_stratum_ids:
+                    stale_targets = (
+                        session.query(Target)
+                        .filter(
+                            Target.stratum_id.in_(migrated_stratum_ids),
+                            Target.variable == target_data["variable"],
+                            Target.period == target_year,
+                            Target.reform_id == 0,
+                            Target.active == True,
+                        )
+                        .all()
+                    )
+                    for stale_target in stale_targets:
+                        stale_target.active = False
+
                 existing_target = (
                     session.query(Target)
                     .filter(
-                        Target.stratum_id == national_itemizer_stratum.stratum_id,
+                        Target.stratum_id == us_stratum.stratum_id,
                         Target.variable == target_data["variable"],
                         Target.period == target_year,
+                        Target.reform_id == TAX_EXPENDITURE_REFORM_ID,
                     )
                     .first()
                 )
@@ -650,6 +662,9 @@ def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditio
                 notes_parts = []
                 if pd.notna(target_data.get("notes")):
                     notes_parts.append(target_data["notes"])
+                notes_parts.append(
+                    "Modeled as repeal-based income tax expenditure target"
+                )
                 notes_parts.append(f"Source: {target_data.get('source', 'Unknown')}")
                 combined_notes = " | ".join(notes_parts)
 
@@ -657,19 +672,25 @@ def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditio
                     existing_target.value = target_data["value"]
                     existing_target.notes = combined_notes
                     existing_target.source = "PolicyEngine"
-                    print(f"Updated itemizer target: {target_data['variable']}")
+                    existing_target.active = True
+                    print(
+                        f"Updated tax expenditure target: {target_data['variable']}"
+                    )
                 else:
                     target = Target(
-                        stratum_id=national_itemizer_stratum.stratum_id,
+                        stratum_id=us_stratum.stratum_id,
                         variable=target_data["variable"],
                         period=target_year,
+                        reform_id=TAX_EXPENDITURE_REFORM_ID,
                         value=target_data["value"],
                         active=True,
                         source="PolicyEngine",
                         notes=combined_notes,
                     )
                     session.add(target)
-                    print(f"Added itemizer target: {target_data['variable']}")
+                    print(
+                        f"Added tax expenditure target: {target_data['variable']}"
+                    )
 
         # Process conditional count targets (enrollment counts)
         for cond_target in conditional_targets:
@@ -767,12 +788,15 @@ def load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditio
         session.commit()
 
         total_targets = (
-            len(direct_targets_df) + len(tax_filer_df) + len(itemizer_df) + len(conditional_targets)
+            len(direct_targets_df)
+            + len(tax_filer_df)
+            + len(tax_expenditure_df)
+            + len(conditional_targets)
         )
         print(f"\nSuccessfully loaded {total_targets} national targets")
         print(f"  - {len(direct_targets_df)} direct sum targets")
         print(f"  - {len(tax_filer_df)} tax filer targets")
-        print(f"  - {len(itemizer_df)} itemizer targets")
+        print(f"  - {len(tax_expenditure_df)} tax expenditure targets")
         print(f"  - {len(conditional_targets)} enrollment count targets (as strata)")
 
 
@@ -788,13 +812,23 @@ def main():
 
     # Transform
     print("Transforming targets...")
-    direct_targets_df, tax_filer_df, itemizer_df, conditional_targets = transform_national_targets(
+    (
+        direct_targets_df,
+        tax_filer_df,
+        tax_expenditure_df,
+        conditional_targets,
+    ) = transform_national_targets(
         raw_targets
     )
 
     # Load
     print("Loading targets into database...")
-    load_national_targets(direct_targets_df, tax_filer_df, itemizer_df, conditional_targets)
+    load_national_targets(
+        direct_targets_df,
+        tax_filer_df,
+        tax_expenditure_df,
+        conditional_targets,
+    )
 
     print("\nETL pipeline complete!")
 
diff --git a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py
index 492719d9e..da0f49882 100644
--- a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py
+++ b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py
@@ -49,6 +49,7 @@ def _create_test_db(db_path):
                 "target_id INTEGER PRIMARY KEY, "
                 "stratum_id INTEGER, "
                 "variable TEXT, "
+                "reform_id INTEGER DEFAULT 0, "
                 "value REAL, "
                 "period INTEGER, "
                 "active INTEGER DEFAULT 1)"
@@ -110,33 +111,41 @@ def _insert_aca_ptc_data(engine):
             )
 
         targets = [
-            (1, 1, "aca_ptc", 10000.0, 2022),
-            (2, 1, "tax_unit_count", 500.0, 2022),
-            (3, 2, "aca_ptc", 6000.0, 2022),
-            (4, 2, "tax_unit_count", 300.0, 2022),
-            (5, 3, "aca_ptc", 4000.0, 2022),
-            (6, 3, "tax_unit_count", 200.0, 2022),
-            (7, 4, "aca_ptc", 2000.0, 2022),
-            (8, 5, "aca_ptc", 2500.0, 2022),
-            (9, 6, "aca_ptc", 1500.0, 2022),
-            (10, 4, "tax_unit_count", 100.0, 2022),
-            (11, 5, "tax_unit_count", 120.0, 2022),
-            (12, 6, "tax_unit_count", 80.0, 2022),
-            (13, 7, "aca_ptc", 2200.0, 2022),
-            (14, 8, "aca_ptc", 1800.0, 2022),
-            (15, 7, "tax_unit_count", 110.0, 2022),
-            (16, 8, "tax_unit_count", 90.0, 2022),
-            (17, 9, "person_count", 19743689.0, 2024),
+            (1, 1, "aca_ptc", 0, 10000.0, 2022, 1),
+            (2, 1, "tax_unit_count", 0, 500.0, 2022, 1),
+            (3, 2, "aca_ptc", 0, 6000.0, 2022, 1),
+            (4, 2, "tax_unit_count", 0, 300.0, 2022, 1),
+            (5, 3, "aca_ptc", 0, 4000.0, 2022, 1),
+            (6, 3, "tax_unit_count", 0, 200.0, 2022, 1),
+            (7, 4, "aca_ptc", 0, 2000.0, 2022, 1),
+            (8, 5, "aca_ptc", 0, 2500.0, 2022, 1),
+            (9, 6, "aca_ptc", 0, 1500.0, 2022, 1),
+            (10, 4, "tax_unit_count", 0, 100.0, 2022, 1),
+            (11, 5, "tax_unit_count", 0, 120.0, 2022, 1),
+            (12, 6, "tax_unit_count", 0, 80.0, 2022, 1),
+            (13, 7, "aca_ptc", 0, 2200.0, 2022, 1),
+            (14, 8, "aca_ptc", 0, 1800.0, 2022, 1),
+            (15, 7, "tax_unit_count", 0, 110.0, 2022, 1),
+            (16, 8, "tax_unit_count", 0, 90.0, 2022, 1),
+            (17, 9, "person_count", 0, 19743689.0, 2024, 1),
+            (18, 1, "aca_ptc", 1, 999.0, 2022, 1),
+            (19, 1, "aca_ptc", 0, 12345.0, 2024, 0),
         ]
-        for tid, sid, var, val, period in targets:
+        for tid, sid, var, reform_id, val, period, active in targets:
             conn.execute(
-                text("INSERT INTO targets VALUES (:tid, :sid, :var, :val, :period, 1)"),
+                text(
+                    "INSERT INTO targets "
+                    "(target_id, stratum_id, variable, reform_id, value, period, active) "
+                    "VALUES (:tid, :sid, :var, :reform_id, :val, :period, :active)"
+                ),
                 {
                     "tid": tid,
                     "sid": sid,
                     "var": var,
+                    "reform_id": reform_id,
                     "val": val,
                     "period": period,
+                    "active": active,
                 },
             )
         conn.commit()
@@ -192,6 +201,30 @@ def test_geographic_id_populated(self):
         state_ca = df[(df["geo_level"] == "state") & (df["geographic_id"] == "6")]
         self.assertGreater(len(state_ca), 0)
 
+    def test_reform_targets_preserved(self):  # reform and baseline rows must coexist
+        b = self._make_builder()
+        df = b._query_targets({"domain_variables": ["aca_ptc"]})
+        reform_rows = df[(df["variable"] == "aca_ptc") & (df["reform_id"] == 1)]
+        baseline_rows = df[(df["variable"] == "aca_ptc") & (df["reform_id"] == 0)]
+        self.assertEqual(len(reform_rows), 1)  # fixture has one reform_id=1 row (target 18)
+        self.assertGreater(len(baseline_rows), 0)  # reform_id=0 rows are still returned
+
+    def test_inactive_targets_are_excluded(self):  # inactive fixture row must not appear
+        b = self._make_builder(time_period=2024)
+        df = b._query_targets({"stratum_ids": [1], "variables": ["aca_ptc"]})
+        baseline_rows = df[(df["variable"] == "aca_ptc") & (df["reform_id"] == 0)]
+        self.assertEqual(len(baseline_rows), 1)
+        self.assertEqual(int(baseline_rows.iloc[0]["period"]), 2022)  # not inactive 2024 row
+        self.assertEqual(float(baseline_rows.iloc[0]["value"]), 10000.0)  # active target 1
+
+    def test_target_name_adds_expenditure_suffix_for_reforms(self):  # reform target naming
+        name = UnifiedMatrixBuilder._make_target_name(
+            "salt_deduction",
+            [],  # empty constraints list
+            reform_id=1,
+        )
+        self.assertEqual(name, "national/salt_deduction_expenditure")
+
 
 class TestHierarchicalUprating(unittest.TestCase):
     @classmethod
diff --git a/policyengine_us_data/tests/test_schema_views_and_lookups.py b/policyengine_us_data/tests/test_schema_views_and_lookups.py
index c8e5f4f8a..e4fea0f08 100644
--- a/policyengine_us_data/tests/test_schema_views_and_lookups.py
+++ b/policyengine_us_data/tests/test_schema_views_and_lookups.py
@@ -66,6 +66,7 @@ def _add_target(
     period: int,
     value: float,
     active: bool = True,
+    reform_id: int = 0,
 ) -> Target:
     """Insert a target row."""
     target = Target(
@@ -74,6 +75,7 @@ def _add_target(
         period=period,
         value=value,
         active=active,
+        reform_id=reform_id,
     )
     session.add(target)
     session.commit()
@@ -371,6 +373,32 @@ def test_active_flag_passthrough(self):
             elif r[var_idx] == "household_count":
                 self.assertFalse(bool(r[active_idx]))
 
+    def test_reform_id_passthrough(self):
+        """Reform targets retain their reform_id in target_overview."""
+        with Session(self.engine) as session:
+            _add_target(
+                session,
+                self.national_id,
+                "salt_deduction",
+                2024,
+                21.247e9,
+                reform_id=1,  # non-default reform_id under test
+            )
+
+        rows = self._query_target_overview()
+        cols = self._overview_columns()
+        sid_idx = cols.index("stratum_id")  # rows are indexed by column position below
+        var_idx = cols.index("variable")
+        reform_idx = cols.index("reform_id")
+
+        matches = [
+            r
+            for r in rows
+            if r[sid_idx] == self.national_id and r[var_idx] == "salt_deduction"
+        ]
+        self.assertEqual(len(matches), 1)  # exactly one row for this stratum and variable
+        self.assertEqual(matches[0][reform_idx], 1)  # reform_id survives into the view
+
     # ----------------------------------------------------------------
     # get_geographic_strata()
     # ----------------------------------------------------------------

From e75dc7c217317cd1c1510a0fb9ced71dfc116167 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 27 Mar 2026 11:15:41 -0400
Subject: [PATCH 3/3] Format files for lint

---
 .../calibration/unified_matrix_builder.py     | 20 +++++++++++++------
 policyengine_us_data/db/etl_irs_soi.py        |  6 +++++-
 .../db/etl_national_targets.py                | 12 +++--------
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/policyengine_us_data/calibration/unified_matrix_builder.py b/policyengine_us_data/calibration/unified_matrix_builder.py
index 09c121935..1e1bb0055 100644
--- a/policyengine_us_data/calibration/unified_matrix_builder.py
+++ b/policyengine_us_data/calibration/unified_matrix_builder.py
@@ -492,9 +492,13 @@ def _assemble_clone_values_standalone(
         arr = np.zeros(n_records, dtype=np.float32)
         for state in unique_clone_states:
             mask = state_masks[int(state)]
-            arr[mask] = state_values[int(state)].get("reform_hh", {}).get(
-                var,
-                np.zeros(mask.sum(), dtype=np.float32),
+            arr[mask] = (
+                state_values[int(state)]
+                .get("reform_hh", {})
+                .get(
+                    var,
+                    np.zeros(mask.sum(), dtype=np.float32),
+                )
             )
         reform_hh_vars[var] = arr
 
@@ -1484,9 +1488,13 @@ def _assemble_clone_values(
             arr = np.zeros(n_records, dtype=np.float32)
             for state in unique_clone_states:
                 mask = state_masks[int(state)]
-                arr[mask] = state_values[int(state)].get("reform_hh", {}).get(
-                    var,
-                    np.zeros(mask.sum(), dtype=np.float32),
+                arr[mask] = (
+                    state_values[int(state)]
+                    .get("reform_hh", {})
+                    .get(
+                        var,
+                        np.zeros(mask.sum(), dtype=np.float32),
+                    )
                 )
             reform_hh_vars[var] = arr
 
diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py
index 33f08cef0..8e9543da8 100644
--- a/policyengine_us_data/db/etl_irs_soi.py
+++ b/policyengine_us_data/db/etl_irs_soi.py
@@ -29,7 +29,11 @@
 
 logger = logging.getLogger(__name__)
 
-ITEMIZED_DEDUCTION_VARIABLES = {"salt", "real_estate_taxes", "medical_expense_deduction"}
+ITEMIZED_DEDUCTION_VARIABLES = {
+    "salt",
+    "real_estate_taxes",
+    "medical_expense_deduction",
+}
 
 # IRS SOI data is typically available ~2 years after the tax year
 IRS_SOI_LAG_YEARS = 2
diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
index 12ec523bb..278e3a909 100644
--- a/policyengine_us_data/db/etl_national_targets.py
+++ b/policyengine_us_data/db/etl_national_targets.py
@@ -673,9 +673,7 @@ def load_national_targets(
                     existing_target.notes = combined_notes
                     existing_target.source = "PolicyEngine"
                     existing_target.active = True
-                    print(
-                        f"Updated tax expenditure target: {target_data['variable']}"
-                    )
+                    print(f"Updated tax expenditure target: {target_data['variable']}")
                 else:
                     target = Target(
                         stratum_id=us_stratum.stratum_id,
@@ -688,9 +686,7 @@ def load_national_targets(
                         notes=combined_notes,
                     )
                     session.add(target)
-                    print(
-                        f"Added tax expenditure target: {target_data['variable']}"
-                    )
+                    print(f"Added tax expenditure target: {target_data['variable']}")
 
         # Process conditional count targets (enrollment counts)
         for cond_target in conditional_targets:
@@ -817,9 +813,7 @@ def main():
         tax_filer_df,
         tax_expenditure_df,
         conditional_targets,
-    ) = transform_national_targets(
-        raw_targets
-    )
+    ) = transform_national_targets(raw_targets)
 
     # Load
     print("Loading targets into database...")