diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..cafe3f63 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Add voluntary tax filer variable and filer count calibration targets by AGI band. diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 84969500..558a5353 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -289,6 +289,17 @@ def add_takeup(self): imputed_risk = rng.random(n_persons) < wic_risk_rate_by_person data["is_wic_at_nutritional_risk"] = receives_wic | imputed_risk + # Voluntary tax filing: some people file even when not required and not + # seeking a refund. EITC take-up already captures refund-seeking behavior + # (if you take up EITC, you file). This variable captures people who file + # for other reasons: state requirements, documentation, habit. + # ~5% of tax units who don't take up EITC still file voluntarily. + voluntary_filing_rate = 0.05 + rng = seeded_rng("would_file_taxes_voluntarily") + data["would_file_taxes_voluntarily"] = ~data["takes_up_eitc"] & ( + rng.random(n_tax_units) < voluntary_filing_rate + ) + self.save_dataset(data) diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index f798c0dc..05caf44b 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -330,6 +330,39 @@ def build_loss_matrix(dataset: type, time_period): ) targets_array.append(row["eitc_total"] * eitc_spending_uprating) + # Tax filer counts by AGI band (SOI Table 1.1) + # This calibrates total filers (not just taxable returns) including + # low-AGI filers who are important for income distribution accuracy + SOI_FILER_COUNTS_2015 = { + # (agi_lower, agi_upper): total_returns + (-np.inf, 0): 2_072_066, + (0, 5_000): 10_134_703, + (5_000, 10_000): 11_398_595, + (10_000, 25_000): 23_447_927, + (25_000, 50_000): 23_727_745, + (50_000, 100_000): 32_801_908, + (100_000, np.inf): 25_120_985, + } + + # Get AGI and filer status at tax unit level, mapped to household + agi_tu = sim.calculate("adjusted_gross_income").values + is_filer_tu = sim.calculate("tax_unit_is_filer").values > 0 + + for ( + agi_lower, + agi_upper, + ), filer_count_2015 in SOI_FILER_COUNTS_2015.items(): + in_band = (agi_tu >= agi_lower) & (agi_tu < agi_upper) + label = f"nation/soi/filer_count/agi_{fmt(agi_lower)}_{fmt(agi_upper)}" + loss_matrix[label] = sim.map_result( + (is_filer_tu & in_band).astype(float), + "tax_unit", + "household", + ) + # Uprate from 2015 to current year using population growth + uprated_target = filer_count_2015 * population_uprating + targets_array.append(uprated_target) + # Hard-coded totals for variable_name, target in HARD_CODED_TOTALS.items(): label = f"nation/census/{variable_name}"