Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Add voluntary tax filer variable and filer count calibration targets by AGI band.
11 changes: 11 additions & 0 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,17 @@ def add_takeup(self):
imputed_risk = rng.random(n_persons) < wic_risk_rate_by_person
data["is_wic_at_nutritional_risk"] = receives_wic | imputed_risk

# Voluntary tax filing: some people file even when not required and not
# seeking a refund. EITC take-up already captures refund-seeking behavior
# (if you take up EITC, you file). This variable captures people who file
# for other reasons: state requirements, documentation, habit.
# ~5% of tax units who don't take up EITC still file voluntarily.
voluntary_filing_rate = 0.05
rng = seeded_rng("would_file_taxes_voluntarily")
data["would_file_taxes_voluntarily"] = ~data["takes_up_eitc"] & (
rng.random(n_tax_units) < voluntary_filing_rate
)

self.save_dataset(data)


Expand Down
33 changes: 33 additions & 0 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,39 @@ def build_loss_matrix(dataset: type, time_period):
)
targets_array.append(row["eitc_total"] * eitc_spending_uprating)

# Tax filer counts by AGI band (SOI Table 1.1).
# This calibrates total filers (not just taxable returns), including
# low-AGI filers, who are important for income distribution accuracy.
SOI_FILER_COUNTS_2015 = {
# (agi_lower, agi_upper): total_returns
(-np.inf, 0): 2_072_066,
(0, 5_000): 10_134_703,
(5_000, 10_000): 11_398_595,
(10_000, 25_000): 23_447_927,
(25_000, 50_000): 23_727_745,
(50_000, 100_000): 32_801_908,
(100_000, np.inf): 25_120_985,
}

# Get AGI and filer status at the tax unit level; the per-band filer
# indicator is mapped to the household level inside the loop below.
agi_tu = sim.calculate("adjusted_gross_income").values
is_filer_tu = sim.calculate("tax_unit_is_filer").values > 0

for (
agi_lower,
agi_upper,
), filer_count_2015 in SOI_FILER_COUNTS_2015.items():
in_band = (agi_tu >= agi_lower) & (agi_tu < agi_upper)
label = f"nation/soi/filer_count/agi_{fmt(agi_lower)}_{fmt(agi_upper)}"
loss_matrix[label] = sim.map_result(
(is_filer_tu & in_band).astype(float),
"tax_unit",
"household",
)
# Uprate from 2015 to current year using population growth
uprated_target = filer_count_2015 * population_uprating
targets_array.append(uprated_target)

# Hard-coded totals
for variable_name, target in HARD_CODED_TOTALS.items():
label = f"nation/census/{variable_name}"
Expand Down