From fd98af6fecfa6105c9275378b2d214d41bd8af07 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 11:16:55 -0500 Subject: [PATCH 1/3] Add partnership_se_income from Schedule K-1 Box 14 Uses k1bx14p (taxpayer) and k1bx14s (spouse) fields from the PUF, which represent the partnership income subject to self-employment tax as reported on Schedule K-1 Box 14. This enables policyengine-us to correctly include general partner SE income in the self-employment tax base per 26 USC 1402(a). Closes #480 Co-Authored-By: Claude Opus 4.5 --- changelog_entry.yaml | 4 ++++ policyengine_us_data/datasets/puf/puf.py | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..10ee6d5ff 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - partnership_se_income variable from Schedule K-1 Box 14 (k1bx14p + k1bx14s), representing partnership income subject to self-employment tax. diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index c90255e3f..c13313658 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -381,7 +381,13 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: puf["unreported_payroll_tax"] = puf.E09800 # Ignore f2441 (AMT form attached) # Ignore cmbtp (estimate of AMT income not in AGI) - # Ignore k1bx14s and k1bx14p (partner self-employment income included in partnership and S-corp income) + + # Partnership self-employment income from Schedule K-1 Box 14 + # This is the portion of partnership income subject to SE tax (general partners only) + # k1bx14p = taxpayer, k1bx14s = spouse + k1bx14p = puf["k1bx14p"] if "k1bx14p" in puf.columns else 0 + k1bx14s = puf["k1bx14s"] if "k1bx14s" in puf.columns else 0 + puf["partnership_se_income"] = k1bx14p + k1bx14s # --- Qualified Business Income Deduction (QBID) simulation --- w2, ubia = simulate_w2_and_ubia_from_puf(puf, seed=42) @@ -491,6 +497,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: "business_is_sstb", "deductible_mortgage_interest", "partnership_s_corp_income", + "partnership_se_income", "qualified_reit_and_ptp_income", "qualified_bdc_income", ] From 6565211c7f527a89b157f85c163d27da5d80f1d1 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 11:19:56 -0500 Subject: [PATCH 2/3] Update uv.lock --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 23c7d36b8..52082274a 100644 --- a/uv.lock +++ b/uv.lock @@ -1842,7 +1842,7 @@ wheels = [ [[package]] name = "policyengine-us-data" -version = "1.53.0" +version = "1.53.1" source = { editable = "." } dependencies = [ { name = "google-auth" }, From 064ceb1bb14de7295f0e4121b1a0038efa17da7a Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 11:36:02 -0500 Subject: [PATCH 3/3] Fix forward compatibility for new variables Filter FINANCIAL_SUBSET to only include variables that exist in policyengine-us. This allows us-data and policyengine-us PRs to be merged in any order without CI failures. Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/datasets/puf/puf.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index c13313658..d00e0bdc6 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -551,6 +551,13 @@ def generate(self): for variable in system.variables } + # Filter FINANCIAL_SUBSET to only include variables defined in + # policyengine-us. This allows us-data to be updated before or after + # policyengine-us without breaking. + self.available_financial_vars = [ + v for v in FINANCIAL_SUBSET if v in self.variable_to_entity + ] + VARIABLES = [ "person_id", "tax_unit_id", @@ -570,7 +577,7 @@ def generate(self): "is_tax_unit_head", "is_tax_unit_spouse", "is_tax_unit_dependent", - ] + FINANCIAL_SUBSET + ] + self.available_financial_vars self.holder = {variable: [] for variable in VARIABLES} @@ -614,7 +621,7 @@ def generate(self): def add_tax_unit(self, row, tax_unit_id): self.holder["tax_unit_id"].append(tax_unit_id) - for key in FINANCIAL_SUBSET: + for key in self.available_financial_vars: if self.variable_to_entity[key] == "tax_unit": self.holder[key].append(row[key]) @@ -656,7 +663,7 @@ def add_filer(self, row, tax_unit_id): row["interest_deduction"] ) - for key in FINANCIAL_SUBSET: + for key in self.available_financial_vars: if key == "deductible_mortgage_interest": # Skip this one- we are adding it artificially at the filer level. continue @@ -689,7 +696,7 @@ def add_spouse(self, row, tax_unit_id): self.holder["deductible_mortgage_interest"].append(0) - for key in FINANCIAL_SUBSET: + for key in self.available_financial_vars: if key == "deductible_mortgage_interest": # Skip this one- we are adding it artificially at the filer level. continue @@ -713,7 +720,7 @@ def add_dependent(self, row, tax_unit_id, dependent_id): self.holder["deductible_mortgage_interest"].append(0) - for key in FINANCIAL_SUBSET: + for key in self.available_financial_vars: if key == "deductible_mortgage_interest": # Skip this one- we are adding it artificially at the filer level. continue