From 9668ea6c516360ec436c0b3841c9362f01818782 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 20:18:37 -0500 Subject: [PATCH 1/3] Derive partnership_se_income from PUF instead of looking for missing columns The raw IRS PUF doesn't contain k1bx14p/k1bx14s columns - these are derived by PSLmodels/taxdata from the total SE income (E30400/E30500) minus Schedule C (E00900) and Schedule F (E02100) income. This fix implements the same derivation logic from taxdata's finalprep.py split_earnings_variables function. The formula is: partnership_se = (E30400 + E30500) - E00900 - E02100 This ensures partnership_se_income has non-zero values in the PUF-based datasets, enabling accurate SE tax calculations for general partners. Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/datasets/puf/puf.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index d00e0bdc..7cb0e128 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -383,11 +383,23 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: # Ignore cmbtp (estimate of AMT income not in AGI) # Partnership self-employment income from Schedule K-1 Box 14 - # This is the portion of partnership income subject to SE tax (general partners only) - # k1bx14p = taxpayer, k1bx14s = spouse - k1bx14p = puf["k1bx14p"] if "k1bx14p" in puf.columns else 0 - k1bx14s = puf["k1bx14s"] if "k1bx14s" in puf.columns else 0 - puf["partnership_se_income"] = k1bx14p + k1bx14s + # This is the portion of partnership income subject to SE tax (general partners) + # Derived from total SE income minus Schedule C and Schedule F income + # Based on PSLmodels/taxdata finalprep.py split_earnings_variables logic: + # E30400 = taxpayer's total SE taxable income (Sch C + Sch F + K-1 box 14) + # E30500 = spouse's total SE taxable income + # E00900 = Schedule C net profit/loss + # E02100 = Schedule F farm income + # Formula: k1bx14 = E30400 + E30500 - E00900 - E02100 + total_se_income = puf["E30400"].fillna(0) + puf["E30500"].fillna(0) + schedule_c_f_income = puf["E00900"].fillna(0) + puf["E02100"].fillna(0) + # Partnership SE is residual; can be negative (losses) unless both components <= 0 + partnership_se = np.where( + np.logical_and(schedule_c_f_income <= 0, total_se_income <= 0), + 0.0, + total_se_income - schedule_c_f_income, + ) + puf["partnership_se_income"] = partnership_se # --- Qualified Business Income Deduction (QBID) simulation --- w2, ubia = simulate_w2_and_ubia_from_puf(puf, seed=42) From 0fc7f6695c7ad22c96b4ebcc29e5b618cdf9a9a5 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 20:25:03 -0500 Subject: [PATCH 2/3] Use Yale Budget Lab's gross-up approach for partnership_se_income The E30400/E30500 PUF columns are already TAXABLE SE income (post-0.9235 deduction factor). Since PolicyEngine applies the 0.9235 factor itself in taxable_self_employment_income, we need to provide GROSS partnership SE income. Changes: - Gross up E30400+E30500 by dividing by 0.9235 before subtracting Sch C/F - Only compute when partnership activity exists (E25940+E25980-E25920-E25960 != 0) This aligns with Yale Budget Lab's Tax-Data approach in process_puf.R: part_se = if_else(E25940 + E25980 - E25920 - E25960 != 0, (E30400 + E30500) / 0.9235 - E00900 - E02100, 0) Weighted sum increases from $12.7B to $55.7B, which is more realistic given total SE income of ~$400B. Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/datasets/puf/puf.py | 30 +++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index 7cb0e128..b3290fe9 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -385,19 +385,27 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: # Partnership self-employment income from Schedule K-1 Box 14 # This is the portion of partnership income subject to SE tax (general partners) # Derived from total SE income minus Schedule C and Schedule F income - # Based on PSLmodels/taxdata finalprep.py split_earnings_variables logic: - # E30400 = taxpayer's total SE taxable income (Sch C + Sch F + K-1 box 14) - # E30500 = spouse's total SE taxable income - # E00900 = Schedule C net profit/loss - # E02100 = Schedule F farm income - # Formula: k1bx14 = E30400 + E30500 - E00900 - E02100 - total_se_income = puf["E30400"].fillna(0) + puf["E30500"].fillna(0) + # Based on Yale Budget Lab's Tax-Data process_puf.R approach: + # E30400 = taxpayer's TAXABLE SE income (already * 0.9235) + # E30500 = spouse's TAXABLE SE income (already * 0.9235) + # E00900 = Schedule C net profit/loss (gross) + # E02100 = Schedule F farm income (gross) + # Since E30400/E30500 are post-deduction (taxable), we gross them up + # by dividing by 0.9235 before subtracting Sch C/F. + # PolicyEngine applies the 0.9235 factor itself in taxable_self_employment_income. + SE_DEDUCTION_FACTOR = 0.9235 # 1 - 0.5 * 0.153 (half of SE tax rate) + taxable_se = puf["E30400"].fillna(0) + puf["E30500"].fillna(0) + gross_se = taxable_se / SE_DEDUCTION_FACTOR schedule_c_f_income = puf["E00900"].fillna(0) + puf["E02100"].fillna(0) - # Partnership SE is residual; can be negative (losses) unless both components <= 0 + # Only compute when there's partnership activity (net partnership income != 0) + has_partnership = ( + puf["E25940"].fillna(0) + + puf["E25980"].fillna(0) + - puf["E25920"].fillna(0) + - puf["E25960"].fillna(0) + ) != 0 partnership_se = np.where( - np.logical_and(schedule_c_f_income <= 0, total_se_income <= 0), - 0.0, - total_se_income - schedule_c_f_income, + has_partnership, gross_se - schedule_c_f_income, 0 ) puf["partnership_se_income"] = partnership_se From 1be33eb3ce9f8de4a0daf3f03d44f51969c13d94 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 25 Jan 2026 20:26:28 -0500 Subject: [PATCH 3/3] Add changelog entry --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..f9cf85bf 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Derive partnership_se_income from PUF source columns using Yale Budget Lab's gross-up approach instead of looking for non-existent k1bx14 columns.