Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/populate-spm-energy-subsidy.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Populate the SPM housing and energy subsidy concepts directly from CPS ASEC SPM fields, and keep Census SPM resource aggregates out of generated pipeline datasets.
6 changes: 4 additions & 2 deletions policyengine_us_data/calibration/formulaic_inputs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Formula outputs that must not be persisted as dataset leaf inputs."""
"""SPM formula/output aggregates that must not be persisted as leaf inputs."""

FORMULAIC_SPM_INPUTS_TO_DROP = frozenset(
{
Expand All @@ -9,11 +9,13 @@
"spm_unit_is_in_deep_spm_poverty",
"spm_unit_spm_threshold",
"spm_unit_geographic_adjustment",
"spm_unit_total_income_reported",
"spm_unit_net_income_reported",
}
)


def drop_formulaic_spm_inputs(variable_names: set[str]) -> None:
    """Remove SPM formula/output aggregates from a mutable variable-name set.

    The set is modified in place: every name listed in
    ``FORMULAIC_SPM_INPUTS_TO_DROP`` is discarded from ``variable_names``.
    Names that are not present are ignored, and nothing is returned.

    Args:
        variable_names: Mutable set of variable names to prune.
    """
    variable_names.difference_update(FORMULAIC_SPM_INPUTS_TO_DROP)
3 changes: 3 additions & 0 deletions policyengine_us_data/datasets/cps/census_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,9 @@ class CensusCPS_2018(CensusCPS):
"PTOTVAL",
"OI_OFF",
"OI_VAL",
"ED_VAL",
"FIN_VAL",
"SRVS_VAL",
"CSP_VAL",
"PAW_VAL",
"SSI_VAL",
Expand Down
24 changes: 15 additions & 9 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,9 +443,7 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
cps["rent"][mask] = imputed_values["rent"]
# Assume zero housing assistance, so pre-subsidy rent equals rent.
cps["pre_subsidy_rent"] = cps["rent"]
cps["housing_assistance"] = np.zeros_like(
cps["spm_unit_capped_housing_subsidy_data"]
)
cps["housing_assistance"] = np.zeros_like(cps["spm_unit_capped_housing_subsidy"])
cps["real_estate_taxes"] = np.zeros(len(cps["age"]), dtype=float)
cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"]

Expand Down Expand Up @@ -1276,9 +1274,19 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
cps["tax_exempt_ira_distributions"] = cps["roth_ira_distributions"]
# Other income (OI_VAL) is a catch-all for all other income sources.
# The code for alimony income is 20.
cps["alimony_income"] = (person.OI_OFF == 20) * person.OI_VAL
alimony_income = person.OI_OFF == 20
cps["alimony_income"] = alimony_income * person.OI_VAL
# The code for strike benefits is 12.
cps["strike_benefits"] = (person.OI_OFF == 12) * person.OI_VAL
strike_benefits = person.OI_OFF == 12
cps["strike_benefits"] = strike_benefits * person.OI_VAL
cps["miscellaneous_income"] = np.where(
alimony_income | strike_benefits,
0,
person.OI_VAL,
)
cps["educational_assistance"] = person.ED_VAL
cps["financial_assistance"] = person.FIN_VAL
cps["survivor_benefits"] = person.SRVS_VAL
cps["child_support_received"] = person.CSP_VAL
# CPS SSI receipt anchors SSI take-up and disability alignment inside
# add_takeup; it is dropped before the dataset is saved.
Expand Down Expand Up @@ -1414,12 +1422,10 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
)
def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
SPM_RENAMES = dict(
spm_unit_total_income_reported="SPM_TOTVAL",
snap_reported="SPM_SNAPSUB",
spm_unit_capped_housing_subsidy_data="SPM_CAPHOUSESUB",
spm_unit_energy_subsidy_data="SPM_ENGVAL",
spm_unit_capped_housing_subsidy="SPM_CAPHOUSESUB",
spm_unit_energy_subsidy="SPM_ENGVAL",
spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS",
spm_unit_net_income_reported="SPM_RESOURCES",
spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS",
)

Expand Down
22 changes: 2 additions & 20 deletions policyengine_us_data/datasets/cps/enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def compute_clone_diagnostics_summary(
person_is_puf_clone,
person_weight,
person_in_poverty,
person_reported_in_poverty,
spm_unit_is_puf_clone,
spm_unit_weight,
spm_unit_capped_work_childcare_expenses,
Expand All @@ -96,7 +95,6 @@ def compute_clone_diagnostics_summary(
person_is_puf_clone = np.asarray(person_is_puf_clone, dtype=bool)
person_weight = np.asarray(person_weight, dtype=np.float64)
person_in_poverty = np.asarray(person_in_poverty, dtype=bool)
person_reported_in_poverty = np.asarray(person_reported_in_poverty, dtype=bool)
spm_unit_is_puf_clone = np.asarray(spm_unit_is_puf_clone, dtype=bool)
spm_unit_weight = np.asarray(spm_unit_weight, dtype=np.float64)
capped_childcare = np.asarray(
Expand All @@ -108,7 +106,6 @@ def compute_clone_diagnostics_summary(
spm_unit_taxes = np.asarray(spm_unit_taxes, dtype=np.float64)
spm_unit_market_income = np.asarray(spm_unit_market_income, dtype=np.float64)

poor_modeled_only = person_in_poverty & ~person_reported_in_poverty
clone_spm_weight = spm_unit_weight[spm_unit_is_puf_clone].sum()

return {
Expand All @@ -118,18 +115,10 @@ def compute_clone_diagnostics_summary(
"clone_person_weight_share_pct": _weighted_share(
person_is_puf_clone, person_weight
),
"clone_poor_modeled_only_person_weight_share_pct": _weighted_share(
person_is_puf_clone & poor_modeled_only,
"clone_poor_person_weight_share_pct": _weighted_share(
person_is_puf_clone & person_in_poverty,
person_weight,
),
"poor_modeled_only_within_clone_person_weight_share_pct": (
0.0
if person_weight[person_is_puf_clone].sum() <= 0
else _weighted_share(
poor_modeled_only[person_is_puf_clone],
person_weight[person_is_puf_clone],
)
),
"clone_childcare_exceeds_pre_subsidy_share_pct": (
0.0
if clone_spm_weight <= 0
Expand Down Expand Up @@ -269,12 +258,6 @@ def build_clone_diagnostics_for_simulation(
weight inputs back from disk.
"""

person_reported_in_poverty = _to_numpy(
sim.calculate("spm_unit_net_income_reported", period=period, map_to="person")
) < _to_numpy(
sim.calculate("spm_unit_spm_threshold", period=period, map_to="person")
)

return compute_clone_diagnostics_summary(
household_is_puf_clone=_load_saved_period_array(
dataset_path, "household_is_puf_clone", period
Expand All @@ -287,7 +270,6 @@ def build_clone_diagnostics_for_simulation(
sim.calculate("household_weight", period=period, map_to="person")
),
person_in_poverty=_to_numpy(sim.calculate("person_in_poverty", period=period)),
person_reported_in_poverty=person_reported_in_poverty,
spm_unit_is_puf_clone=_load_saved_period_array(
dataset_path, "spm_unit_is_puf_clone", period
),
Expand Down
9 changes: 5 additions & 4 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,15 @@ def _supports_structural_mortgage_inputs() -> bool:
"child_support_received",
"veterans_benefits",
"workers_compensation",
"educational_assistance",
"financial_assistance",
"survivor_benefits",
"disability_benefits",
"strike_benefits",
"receives_wic",
# SPM variables
"spm_unit_total_income_reported",
"spm_unit_capped_housing_subsidy_data",
"spm_unit_energy_subsidy_data",
"spm_unit_net_income_reported",
"spm_unit_capped_housing_subsidy",
"spm_unit_energy_subsidy",
"spm_unit_pre_subsidy_childcare_expenses",
# Medical expenses
"employer_sponsored_insurance_premiums",
Expand Down
6 changes: 3 additions & 3 deletions policyengine_us_data/db/etl_national_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,15 +476,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
"year": 2024,
},
{
"constraint_variable": "spm_unit_energy_subsidy_data",
"constraint_variable": "spm_unit_energy_subsidy",
"target_variable": "household_count",
"household_count": 5_939_605,
"source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2023/FY2023AllStates%28National%29Profile-508Compliant.pdf",
"notes": "LIHEAP total households served by state programs",
"year": 2023,
},
{
"constraint_variable": "spm_unit_energy_subsidy_data",
"constraint_variable": "spm_unit_energy_subsidy",
"target_variable": "household_count",
"household_count": 5_876_646,
"source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2024/FY2024_AllStates%28National%29_Profile.pdf",
Expand Down Expand Up @@ -903,7 +903,7 @@ def load_national_targets(
stratum_notes = "National ACA Premium Tax Credit Recipients"
constraint_operation = ">"
constraint_value = "0"
elif constraint_var == "spm_unit_energy_subsidy_data":
elif constraint_var == "spm_unit_energy_subsidy":
stratum_notes = "National LIHEAP Recipient Households"
constraint_operation = ">"
constraint_value = "0"
Expand Down
5 changes: 3 additions & 2 deletions policyengine_us_data/storage/upload_completed_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ class MicrosimulationAggregateCheck:
"free_school_meals_reported",
"reduced_price_school_meals_reported",
"spm_unit_wic_reported",
"spm_unit_total_income_reported",
"spm_unit_net_income_reported",
"spm_unit_broadband_subsidy",
"spm_unit_broadband_subsidy_reported",
"spm_unit_payroll_tax_reported",
Expand Down Expand Up @@ -185,8 +187,7 @@ class MicrosimulationAggregateCheck:
CLONE_DIAGNOSTICS_METRICS = {
"clone_household_weight_share_pct",
"clone_person_weight_share_pct",
"clone_poor_modeled_only_person_weight_share_pct",
"poor_modeled_only_within_clone_person_weight_share_pct",
"clone_poor_person_weight_share_pct",
"clone_childcare_exceeds_pre_subsidy_share_pct",
"clone_childcare_above_5000_share_pct",
"clone_taxes_exceed_market_income_share_pct",
Expand Down
4 changes: 2 additions & 2 deletions policyengine_us_data/utils/national_target_parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,9 @@ def classify_national_target(
target_name,
index.match(
variable="household_count",
domain_variable="spm_unit_energy_subsidy_data",
domain_variable="spm_unit_energy_subsidy",
period=period,
constraints=[_constraint("spm_unit_energy_subsidy_data", ">", 0)],
constraints=[_constraint("spm_unit_energy_subsidy", ">", 0)],
),
reason="structured_liheap_target",
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.14",
]
dependencies = [
"policyengine-us==1.691.12",
"policyengine-us @ git+https://github.com/PolicyEngine/policyengine-us@4588f756668f12cac43e847a73e6a1f38b0b296d",
# policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.
Expand Down
33 changes: 2 additions & 31 deletions tests/integration/support/tiny_stage_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@
*GROUP_LEVEL_VARIABLES,
"tax_unit_count_dependents",
"tax_unit_is_joint",
"spm_unit_total_income_reported",
"spm_unit_net_income_reported",
"spm_unit_capped_housing_subsidy_data",
"spm_unit_capped_housing_subsidy",
"household_is_puf_clone",
)
)
Expand All @@ -71,8 +69,6 @@
"state_fips",
"tax_unit_count_dependents",
"tax_unit_is_joint",
"spm_unit_total_income_reported",
"spm_unit_net_income_reported",
"is_puf_clone",
)

Expand Down Expand Up @@ -238,24 +234,11 @@ def _extended_group_arrays(
household_count = len(arrays["household_id"])
puf_household_count = household_count - cps_household_count
tax_unit_count_dependents = _count_dependents_by_tax_unit(arrays)
total_income = _sum_person_values_by_group(
group_ids=arrays["spm_unit_id"],
person_group_ids=arrays["person_spm_unit_id"],
person_values=(
arrays["employment_income"].astype(np.float32)
+ arrays["self_employment_income"].astype(np.float32)
+ arrays["social_security"].astype(np.float32)
),
)

return {
"tax_unit_count_dependents": tax_unit_count_dependents,
"tax_unit_is_joint": arrays["filing_status"] == b"JOINT",
"spm_unit_total_income_reported": total_income.astype(np.float32),
"spm_unit_net_income_reported": np.round(total_income * 0.85, 2).astype(
np.float32
),
"spm_unit_capped_housing_subsidy_data": np.where(
"spm_unit_capped_housing_subsidy": np.where(
arrays["tenure_type"] == b"RENTED",
1_200,
0,
Expand All @@ -280,18 +263,6 @@ def _count_dependents_by_tax_unit(arrays: dict[str, np.ndarray]) -> np.ndarray:
)


def _sum_person_values_by_group(
    *,
    group_ids: np.ndarray,
    person_group_ids: np.ndarray,
    person_values: np.ndarray,
) -> np.ndarray:
    """Sum person-level values within each group.

    Args:
        group_ids: Group identifiers; the result has one entry per element,
            in this order.
        person_group_ids: Group identifier of each person; compared
            element-wise against every entry of ``group_ids``.
        person_values: Per-person values, indexed by the boolean mask built
            from ``person_group_ids``.

    Returns:
        A float32 array of per-group totals. A group with no matching person
        gets the sum of an empty selection, i.e. 0.
    """
    return np.array(
        [person_values[person_group_ids == group_id].sum() for group_id in group_ids],
        dtype=np.float32,
    )


def _resize_pattern(values: list[object], length: int, *, dtype) -> np.ndarray:
    """Return an array of ``length`` elements that cycles through ``values``.

    Args:
        values: Pattern to repeat; must be non-empty.
        length: Number of elements in the returned array.
        dtype: dtype passed to ``np.array`` when the repeated pattern is built.

    Returns:
        The pattern repeated and cut to exactly ``length`` elements.

    Raises:
        ZeroDivisionError: If ``values`` is empty.
    """
    # Repeat the list often enough to cover ``length`` before resizing.
    repeats = int(np.ceil(length / len(values)))
    return np.resize(np.array(values * repeats, dtype=dtype), length)
Expand Down
19 changes: 17 additions & 2 deletions tests/integration/support/tiny_stage_4.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _load_period_arrays(path: Path) -> dict[str, np.ndarray]:

def _calibrated_household_weights(arrays: dict[str, np.ndarray]) -> np.ndarray:
weights = arrays["household_weight"].astype(np.float32)
income = arrays["spm_unit_total_income_reported"].astype(np.float32)
income = _spm_unit_income_proxy(arrays)
income_rank = np.argsort(np.argsort(income)).astype(np.float32)
center = income_rank.mean()
scale = 1.0 + (income_rank - center) * 0.04
Expand Down Expand Up @@ -209,7 +209,7 @@ def _select_representative_household_ids(
arrays: dict[str, np.ndarray],
) -> np.ndarray:
household_ids = arrays["household_id"].astype(np.int64)
income = arrays["spm_unit_total_income_reported"].astype(np.float32)
income = _spm_unit_income_proxy(arrays)
ordered = household_ids[np.argsort(income)]
candidates = [ordered[0], ordered[len(ordered) // 2], ordered[-1]]

Expand All @@ -223,6 +223,21 @@ def _select_representative_household_ids(
return selected.astype(np.int64)


def _spm_unit_income_proxy(arrays: dict[str, np.ndarray]) -> np.ndarray:
    """Build a per-SPM-unit income proxy from person-level income columns.

    Employment income, self-employment income and Social Security are each
    cast to float32 and added per person. The result is then summed over the
    people whose ``person_spm_unit_id`` equals each entry of ``spm_unit_id``.

    Args:
        arrays: Mapping that holds the ``employment_income``,
            ``self_employment_income``, ``social_security``,
            ``person_spm_unit_id`` and ``spm_unit_id`` arrays.

    Returns:
        A float32 array with one total per entry of ``spm_unit_id``, in the
        same order. A unit with no matching person gets 0.
    """
    person_income = (
        arrays["employment_income"].astype(np.float32)
        + arrays["self_employment_income"].astype(np.float32)
        + arrays["social_security"].astype(np.float32)
    )
    # Look the membership column up once instead of once per SPM unit.
    person_spm_unit_id = arrays["person_spm_unit_id"]
    return np.array(
        [
            person_income[person_spm_unit_id == spm_unit_id].sum()
            for spm_unit_id in arrays["spm_unit_id"]
        ],
        dtype=np.float32,
    )


def _subset_by_household_ids(
arrays: dict[str, np.ndarray],
household_ids: np.ndarray,
Expand Down
17 changes: 16 additions & 1 deletion tests/integration/support/tiny_stage_5.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def _source_imputed_household_arrays(
def _source_imputed_household_asset_inputs(
arrays: dict[str, np.ndarray],
) -> dict[str, np.ndarray]:
income = arrays["spm_unit_total_income_reported"].astype(np.float32)
income = _spm_unit_income_proxy(arrays)
return {
"bank_account_assets": np.round(np.maximum(income * 0.06, 250), 2).astype(
np.float32
Expand All @@ -288,6 +288,21 @@ def _source_imputed_household_asset_inputs(
}


def _spm_unit_income_proxy(arrays: dict[str, np.ndarray]) -> np.ndarray:
    """Build a per-SPM-unit income proxy from person-level income columns.

    Employment income, self-employment income and Social Security are each
    cast to float32 and added per person. The result is then summed over the
    people whose ``person_spm_unit_id`` equals each entry of ``spm_unit_id``.

    Args:
        arrays: Mapping that holds the ``employment_income``,
            ``self_employment_income``, ``social_security``,
            ``person_spm_unit_id`` and ``spm_unit_id`` arrays.

    Returns:
        A float32 array with one total per entry of ``spm_unit_id``, in the
        same order. A unit with no matching person gets 0.
    """
    person_income = (
        arrays["employment_income"].astype(np.float32)
        + arrays["self_employment_income"].astype(np.float32)
        + arrays["social_security"].astype(np.float32)
    )
    # Look the membership column up once instead of once per SPM unit.
    person_spm_unit_id = arrays["person_spm_unit_id"]
    return np.array(
        [
            person_income[person_spm_unit_id == spm_unit_id].sum()
            for spm_unit_id in arrays["spm_unit_id"]
        ],
        dtype=np.float32,
    )


def _household_values_to_person(
arrays: dict[str, np.ndarray],
household_values: np.ndarray,
Expand Down
Loading
Loading