diff --git a/changelog.d/populate-spm-energy-subsidy.changed.md b/changelog.d/populate-spm-energy-subsidy.changed.md new file mode 100644 index 000000000..02b6cf3be --- /dev/null +++ b/changelog.d/populate-spm-energy-subsidy.changed.md @@ -0,0 +1 @@ +Populate the SPM housing and energy subsidy concepts directly from CPS ASEC SPM fields, and keep Census SPM resource aggregates out of generated pipeline datasets. diff --git a/policyengine_us_data/calibration/formulaic_inputs.py b/policyengine_us_data/calibration/formulaic_inputs.py index abe88a223..f316061a0 100644 --- a/policyengine_us_data/calibration/formulaic_inputs.py +++ b/policyengine_us_data/calibration/formulaic_inputs.py @@ -1,4 +1,4 @@ -"""Formula outputs that must not be persisted as dataset leaf inputs.""" +"""SPM formula/output aggregates that must not be persisted as leaf inputs.""" FORMULAIC_SPM_INPUTS_TO_DROP = frozenset( { @@ -9,11 +9,13 @@ "spm_unit_is_in_deep_spm_poverty", "spm_unit_spm_threshold", "spm_unit_geographic_adjustment", + "spm_unit_total_income_reported", + "spm_unit_net_income_reported", } ) def drop_formulaic_spm_inputs(variable_names: set[str]) -> None: - """Remove SPM formula outputs from a mutable variable-name set.""" + """Remove SPM formula/output aggregates from a mutable variable-name set.""" variable_names.difference_update(FORMULAIC_SPM_INPUTS_TO_DROP) diff --git a/policyengine_us_data/datasets/cps/census_cps.py b/policyengine_us_data/datasets/cps/census_cps.py index 8f2221077..37f85fb86 100644 --- a/policyengine_us_data/datasets/cps/census_cps.py +++ b/policyengine_us_data/datasets/cps/census_cps.py @@ -354,6 +354,9 @@ class CensusCPS_2018(CensusCPS): "PTOTVAL", "OI_OFF", "OI_VAL", + "ED_VAL", + "FIN_VAL", + "SRVS_VAL", "CSP_VAL", "PAW_VAL", "SSI_VAL", diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index d015e3faa..02bb22e2d 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -443,9 +443,7 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame): cps["rent"][mask] = imputed_values["rent"] # Assume zero housing assistance since cps["pre_subsidy_rent"] = cps["rent"] - cps["housing_assistance"] = np.zeros_like( - cps["spm_unit_capped_housing_subsidy_data"] - ) + cps["housing_assistance"] = np.zeros_like(cps["spm_unit_capped_housing_subsidy"]) cps["real_estate_taxes"] = np.zeros(len(cps["age"]), dtype=float) cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"] @@ -1276,9 +1274,19 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): cps["tax_exempt_ira_distributions"] = cps["roth_ira_distributions"] # Other income (OI_VAL) is a catch-all for all other income sources. # The code for alimony income is 20. - cps["alimony_income"] = (person.OI_OFF == 20) * person.OI_VAL + alimony_income = person.OI_OFF == 20 + cps["alimony_income"] = alimony_income * person.OI_VAL # The code for strike benefits is 12. - cps["strike_benefits"] = (person.OI_OFF == 12) * person.OI_VAL + strike_benefits = person.OI_OFF == 12 + cps["strike_benefits"] = strike_benefits * person.OI_VAL + cps["miscellaneous_income"] = np.where( + alimony_income | strike_benefits, + 0, + person.OI_VAL, + ) + cps["educational_assistance"] = person.ED_VAL + cps["financial_assistance"] = person.FIN_VAL + cps["survivor_benefits"] = person.SRVS_VAL cps["child_support_received"] = person.CSP_VAL # CPS SSI receipt anchors SSI take-up and disability alignment inside # add_takeup; it is dropped before the dataset is saved. @@ -1414,12 +1422,10 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): ) def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None: SPM_RENAMES = dict( - spm_unit_total_income_reported="SPM_TOTVAL", snap_reported="SPM_SNAPSUB", - spm_unit_capped_housing_subsidy_data="SPM_CAPHOUSESUB", - spm_unit_energy_subsidy_data="SPM_ENGVAL", + spm_unit_capped_housing_subsidy="SPM_CAPHOUSESUB", + spm_unit_energy_subsidy="SPM_ENGVAL", spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS", - spm_unit_net_income_reported="SPM_RESOURCES", spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS", ) diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py index 4140d3c47..42b8ba209 100644 --- a/policyengine_us_data/datasets/cps/enhanced_cps.py +++ b/policyengine_us_data/datasets/cps/enhanced_cps.py @@ -83,7 +83,6 @@ def compute_clone_diagnostics_summary( person_is_puf_clone, person_weight, person_in_poverty, - person_reported_in_poverty, spm_unit_is_puf_clone, spm_unit_weight, spm_unit_capped_work_childcare_expenses, @@ -96,7 +95,6 @@ def compute_clone_diagnostics_summary( person_is_puf_clone = np.asarray(person_is_puf_clone, dtype=bool) person_weight = np.asarray(person_weight, dtype=np.float64) person_in_poverty = np.asarray(person_in_poverty, dtype=bool) - person_reported_in_poverty = np.asarray(person_reported_in_poverty, dtype=bool) spm_unit_is_puf_clone = np.asarray(spm_unit_is_puf_clone, dtype=bool) spm_unit_weight = np.asarray(spm_unit_weight, dtype=np.float64) capped_childcare = np.asarray( @@ -108,7 +106,6 @@ def compute_clone_diagnostics_summary( spm_unit_taxes = np.asarray(spm_unit_taxes, dtype=np.float64) spm_unit_market_income = np.asarray(spm_unit_market_income, dtype=np.float64) - poor_modeled_only = person_in_poverty & ~person_reported_in_poverty clone_spm_weight = spm_unit_weight[spm_unit_is_puf_clone].sum() return { @@ -118,18 +115,10 @@ def compute_clone_diagnostics_summary( "clone_person_weight_share_pct": _weighted_share( person_is_puf_clone, person_weight ), - "clone_poor_modeled_only_person_weight_share_pct": _weighted_share( - person_is_puf_clone & poor_modeled_only, + "clone_poor_person_weight_share_pct": _weighted_share( + person_is_puf_clone & person_in_poverty, person_weight, ), - "poor_modeled_only_within_clone_person_weight_share_pct": ( - 0.0 - if person_weight[person_is_puf_clone].sum() <= 0 - else _weighted_share( - poor_modeled_only[person_is_puf_clone], - person_weight[person_is_puf_clone], - ) - ), "clone_childcare_exceeds_pre_subsidy_share_pct": ( 0.0 if clone_spm_weight <= 0 @@ -269,12 +258,6 @@ def build_clone_diagnostics_for_simulation( weight inputs back from disk. """ - person_reported_in_poverty = _to_numpy( - sim.calculate("spm_unit_net_income_reported", period=period, map_to="person") - ) < _to_numpy( - sim.calculate("spm_unit_spm_threshold", period=period, map_to="person") - ) - return compute_clone_diagnostics_summary( household_is_puf_clone=_load_saved_period_array( dataset_path, "household_is_puf_clone", period @@ -287,7 +270,6 @@ def build_clone_diagnostics_for_simulation( sim.calculate("household_weight", period=period, map_to="person") ), person_in_poverty=_to_numpy(sim.calculate("person_in_poverty", period=period)), - person_reported_in_poverty=person_reported_in_poverty, spm_unit_is_puf_clone=_load_saved_period_array( dataset_path, "spm_unit_is_puf_clone", period ), diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index e10cc763a..adecb6460 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -167,14 +167,15 @@ def _supports_structural_mortgage_inputs() -> bool: "child_support_received", "veterans_benefits", "workers_compensation", + "educational_assistance", + "financial_assistance", + "survivor_benefits", "disability_benefits", "strike_benefits", "receives_wic", # SPM variables - "spm_unit_total_income_reported", - "spm_unit_capped_housing_subsidy_data", - "spm_unit_energy_subsidy_data", - "spm_unit_net_income_reported", + "spm_unit_capped_housing_subsidy", + "spm_unit_energy_subsidy", "spm_unit_pre_subsidy_childcare_expenses", # Medical expenses "employer_sponsored_insurance_premiums", diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index d4b7bb5f0..e93778ea9 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -476,7 +476,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "year": 2024, }, { - "constraint_variable": "spm_unit_energy_subsidy_data", + "constraint_variable": "spm_unit_energy_subsidy", "target_variable": "household_count", "household_count": 5_939_605, "source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2023/FY2023AllStates%28National%29Profile-508Compliant.pdf", @@ -484,7 +484,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "year": 2023, }, { - "constraint_variable": "spm_unit_energy_subsidy_data", + "constraint_variable": "spm_unit_energy_subsidy", "target_variable": "household_count", "household_count": 5_876_646, "source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2024/FY2024_AllStates%28National%29_Profile.pdf", @@ -903,7 +903,7 @@ def load_national_targets( stratum_notes = "National ACA Premium Tax Credit Recipients" constraint_operation = ">" constraint_value = "0" - elif constraint_var == "spm_unit_energy_subsidy_data": + elif constraint_var == "spm_unit_energy_subsidy": stratum_notes = "National LIHEAP Recipient Households" constraint_operation = ">" constraint_value = "0" diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py index 7561e5c8a..ada9382d3 100644 --- a/policyengine_us_data/storage/upload_completed_datasets.py +++ b/policyengine_us_data/storage/upload_completed_datasets.py @@ -99,6 +99,8 @@ class MicrosimulationAggregateCheck: "free_school_meals_reported", "reduced_price_school_meals_reported", "spm_unit_wic_reported", + "spm_unit_total_income_reported", + "spm_unit_net_income_reported", "spm_unit_broadband_subsidy", "spm_unit_broadband_subsidy_reported", "spm_unit_payroll_tax_reported", @@ -185,8 +187,7 @@ class MicrosimulationAggregateCheck: CLONE_DIAGNOSTICS_METRICS = { "clone_household_weight_share_pct", "clone_person_weight_share_pct", - "clone_poor_modeled_only_person_weight_share_pct", - "poor_modeled_only_within_clone_person_weight_share_pct", + "clone_poor_person_weight_share_pct", "clone_childcare_exceeds_pre_subsidy_share_pct", "clone_childcare_above_5000_share_pct", "clone_taxes_exceed_market_income_share_pct", diff --git a/policyengine_us_data/utils/national_target_parity.py b/policyengine_us_data/utils/national_target_parity.py index 43befc479..0b5b60ea1 100644 --- a/policyengine_us_data/utils/national_target_parity.py +++ b/policyengine_us_data/utils/national_target_parity.py @@ -482,9 +482,9 @@ def classify_national_target( target_name, index.match( variable="household_count", - domain_variable="spm_unit_energy_subsidy_data", + domain_variable="spm_unit_energy_subsidy", period=period, - constraints=[_constraint("spm_unit_energy_subsidy_data", ">", 0)], + constraints=[_constraint("spm_unit_energy_subsidy", ">", 0)], ), reason="structured_liheap_target", ) diff --git a/pyproject.toml b/pyproject.toml index e057f7aed..90eda6aa0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us==1.691.12", + "policyengine-us @ git+https://github.com/PolicyEngine/policyengine-us@4588f756668f12cac43e847a73e6a1f38b0b296d", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/tests/integration/support/tiny_stage_3.py b/tests/integration/support/tiny_stage_3.py index 17fd6f63d..4e65a7934 100644 --- a/tests/integration/support/tiny_stage_3.py +++ b/tests/integration/support/tiny_stage_3.py @@ -46,9 +46,7 @@ *GROUP_LEVEL_VARIABLES, "tax_unit_count_dependents", "tax_unit_is_joint", - "spm_unit_total_income_reported", - "spm_unit_net_income_reported", - "spm_unit_capped_housing_subsidy_data", + "spm_unit_capped_housing_subsidy", "household_is_puf_clone", ) ) @@ -71,8 +69,6 @@ "state_fips", "tax_unit_count_dependents", "tax_unit_is_joint", - "spm_unit_total_income_reported", - "spm_unit_net_income_reported", "is_puf_clone", ) @@ -238,24 +234,11 @@ def _extended_group_arrays( household_count = len(arrays["household_id"]) puf_household_count = household_count - cps_household_count tax_unit_count_dependents = _count_dependents_by_tax_unit(arrays) - total_income = _sum_person_values_by_group( - group_ids=arrays["spm_unit_id"], - person_group_ids=arrays["person_spm_unit_id"], - person_values=( - arrays["employment_income"].astype(np.float32) - + arrays["self_employment_income"].astype(np.float32) - + arrays["social_security"].astype(np.float32) - ), - ) return { "tax_unit_count_dependents": tax_unit_count_dependents, "tax_unit_is_joint": arrays["filing_status"] == b"JOINT", - "spm_unit_total_income_reported": total_income.astype(np.float32), - "spm_unit_net_income_reported": np.round(total_income * 0.85, 2).astype( - np.float32 - ), - "spm_unit_capped_housing_subsidy_data": np.where( + "spm_unit_capped_housing_subsidy": np.where( arrays["tenure_type"] == b"RENTED", 1_200, 0, @@ -280,18 +263,6 @@ def _count_dependents_by_tax_unit(arrays: dict[str, np.ndarray]) -> np.ndarray: ) -def _sum_person_values_by_group( - *, - group_ids: np.ndarray, - person_group_ids: np.ndarray, - person_values: np.ndarray, -) -> np.ndarray: - return np.array( - [person_values[person_group_ids == group_id].sum() for group_id in group_ids], - dtype=np.float32, - ) - - def _resize_pattern(values: list[object], length: int, *, dtype) -> np.ndarray: repeats = int(np.ceil(length / len(values))) return np.resize(np.array(values * repeats, dtype=dtype), length) diff --git a/tests/integration/support/tiny_stage_4.py b/tests/integration/support/tiny_stage_4.py index 707e0acb7..a47006e16 100644 --- a/tests/integration/support/tiny_stage_4.py +++ b/tests/integration/support/tiny_stage_4.py @@ -147,7 +147,7 @@ def _load_period_arrays(path: Path) -> dict[str, np.ndarray]: def _calibrated_household_weights(arrays: dict[str, np.ndarray]) -> np.ndarray: weights = arrays["household_weight"].astype(np.float32) - income = arrays["spm_unit_total_income_reported"].astype(np.float32) + income = _spm_unit_income_proxy(arrays) income_rank = np.argsort(np.argsort(income)).astype(np.float32) center = income_rank.mean() scale = 1.0 + (income_rank - center) * 0.04 @@ -209,7 +209,7 @@ def _select_representative_household_ids( arrays: dict[str, np.ndarray], ) -> np.ndarray: household_ids = arrays["household_id"].astype(np.int64) - income = arrays["spm_unit_total_income_reported"].astype(np.float32) + income = _spm_unit_income_proxy(arrays) ordered = household_ids[np.argsort(income)] candidates = [ordered[0], ordered[len(ordered) // 2], ordered[-1]] @@ -223,6 +223,21 @@ def _select_representative_household_ids( return selected.astype(np.int64) +def _spm_unit_income_proxy(arrays: dict[str, np.ndarray]) -> np.ndarray: + person_income = ( + arrays["employment_income"].astype(np.float32) + + arrays["self_employment_income"].astype(np.float32) + + arrays["social_security"].astype(np.float32) + ) + return np.array( + [ + person_income[arrays["person_spm_unit_id"] == spm_unit_id].sum() + for spm_unit_id in arrays["spm_unit_id"] + ], + dtype=np.float32, + ) + + def _subset_by_household_ids( arrays: dict[str, np.ndarray], household_ids: np.ndarray, diff --git a/tests/integration/support/tiny_stage_5.py b/tests/integration/support/tiny_stage_5.py index be6adbabf..bebad6137 100644 --- a/tests/integration/support/tiny_stage_5.py +++ b/tests/integration/support/tiny_stage_5.py @@ -273,7 +273,7 @@ def _source_imputed_household_arrays( def _source_imputed_household_asset_inputs( arrays: dict[str, np.ndarray], ) -> dict[str, np.ndarray]: - income = arrays["spm_unit_total_income_reported"].astype(np.float32) + income = _spm_unit_income_proxy(arrays) return { "bank_account_assets": np.round(np.maximum(income * 0.06, 250), 2).astype( np.float32 @@ -288,6 +288,21 @@ def _source_imputed_household_asset_inputs( } +def _spm_unit_income_proxy(arrays: dict[str, np.ndarray]) -> np.ndarray: + person_income = ( + arrays["employment_income"].astype(np.float32) + + arrays["self_employment_income"].astype(np.float32) + + arrays["social_security"].astype(np.float32) + ) + return np.array( + [ + person_income[arrays["person_spm_unit_id"] == spm_unit_id].sum() + for spm_unit_id in arrays["spm_unit_id"] + ], + dtype=np.float32, + ) + + def _household_values_to_person( arrays: dict[str, np.ndarray], household_values: np.ndarray, diff --git a/tests/integration/test_cps_generation.py b/tests/integration/test_cps_generation.py index c87e564b7..3ba0f07d1 100644 --- a/tests/integration/test_cps_generation.py +++ b/tests/integration/test_cps_generation.py @@ -339,7 +339,7 @@ def fit(self, X_train, predictors, imputed_variables): cps = { "age": np.array([40, 12, 70], dtype=np.int32), "is_household_head": np.array([True, False, True], dtype=bool), - "spm_unit_capped_housing_subsidy_data": np.zeros(3, dtype=np.float32), + "spm_unit_capped_housing_subsidy": np.zeros(3, dtype=np.float32), } person = pd.DataFrame({"P_SEQ": [1, 2, 1]}) household = pd.DataFrame({"H_TENURE": [2, 1]}) @@ -355,7 +355,7 @@ def fit(self, X_train, predictors, imputed_variables): assert not dataset.file_path.exists() -def test_add_spm_variables_keeps_formulaic_outputs_out_of_dataset(): +def test_add_spm_variables_keeps_spm_output_aggregates_out_of_dataset(): from policyengine_us_data.datasets.cps.cps import add_spm_variables cps = {} @@ -380,11 +380,11 @@ def test_add_spm_variables_keeps_formulaic_outputs_out_of_dataset(): add_spm_variables(None, cps, spm_unit) - assert cps["spm_unit_total_income_reported"].tolist() == [50_000] - assert cps["spm_unit_net_income_reported"].tolist() == [45_000] + assert "spm_unit_total_income_reported" not in cps + assert "spm_unit_net_income_reported" not in cps assert cps["snap_reported"].tolist() == [1_200] - assert cps["spm_unit_capped_housing_subsidy_data"].tolist() == [3_000] - assert cps["spm_unit_energy_subsidy_data"].tolist() == [500] + assert cps["spm_unit_capped_housing_subsidy"].tolist() == [3_000] + assert cps["spm_unit_energy_subsidy"].tolist() == [500] assert cps["spm_unit_tenure_type"].tolist() == [b"RENTER"] for variable in ( "free_school_meals_reported", diff --git a/tests/integration/test_tiny_stage_3_artifacts.py b/tests/integration/test_tiny_stage_3_artifacts.py index 04d13b40f..e28362bf3 100644 --- a/tests/integration/test_tiny_stage_3_artifacts.py +++ b/tests/integration/test_tiny_stage_3_artifacts.py @@ -93,8 +93,8 @@ def test_tiny_extended_cps_derives_stage_4_contract_variables(tmp_path): arrays["employment_income"], ) assert (arrays["pre_tax_contributions"] >= 0).all() - assert (arrays["spm_unit_total_income_reported"] >= 0).all() - assert (arrays["spm_unit_net_income_reported"] >= 0).all() + assert "spm_unit_total_income_reported" not in arrays + assert "spm_unit_net_income_reported" not in arrays def test_tiny_extended_cps_digest_is_stable_for_same_inputs(tmp_path): diff --git a/tests/integration/test_tiny_stage_4_artifacts.py b/tests/integration/test_tiny_stage_4_artifacts.py index c290c8722..ddfece6e7 100644 --- a/tests/integration/test_tiny_stage_4_artifacts.py +++ b/tests/integration/test_tiny_stage_4_artifacts.py @@ -122,8 +122,12 @@ def test_tiny_stratified_cps_preserves_low_middle_and_high_income_rows(tmp_path) artifacts = create_stage_4_artifacts(workspace) arrays = _load_period_arrays(artifacts.stratified_extended_cps_path) - income = arrays["spm_unit_total_income_reported"] + income = ( + arrays["employment_income"].astype(np.float32) + + arrays["self_employment_income"].astype(np.float32) + + arrays["social_security"].astype(np.float32) + ) assert income.min() == 0 - assert income.max() >= 100_000 - assert len(np.unique(income)) == len(income) + assert income.max() >= 50_000 + assert len(np.unique(income)) >= 3 diff --git a/tests/unit/datasets/test_cps_file_handles.py b/tests/unit/datasets/test_cps_file_handles.py index 58183c307..c2c077321 100644 --- a/tests/unit/datasets/test_cps_file_handles.py +++ b/tests/unit/datasets/test_cps_file_handles.py @@ -479,7 +479,7 @@ class FakeACS_2022: dataset = FakeDataset() cps = { "age": np.array([40], dtype=np.int32), - "spm_unit_capped_housing_subsidy_data": np.array([0.0]), + "spm_unit_capped_housing_subsidy": np.array([0.0]), # add_id_variables populates this upstream of add_rent in the real # pipeline; see the policyengine-core#482 workaround override below. "is_household_head": np.array([True]), diff --git a/tests/unit/datasets/test_cps_income_variables.py b/tests/unit/datasets/test_cps_income_variables.py index 883a4dafe..69570cf33 100644 --- a/tests/unit/datasets/test_cps_income_variables.py +++ b/tests/unit/datasets/test_cps_income_variables.py @@ -33,6 +33,9 @@ def _minimal_person_income_frame() -> pd.DataFrame: "DST_VAL2_YNG", "OI_OFF", "OI_VAL", + "ED_VAL", + "FIN_VAL", + "SRVS_VAL", "CSP_VAL", "PAW_VAL", "SSI_VAL", @@ -68,3 +71,25 @@ def test_add_personal_income_variables_maps_farm_self_employment_to_operations() np.testing.assert_array_equal(cps["farm_operations_income"], [1_000.0, -500.0]) assert "farm_income" not in cps + + +def test_add_personal_income_variables_maps_spm_income_leaves(): + person = pd.concat( + [_minimal_person_income_frame(), _minimal_person_income_frame().iloc[[0]]], + ignore_index=True, + ) + person["OI_OFF"] = [0, 20, 12] + person["OI_VAL"] = [50.0, 70.0, 90.0] + person["ED_VAL"] = [10.0, 11.0, 12.0] + person["FIN_VAL"] = [20.0, 21.0, 22.0] + person["SRVS_VAL"] = [30.0, 31.0, 32.0] + cps = {} + + add_personal_income_variables(cps, person, 2024) + + np.testing.assert_array_equal(cps["miscellaneous_income"], [50.0, 0.0, 0.0]) + np.testing.assert_array_equal(cps["alimony_income"], [0.0, 70.0, 0.0]) + np.testing.assert_array_equal(cps["strike_benefits"], [0.0, 0.0, 90.0]) + np.testing.assert_array_equal(cps["educational_assistance"], [10.0, 11.0, 12.0]) + np.testing.assert_array_equal(cps["financial_assistance"], [20.0, 21.0, 22.0]) + np.testing.assert_array_equal(cps["survivor_benefits"], [30.0, 31.0, 32.0]) diff --git a/tests/unit/test_enhanced_cps_clone_diagnostics.py b/tests/unit/test_enhanced_cps_clone_diagnostics.py index f28827a78..bd1676120 100644 --- a/tests/unit/test_enhanced_cps_clone_diagnostics.py +++ b/tests/unit/test_enhanced_cps_clone_diagnostics.py @@ -50,7 +50,6 @@ def test_compute_clone_diagnostics_summary(): person_is_puf_clone=[False, True, True], person_weight=[4.0, 3.0, 3.0], person_in_poverty=[False, True, True], - person_reported_in_poverty=[False, False, True], spm_unit_is_puf_clone=[False, True, True], spm_unit_weight=[2.0, 3.0, 5.0], spm_unit_capped_work_childcare_expenses=[0.0, 6000.0, 7000.0], @@ -60,12 +59,7 @@ def test_compute_clone_diagnostics_summary(): ) assert diagnostics["clone_household_weight_share_pct"] == pytest.approx(10.0) - assert diagnostics[ - "clone_poor_modeled_only_person_weight_share_pct" - ] == pytest.approx(30.0) - assert diagnostics[ - "poor_modeled_only_within_clone_person_weight_share_pct" - ] == pytest.approx(50.0) + assert diagnostics["clone_poor_person_weight_share_pct"] == pytest.approx(60.0) assert diagnostics[ "clone_childcare_exceeds_pre_subsidy_share_pct" ] == pytest.approx(37.5) @@ -85,8 +79,6 @@ def __init__(self, values): class FakeSim: def calculate(self, variable, period=None, map_to=None): lookup = { - ("spm_unit_net_income_reported", "person"): [1000.0, 300.0, 100.0], - ("spm_unit_spm_threshold", "person"): [500.0, 200.0, 200.0], ("household_weight", None): [9.0, 1.0], ("household_weight", "person"): [9.0, 1.0, 1.0], ("household_weight", "spm_unit"): [9.0, 1.0], @@ -120,12 +112,9 @@ def calculate(self, variable, period=None, map_to=None): assert diagnostics["clone_household_weight_share_pct"] == pytest.approx(10.0) assert diagnostics["clone_person_weight_share_pct"] == pytest.approx(200.0 / 11.0) - assert diagnostics[ - "clone_poor_modeled_only_person_weight_share_pct" - ] == pytest.approx(100.0 / 11.0) - assert diagnostics[ - "poor_modeled_only_within_clone_person_weight_share_pct" - ] == pytest.approx(50.0) + assert diagnostics["clone_poor_person_weight_share_pct"] == pytest.approx( + 200.0 / 11.0 + ) assert diagnostics[ "clone_childcare_exceeds_pre_subsidy_share_pct" ] == pytest.approx(100.0) diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py index c78846b11..1a9629700 100644 --- a/tests/unit/test_etl_national_targets.py +++ b/tests/unit/test_etl_national_targets.py @@ -194,7 +194,7 @@ def test_load_national_targets_supports_liheap_household_counts(tmp_path, monkey conditional_targets = [ { - "constraint_variable": "spm_unit_energy_subsidy_data", + "constraint_variable": "spm_unit_energy_subsidy", "target_variable": "household_count", "household_count": 5_876_646, "source": "https://example.com/liheap-2024.pdf", @@ -226,7 +226,7 @@ def test_load_national_targets_supports_liheap_household_counts(tmp_path, monkey ) for constraint in liheap_stratum.constraints_rel } - assert ("spm_unit_energy_subsidy_data", ">", "0") in constraints + assert ("spm_unit_energy_subsidy", ">", "0") in constraints liheap_target = session.exec( select(Target).where( diff --git a/tests/unit/test_extended_cps.py b/tests/unit/test_extended_cps.py index 7a5ab2978..ca42d02f5 100644 --- a/tests/unit/test_extended_cps.py +++ b/tests/unit/test_extended_cps.py @@ -166,6 +166,10 @@ def test_spm_threshold_is_formula_output_not_qrf_imputed(self): ): ExtendedCPS._assert_no_computed_variables_exported(data, 2024) + def test_spm_resource_aggregates_are_not_qrf_imputed(self): + assert "spm_unit_total_income_reported" not in set(CPS_ONLY_IMPUTED_VARIABLES) + assert "spm_unit_net_income_reported" not in set(CPS_ONLY_IMPUTED_VARIABLES) + def test_weeks_worked_is_preserved_for_future_year_formulas(self): data = {"weeks_worked": {2024: np.array([52])}} diff --git a/tests/unit/test_upload_completed_datasets.py b/tests/unit/test_upload_completed_datasets.py index 542c2ade9..2c3dd7337 100644 --- a/tests/unit/test_upload_completed_datasets.py +++ b/tests/unit/test_upload_completed_datasets.py @@ -23,8 +23,7 @@ "period": 2024, "clone_household_weight_share_pct": 5.0, "clone_person_weight_share_pct": 5.0, - "clone_poor_modeled_only_person_weight_share_pct": 1.0, - "poor_modeled_only_within_clone_person_weight_share_pct": 20.0, + "clone_poor_person_weight_share_pct": 1.0, "clone_childcare_exceeds_pre_subsidy_share_pct": 0.0, "clone_childcare_above_5000_share_pct": 0.0, "clone_taxes_exceed_market_income_share_pct": 0.0, diff --git a/uv.lock b/uv.lock index 781b625fd..e04eb2258 100644 --- a/uv.lock +++ b/uv.lock @@ -2122,8 +2122,8 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.691.12" -source = { registry = "https://pypi.org/simple" } +version = "1.691.11" +source = { git = "https://github.com/PolicyEngine/policyengine-us?rev=4588f756668f12cac43e847a73e6a1f38b0b296d#4588f756668f12cac43e847a73e6a1f38b0b296d" } dependencies = [ { name = "microdf-python" }, { name = "pandas" }, @@ -2132,10 +2132,6 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4a/23/c8eb34c1c0c0e8150fba8467f1f8463f606a68c5b28fd31ad637cecd886d/policyengine_us-1.691.12.tar.gz", hash = "sha256:435fa2a8c7085f13a7d9d2ce903670f9d40ee7538a2db28a2dfae038d2bfa91a", size = 9507768, upload-time = "2026-05-16T16:54:37.905Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/6f/b605fc1d8e06e377ae50870dc44a28cfe6562f0032e36dd53a5ef49472db/policyengine_us-1.691.12-py3-none-any.whl", hash = "sha256:ef43482bd8c6cc16f8f1d4050423f5dc1d045af15931f5d1b089715a31c839d2", size = 10022960, upload-time = "2026-05-16T16:54:35.255Z" }, -] [[package]] name = "policyengine-us-data" @@ -2204,7 +2200,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", specifier = "==1.691.12" }, + { name = "policyengine-us", git = "https://github.com/PolicyEngine/policyengine-us?rev=4588f756668f12cac43e847a73e6a1f38b0b296d" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" },