Skip to content
11 changes: 11 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
- bump: minor
changes:
added:
- Name-based seeding (seeded_rng) for order-independent reproducibility
- State-specific Medicaid takeup rates (53%-99% range, 51 jurisdictions)
- SSI resource test pass rate parameter (0.4)
- WIC takeup and nutritional risk variables (booleans resolved from category-specific rates)
- meets_ssi_resource_test boolean generation
changed:
- Replaced shared RNG (seed=100) with per-variable name-based seeding
- Medicaid takeup now uses state-specific rates instead of uniform 93%
103 changes: 89 additions & 14 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
)
from microimpute.models.qrf import QRF
import logging
from policyengine_us_data.parameters import load_take_up_rate
from policyengine_us_data.utils.randomness import seeded_rng


class CPS(Dataset):
Expand Down Expand Up @@ -191,28 +193,101 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
def add_takeup(self):
    """Draw program take-up (and related) flags and store them in the dataset.

    Each output variable is drawn from its own generator, obtained from
    ``seeded_rng(<variable name>)``, rather than from one shared
    ``np.random.default_rng(seed=100)`` stream. Assuming ``seeded_rng`` is
    keyed on the name it receives, the draws for one variable therefore do
    not depend on the order in which the other variables are generated.

    Rates are read from the take-up YAML files through
    ``load_take_up_rate``; the resulting boolean arrays are written into
    the dataset and saved with ``self.save_dataset``.
    """
    data = self.load_dataset()

    from policyengine_us import Microsimulation

    baseline = Microsimulation(dataset=self)
    year = self.time_period

    n_persons = len(data["person_id"])
    n_tax_units = len(data["tax_unit_id"])
    n_spm_units = len(data["spm_unit_id"])

    def draw(variable_name, size):
        """Return ``size`` draws from the generator named ``variable_name``."""
        return seeded_rng(variable_name).random(size)

    # Load take-up rates
    eitc_rates_by_children = load_take_up_rate("eitc", year)
    dc_ptc_rate = load_take_up_rate("dc_ptc", year)
    snap_rate = load_take_up_rate("snap", year)
    aca_rate = load_take_up_rate("aca", year)
    medicaid_rates_by_state = load_take_up_rate("medicaid", year)
    head_start_rate = load_take_up_rate("head_start", year)
    early_head_start_rate = load_take_up_rate("early_head_start", year)
    ssi_pass_rate = load_take_up_rate("ssi_pass_rate", year)

    # EITC: varies by number of children. Counts above 3 are capped at 3
    # before the lookup; a count missing from the table falls back to 0.85.
    eitc_child_count = baseline.calculate("eitc_child_count").values
    eitc_takeup_rate = np.array(
        [
            eitc_rates_by_children.get(min(int(c), 3), 0.85)
            for c in eitc_child_count
        ]
    )
    data["takes_up_eitc"] = (
        draw("takes_up_eitc", n_tax_units) < eitc_takeup_rate
    )

    # DC Property Tax Credit
    data["takes_up_dc_ptc"] = (
        draw("takes_up_dc_ptc", n_tax_units) < dc_ptc_rate
    )

    # SNAP
    data["takes_up_snap_if_eligible"] = (
        draw("takes_up_snap_if_eligible", n_spm_units) < snap_rate
    )

    # ACA
    data["takes_up_aca_if_eligible"] = (
        draw("takes_up_aca_if_eligible", n_tax_units) < aca_rate
    )

    # Medicaid: state-specific rates. State codes are paired with
    # household ids, then looked up per person through the person's
    # household id.
    state_codes = baseline.calculate("state_code_str").values
    hh_to_state = dict(zip(data["household_id"], state_codes))
    # NOTE(review): a person whose household id is missing from the
    # mapping is treated as living in "CA" - confirm this default.
    person_states = np.array(
        [
            hh_to_state.get(hh_id, "CA")
            for hh_id in data["person_household_id"]
        ]
    )
    # States absent from the table use 0.93 (the changelog describes this
    # as the previous uniform rate).
    medicaid_rate_by_person = np.array(
        [medicaid_rates_by_state.get(s, 0.93) for s in person_states]
    )
    data["takes_up_medicaid_if_eligible"] = (
        draw("takes_up_medicaid_if_eligible", n_persons)
        < medicaid_rate_by_person
    )

    # Head Start
    data["takes_up_head_start_if_eligible"] = (
        draw("takes_up_head_start_if_eligible", n_persons)
        < head_start_rate
    )

    # Early Head Start
    data["takes_up_early_head_start_if_eligible"] = (
        draw("takes_up_early_head_start_if_eligible", n_persons)
        < early_head_start_rate
    )

    # SSI resource test
    data["meets_ssi_resource_test"] = (
        draw("meets_ssi_resource_test", n_persons) < ssi_pass_rate
    )

    # WIC: resolve draws to bools using category-specific rates.
    # Categories without a rate get probability 0.
    wic_categories = baseline.calculate("wic_category_str").values
    wic_takeup_rates = load_take_up_rate("wic_takeup", year)
    wic_takeup_rate_by_person = np.array(
        [wic_takeup_rates.get(c, 0) for c in wic_categories]
    )
    data["would_claim_wic"] = (
        draw("would_claim_wic", n_persons) < wic_takeup_rate_by_person
    )

    # WIC nutritional risk - fully resolved: anyone the baseline already
    # flags as receiving WIC counts as at risk, everyone else is imputed
    # from the category-specific risk rate.
    wic_risk_rates = load_take_up_rate("wic_nutritional_risk", year)
    wic_risk_rate_by_person = np.array(
        [wic_risk_rates.get(c, 0) for c in wic_categories]
    )
    receives_wic = baseline.calculate("receives_wic").values
    imputed_risk = (
        draw("is_wic_at_nutritional_risk", n_persons)
        < wic_risk_rate_by_person
    )
    data["is_wic_at_nutritional_risk"] = receives_wic | imputed_risk

    self.save_dataset(data)

Expand Down
72 changes: 72 additions & 0 deletions policyengine_us_data/parameters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Take-up rate parameters for stochastic simulation.

These parameters are stored in the data package to keep the country package
as a purely deterministic rules engine.
"""

import yaml
from pathlib import Path

# Directory containing this package; the take-up YAML files are read from
# its "take_up" subdirectory.
PARAMETERS_DIR = Path(__file__).parent


def _year_of(key) -> int:
    """Return the calendar year encoded by a time-series key or period.

    Handles objects exposing ``.year`` (e.g. the ``datetime.date`` that
    PyYAML produces for an unquoted ``2018-01-01``), plain integers such
    as ``1980``, and strings such as ``"2018"`` or ``"2018-01-01"``.
    """
    if hasattr(key, "year"):
        return key.year
    return int(str(key).split("-")[0])


def _latest_value(time_series: dict, year: int):
    """Return the value of the latest entry starting in or before *year*.

    Returns ``None`` when every entry starts after *year*.
    """
    applicable_value = None
    # Sort on the string form so date, int and str keys can coexist; ISO
    # dates and four-digit years order chronologically as text.
    for key, value in sorted(
        time_series.items(), key=lambda item: str(item[0])
    ):
        if _year_of(key) > year:
            break
        applicable_value = value
    return applicable_value


def load_take_up_rate(variable_name: str, year: int = 2018):
    """Load take-up rate from YAML parameter files.

    Args:
        variable_name: Name of the take-up parameter file (without .yaml)
        year: Year for which to get the rate. An ``int``, a year string
            (``"2024"`` or ``"2024-01-01"``) or an object with a ``.year``
            attribute is accepted.

    Returns:
        One of, depending on the top-level key found in the file:

        * ``rates_by_children`` - the mapping as stored (``year`` is not
          applied);
        * ``rates_by_state`` - the mapping as stored (``year`` is not
          applied);
        * ``rates_by_category`` - a dict mapping each category to the
          value in force in ``year``; categories with no value in force
          yet are omitted;
        * ``values`` - the single value in force in ``year``.

    Raises:
        ValueError: If a ``values`` time series has no entry starting in
            or before ``year``.
        KeyError: If the file contains none of the recognised keys.
    """
    yaml_path = PARAMETERS_DIR / "take_up" / f"{variable_name}.yaml"

    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # EITC: rates by number of children
    if "rates_by_children" in data:
        return data["rates_by_children"]

    # Medicaid: state-specific rates
    if "rates_by_state" in data:
        return data["rates_by_state"]

    # Normalise once so string periods compare correctly with key years.
    year = _year_of(year)

    # WIC-style: rates by category (each category has a time series)
    if "rates_by_category" in data:
        result = {}
        for category, time_series in data["rates_by_category"].items():
            applicable_value = _latest_value(time_series, year)
            if applicable_value is not None:
                result[category] = applicable_value
        return result

    # Standard time-series values
    applicable_value = _latest_value(data["values"], year)

    if applicable_value is None:
        raise ValueError(
            f"No take-up rate found for {variable_name} in {year}"
        )

    return applicable_value
10 changes: 10 additions & 0 deletions policyengine_us_data/parameters/take_up/aca.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
description: Share of eligible people who enroll in Affordable Care Act coverage.
metadata:
label: ACA takeup rate
unit: /1
period: year
reference:
- title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
values:
2018-01-01: 0.672
11 changes: 11 additions & 0 deletions policyengine_us_data/parameters/take_up/dc_ptc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
description: The share of eligible individuals who claim the DC property tax credit.
metadata:
unit: /1
label: DC property tax credit takeup rate
period: year
reference:
- title: District of Columbia Tax Expenditure Report, 2024
href: https://ora-cfo.dc.gov/sites/default/files/dc/sites/ora-cfo/publication/attachments/2024%20Tax%20Expenditure%20Report.pdf#page=234
values:
# 37,133 (from 2024 Tax Expenditure Report) / 131,791,388 (PolicyEngine DC PTC value estimate)
2021-01-01: 0.32
9 changes: 9 additions & 0 deletions policyengine_us_data/parameters/take_up/early_head_start.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Percentage of eligible infants and toddlers who enroll in Early Head Start.
metadata:
label: Early Head Start take-up rate
unit: /1
reference:
- title: NIEER State(s) of Head Start and Early Head Start Report
href: https://nieer.org/research-library/states-head-start-early-head-start
values:
2020-09-01: 0.09
12 changes: 12 additions & 0 deletions policyengine_us_data/parameters/take_up/eitc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
description: The share of eligible individuals who claim the EITC (by number of children).
metadata:
label: EITC take-up rate by number of children
reference:
- title: National Taxpayer Advocate Special Report to Congress 2020 | IRS
href: https://www.taxpayeradvocate.irs.gov/wp-content/uploads/2020/08/JRC20_Volume3.pdf#page=62
# Maps number of children to take-up rate
rates_by_children:
0: 0.65
1: 0.86
2: 0.85
3: 0.85 # Assume same as 2
10 changes: 10 additions & 0 deletions policyengine_us_data/parameters/take_up/head_start.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
description: Percentage of eligible children who enroll in Head Start.
metadata:
label: Head Start take-up rate
unit: /1
reference:
- title: NIEER State(s) of Head Start and Early Head Start Report
href: https://nieer.org/research-library/states-head-start-early-head-start
values:
2020-09-01: 0.40
2021-09-01: 0.30
64 changes: 64 additions & 0 deletions policyengine_us_data/parameters/take_up/medicaid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
description: Share of eligible people who enroll in Medicaid, by state.
metadata:
label: Medicaid takeup rate
unit: /1
period: year
breakdown:
- state_code
reference:
- title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/
- title: State-specific rates derived from MACPAC enrollment targets vs modeled eligibility
href: https://www.medicaid.gov/medicaid/program-information/medicaid-and-chip-enrollment-data/report-highlights/index.html
rates_by_state:
AK: 0.88
AL: 0.92
AR: 0.79
AZ: 0.95
CA: 0.78
CO: 0.99
CT: 0.89
DC: 0.99
DE: 0.86
FL: 0.98
GA: 0.73
HI: 0.88
IA: 0.84
ID: 0.78
IL: 0.85
IN: 0.99
KS: 0.92
KY: 0.87
LA: 0.79
MA: 0.94
MD: 0.95
ME: 0.92
MI: 0.91
MN: 0.89
MO: 0.89
MS: 0.75
MT: 0.83
NC: 0.94
ND: 0.91
NE: 0.79
NH: 0.84
NJ: 0.74
NM: 0.84
NV: 0.93
NY: 0.86
OH: 0.82
OK: 0.77
OR: 0.92
PA: 0.64
RI: 0.94
SC: 0.93
SD: 0.88
TN: 0.92
TX: 0.76
UT: 0.53
VA: 0.82
VT: 0.93
WA: 0.98
WI: 0.91
WV: 0.83
WY: 0.70
9 changes: 9 additions & 0 deletions policyengine_us_data/parameters/take_up/snap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Share of those eligible for SNAP who claim it.
metadata:
label: SNAP takeup rate
unit: /1
reference:
- title: USDA
href: https://www.fns.usda.gov/usamap
values:
2018-01-01: 0.82
10 changes: 10 additions & 0 deletions policyengine_us_data/parameters/take_up/ssi_pass_rate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
description: Proportion of SSI-aged-blind-disabled recipients who meet the asset test.
metadata:
label: SSI resource test pass rate
unit: /1
period: year
reference:
- title: SSI resource test pass rate from policyengine-us
href: https://github.com/PolicyEngine/policyengine-us
values:
2018-01-01: 0.4
13 changes: 13 additions & 0 deletions policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
rates_by_category:
PREGNANT:
1980: 0.913
POSTPARTUM:
1980: 0.933
BREASTFEEDING:
1980: 0.889
INFANT:
1980: 0.95
CHILD:
1980: 0.752
NONE:
1980: 0
Loading