Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/8089.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added other health insurance premiums as the non-Medicare premium category not covered by modeled Marketplace, CHIP, or Medicaid premiums.
2 changes: 1 addition & 1 deletion policyengine_us_data/calibration/target_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ include:
geo_level: national
- variable: medicaid
geo_level: national
- variable: medicare_part_b_premiums
- variable: medicare_part_b_premium
geo_level: national
- variable: other_medical_expenses
geo_level: national
Expand Down
132 changes: 121 additions & 11 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,6 @@
from policyengine_us_data.utils.asset_imputation import (
build_household_vehicle_receiver,
)
from policyengine_us_data.utils.policyengine import (
supports_medicare_enrollment_input,
supports_modeled_medicare_part_b_inputs,
)


CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = {
"reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR",
Expand Down Expand Up @@ -193,6 +188,8 @@ def generate(self):
add_takeup(self)
logging.info("Imputing Marketplace plan benchmark ratio")
add_marketplace_plan_benchmark_ratio(self)
logging.info("Deriving other health insurance premiums")
derive_other_health_insurance_premiums(self)
logging.info("Downsampling")

# Downsample
Expand Down Expand Up @@ -519,6 +516,124 @@ def add_marketplace_plan_benchmark_ratio(self):
self.save_dataset(data)


# Maps each derived output variable to the reported premium input it starts
# from and the modeled premium variables that are subtracted from that input.
OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS = {
    "other_health_insurance_premiums": dict(
        reported_variable="health_insurance_premiums_without_medicare_part_b",
        modeled_variables=(
            "chip_premium",
            "marketplace_net_premium",
            "medicaid_premium",
        ),
    ),
}


def derive_other_health_insurance_premiums(self):
    """Create other premium inputs net of baseline computed premiums.

    The model adds computed premiums back explicitly, so it needs a separate
    other-premium input for the parts of CPS-reported non-Medicare premiums
    not explained by baseline computed Marketplace, CHIP, or Medicaid
    premiums. The original CPS-reported premium inputs remain unchanged as raw
    source fields. The data package requires a policyengine-us release with
    these modeled premium variables, so missing variables fail fast instead of
    silently producing an incomplete decomposition.

    For every entry of ``OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS`` whose
    reported input is present in the dataset, the modeled premiums are
    calculated on a baseline simulation, mapped to person rows, summed, and
    subtracted from the reported input. A target whose reported input is
    absent is skipped with a warning so the gap is visible in the build log.
    The baseline simulation is built only once a target actually needs it,
    and the dataset is saved only when at least one output was written.
    """
    data = self.load_dataset()
    period = self.time_period
    baseline = None
    changed = False

    for output_variable, config in OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS.items():
        reported_variable = config["reported_variable"]
        premium_variables = config["modeled_variables"]

        if reported_variable not in data:
            # Best-effort skip, but never a silent one: downstream code may
            # expect the output variable to exist.
            logging.warning(
                "Skipping %s: reported input %s is not in the dataset",
                output_variable,
                reported_variable,
            )
            continue

        if baseline is None:
            # Deferred so a dataset with nothing to derive never pays for
            # building a simulation.
            from policyengine_us import Microsimulation

            baseline = Microsimulation(dataset=self)
        tbs = baseline.tax_benefit_system

        computed_premium = np.zeros(len(data[reported_variable]), dtype=float)
        for variable in premium_variables:
            values = np.asarray(
                baseline.calculate(variable, period=period).values,
                dtype=float,
            )
            # Modeled premiums may live on a group entity; bring them to
            # person rows so they line up with the reported input.
            computed_premium += _premium_values_to_person(
                data=data,
                source_entity=tbs.variables[variable].entity.key,
                values=values,
            )

        data[output_variable] = compute_other_health_insurance_premiums(
            reported_premium=data[reported_variable],
            baseline_computed_premium=computed_premium,
        )
        logging.info(
            "Created %s from %s by subtracting baseline computed premiums: %s",
            output_variable,
            reported_variable,
            ", ".join(premium_variables),
        )
        changed = True

    if changed:
        self.save_dataset(data)


def compute_other_health_insurance_premiums(
    reported_premium: np.ndarray,
    baseline_computed_premium: np.ndarray,
) -> np.ndarray:
    """Subtract baseline computed premiums from reported premiums.

    Both inputs are converted to float arrays before the element-wise
    subtraction; the difference is returned as-is, with no clipping.
    """
    reported = np.asarray(reported_premium, dtype=float)
    modeled = np.asarray(baseline_computed_premium, dtype=float)
    return reported - modeled


def _premium_values_to_person(
data: dict,
source_entity: str,
values: np.ndarray,
) -> np.ndarray:
"""Map computed premiums to person rows for person-level premium accounting."""
person_ids = data["person_id"]
if source_entity == "person":
if len(values) != len(person_ids):
raise ValueError(
"Person-level computed premium length does not match person rows: "
f"got {len(values)}, expected {len(person_ids)}."
)
return values

entity_id_variable = f"{source_entity}_id"
person_entity_id_variable = f"person_{source_entity}_id"
if entity_id_variable not in data or person_entity_id_variable not in data:
raise ValueError(
f"Cannot allocate {source_entity}-level premiums to people: missing "
f"{entity_id_variable} or {person_entity_id_variable}."
)

entity_ids = data[entity_id_variable]
person_entity_ids = data[person_entity_id_variable]
if len(values) != len(entity_ids):
raise ValueError(
f"{source_entity}-level computed premium length does not match "
f"{source_entity} rows: got {len(values)}, expected {len(entity_ids)}."
)

entity_position = {entity_id: index for index, entity_id in enumerate(entity_ids)}
allocated = np.zeros(len(person_ids), dtype=float)
seen_entities = set()
for person_index, entity_id in enumerate(person_entity_ids):
if entity_id in seen_entities:
continue
allocated[person_index] = values[entity_position[entity_id]]
seen_entities.add(entity_id)
return allocated


MARKETPLACE_PLAN_BENCHMARK_RATIO_MIN = 0.5
MARKETPLACE_PLAN_BENCHMARK_RATIO_MAX = 1.5

Expand Down Expand Up @@ -1009,12 +1124,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
cps["health_insurance_premiums_without_medicare_part_b"] = person.PHIP_VAL
cps["over_the_counter_health_expenses"] = person.POTC_VAL
cps["other_medical_expenses"] = person.PMED_VAL
if supports_medicare_enrollment_input():
cps["medicare_enrolled"] = person.MCARE == 1
if supports_modeled_medicare_part_b_inputs():
cps["medicare_part_b_premiums_reported"] = person.PEMCPREM
else:
cps["medicare_part_b_premiums"] = person.PEMCPREM
cps["medicare_enrolled"] = person.MCARE == 1

# Get QBI simulation parameters ---
yamlfilename = (
Expand Down
7 changes: 1 addition & 6 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
impute_tax_unit_mortgage_balance_hints,
)
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
from policyengine_us_data.utils.policyengine import (
supports_modeled_medicare_part_b_inputs,
)
from policyengine_us_data.utils.retirement_limits import (
get_retirement_limits,
get_se_pension_limits,
Expand Down Expand Up @@ -151,6 +148,7 @@ def _supports_structural_mortgage_inputs() -> bool:
"spm_unit_pre_subsidy_childcare_expenses",
# Medical expenses
"health_insurance_premiums_without_medicare_part_b",
"other_health_insurance_premiums",
"over_the_counter_health_expenses",
"other_medical_expenses",
"child_support_expense",
Expand All @@ -166,9 +164,6 @@ def _supports_structural_mortgage_inputs() -> bool:
"self_employment_income_last_year",
]

if not supports_modeled_medicare_part_b_inputs():
CPS_ONLY_IMPUTED_VARIABLES.append("medicare_part_b_premiums")

# Set for O(1) lookup in the splice loop.
_CPS_ONLY_SET = set(CPS_ONLY_IMPUTED_VARIABLES)

Expand Down
6 changes: 4 additions & 2 deletions policyengine_us_data/datasets/puf/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
STRUCTURAL_MORTGAGE_VARIABLES,
convert_mortgage_interest_to_structural_inputs,
)
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
from policyengine_us_data.utils.policyengine import (
has_policyengine_us_variables,
)
from policyengine_us_data.utils.uprating import (
create_policyengine_uprating_factors_table,
)
Expand Down Expand Up @@ -984,7 +986,7 @@ class PUF_2024(PUF):
"health_insurance_premiums_without_medicare_part_b": 0.453,
"other_medical_expenses": 0.325,
"over_the_counter_health_expenses": 0.085,
"medicare_part_b_premiums": 0.137,
"medicare_part_b_premium": 0.137,
}

if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/db/etl_national_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
"year": 2024,
},
{
"variable": "medicare_part_b_premiums",
"variable": "medicare_part_b_premium",
"value": get_beneficiary_paid_medicare_part_b_premiums_target(2024),
"source": get_beneficiary_paid_medicare_part_b_premiums_source(2024),
"notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
HARD_CODED_TOTALS = {
"health_insurance_premiums_without_medicare_part_b": 385e9,
"other_medical_expenses": 278e9,
"medicare_part_b_premiums": 112e9,
"medicare_part_b_premium": 112e9,
"over_the_counter_health_expenses": 72e9,
"spm_unit_spm_threshold": 3_945e9,
"child_support_expense": 33e9,
Expand Down
22 changes: 14 additions & 8 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from policyengine_core.reforms import Reform
from policyengine_us_data.utils.soi import pe_to_soi, get_soi


MEDICARE_PART_B_PREMIUM_VARIABLE = "medicare_part_b_premium"

# National calibration targets consumed by build_loss_matrix().
# These values are specific to 2024 — they should NOT be applied to
# other years without re-sourcing. They are duplicated in
Expand All @@ -29,8 +32,8 @@
HARD_CODED_TOTALS = {
"health_insurance_premiums_without_medicare_part_b": 385e9,
"other_medical_expenses": 278e9,
"medicare_part_b_premiums": get_beneficiary_paid_medicare_part_b_premiums_target(
2024
MEDICARE_PART_B_PREMIUM_VARIABLE: (
get_beneficiary_paid_medicare_part_b_premiums_target(2024)
),
"over_the_counter_health_expenses": 72e9,
"spm_unit_spm_threshold": 3_945e9,
Expand Down Expand Up @@ -851,18 +854,21 @@ def build_loss_matrix(dataset: type, time_period):
else:
in_age_range = (age >= age_lower_bound) * (age < age_lower_bound + 10)
label_suffix = f"age_{age_lower_bound}_to_{age_lower_bound + 9}"
for expense_type in [
"health_insurance_premiums_without_medicare_part_b",
"over_the_counter_health_expenses",
"other_medical_expenses",
"medicare_part_b_premiums",
for expense_type, target_column in [
(
"health_insurance_premiums_without_medicare_part_b",
"health_insurance_premiums_without_medicare_part_b",
),
("over_the_counter_health_expenses", "over_the_counter_health_expenses"),
("other_medical_expenses", "other_medical_expenses"),
(MEDICARE_PART_B_PREMIUM_VARIABLE, "medicare_part_b_premiums"),
]:
label = f"nation/census/{expense_type}/{label_suffix}"
value = sim.calculate(expense_type).values
loss_matrix[label] = sim.map_result(
in_age_range * value, "person", "household"
)
targets_array.append(row[expense_type])
targets_array.append(row[target_column])

# AGI by SPM threshold totals

Expand Down
10 changes: 0 additions & 10 deletions policyengine_us_data/utils/policyengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,3 @@ def has_policyengine_us_variables(*variables: str) -> bool:
return False

return set(variables).issubset(available_variables)


def supports_medicare_enrollment_input() -> bool:
    """Return whether ``has_policyengine_us_variables`` finds ``medicare_enrolled``."""
    required_variable = "medicare_enrolled"
    return has_policyengine_us_variables(required_variable)


def supports_modeled_medicare_part_b_inputs() -> bool:
    """Return whether ``has_policyengine_us_variables`` finds the reported Part B input."""
    required_variables = ("medicare_part_b_premiums_reported",)
    return has_policyengine_us_variables(*required_variables)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.14",
]
dependencies = [
"policyengine-us>=1.637.0",
"policyengine-us>=1.674.1",
"policyengine-core>=3.23.6",
"pandas>=2.3.1",
"requests>=2.25.0",
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/calibration/test_target_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,21 @@ def test_training_config_includes_national_ctc_agi_targets(self):
"domain_variable": "adjusted_gross_income,non_refundable_ctc",
} in include_rules

def test_training_config_includes_medicare_part_b_target(self):
    """The packaged target config lists the national Medicare Part B premium target."""
    # Three directory levels above this test file's own directory.
    base_dir = Path(__file__).resolve().parents[3]
    config_path = (
        base_dir / "policyengine_us_data" / "calibration" / "target_config.yaml"
    )
    config = load_target_config(str(config_path))

    expected_rule = {
        "variable": "medicare_part_b_premium",
        "geo_level": "national",
    }
    assert expected_rule in config["include"]


def test_training_config_includes_district_non_refundable_ctc_target(self):
config = load_target_config(
str(
Expand Down
Loading
Loading