Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/841.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Populate American Opportunity Credit eligibility inputs in Enhanced CPS from the PUF-imputed AOTC signal.
131 changes: 131 additions & 0 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,7 @@ def generate(self):
dataset_path=str(self.cps.file_path),
)

new_data = self._impute_aotc_eligibility_inputs(new_data, self.time_period)
new_data = self._rename_imputed_to_inputs(new_data)
if _supports_structural_mortgage_inputs():
had_positive_mortgage_input = self._has_positive_mortgage_input(
Expand All @@ -920,6 +921,136 @@ def generate(self):
new_data = self._drop_formula_variables(new_data)
self.save_dataset(new_data)

@staticmethod
def _aotc_qualifying_expenses_from_credit(credit):
    """Back out the qualifying expenses implied by an AOTC amount.

    The credit is first clamped to the range [0, 2,500].  Amounts up to
    2,000 map one-for-one onto expenses; every dollar above 2,000 is
    divided by 0.25, so the maximum 2,500 credit implies 4,000 of
    expenses.  This inverts a schedule of 100% of the first 2,000 of
    expenses plus 25% of the next 2,000.

    Args:
        credit: Credit amount; anything accepted by ``float()``.

    Returns:
        The implied qualifying expenses, between 0 and 4,000.
    """
    clamped = float(credit)
    clamped = max(clamped, 0)  # negative credits imply no expenses
    clamped = min(clamped, 2_500)  # nothing above the maximum credit counts
    second_tier_credit = clamped - 2_000
    return clamped if clamped <= 2_000 else 2_000 + second_tier_credit / 0.25

@classmethod
def _impute_aotc_eligibility_inputs(cls, data, time_period):
    """Convert imputed tax-unit AOTC amounts to person eligibility inputs.

    For every tax unit whose ``american_opportunity_credit`` is positive,
    one or more members are marked as AOTC students:

    * if any member already has positive ``qualified_tuition_expenses``,
      all such members are marked and tuition is left untouched;
    * otherwise a single member is chosen -- the first full-time college
      student, else the first dependent, else the first member -- and
      their tuition is raised to the expenses implied by the unit's
      credit.

    Marked people get the four AOTC eligibility flags set to True, the two
    disqualifying flags set to False, and their prior-years-claimed count
    capped at 3.  Values of people who are not marked are preserved.

    Args:
        data: Mapping of variable name -> {period: array}.  It is updated
            in place and also returned.
        time_period: Period key used to read and write every variable.

    Returns:
        ``data``.  It is returned unchanged when a required input is
        missing, when entity lengths disagree, or when no tax unit has a
        positive credit.

    Note:
        Credits are paired with tax units by position, so ``tax_unit_id``
        values are assumed to be unique.
    """

    def period_values(variable):
        """Return the stored value of ``variable`` for the period, or None."""
        return data.get(variable, {}).get(time_period)

    credit = period_values("american_opportunity_credit")
    tax_unit_ids = period_values("tax_unit_id")
    person_tax_unit_ids = period_values("person_tax_unit_id")
    tuition = period_values("qualified_tuition_expenses")
    if (
        credit is None
        or tax_unit_ids is None
        or person_tax_unit_ids is None
        or tuition is None
    ):
        return data

    credit = np.asarray(credit)
    tax_unit_ids = np.asarray(tax_unit_ids)
    person_tax_unit_ids = np.asarray(person_tax_unit_ids)
    # Work on a copy so the caller's tuition array is never modified.
    tuition = np.array(tuition, copy=True)
    n_people = len(person_tax_unit_ids)
    if len(credit) != len(tax_unit_ids) or len(tuition) != n_people:
        logger.warning(
            "Skipping AOTC eligibility imputation due to entity length mismatch"
        )
        return data

    positive_credit = credit > 0
    if not positive_credit.any():
        return data

    def person_flags(variable):
        """Return a fresh boolean person array (all False when absent)."""
        existing = period_values(variable)
        if existing is None:
            return np.zeros(n_people, dtype=bool)
        return np.array(existing, dtype=bool)

    full_time = person_flags("is_full_time_college_student")
    dependent = person_flags("is_tax_unit_dependent")

    # Group people by tax unit once instead of rescanning the whole person
    # array for every positive-credit unit.  The sort is stable, so the
    # members of each unit stay in their original (ascending index) order.
    order = np.argsort(person_tax_unit_ids, kind="stable")
    sorted_person_units = person_tax_unit_ids[order]
    positive_units = tax_unit_ids[positive_credit]
    positive_credits = credit[positive_credit]
    starts = np.searchsorted(sorted_person_units, positive_units, side="left")
    stops = np.searchsorted(sorted_person_units, positive_units, side="right")

    aotc_student = np.zeros(n_people, dtype=bool)
    imputed_tuition_count = 0
    for unit_credit, start, stop in zip(positive_credits, starts, stops):
        member_indices = order[start:stop]
        if member_indices.size == 0:
            continue

        has_tuition = tuition[member_indices] > 0
        if has_tuition.any():
            # Existing tuition already identifies the students.
            aotc_student[member_indices[has_tuition]] = True
            continue

        # Nobody reports tuition: pick one plausible student.
        for priority in (full_time, dependent):
            preferred = member_indices[priority[member_indices]]
            if preferred.size:
                break
        else:
            preferred = member_indices

        selected = preferred[0]
        aotc_student[selected] = True
        tuition[selected] = max(
            tuition[selected],
            cls._aotc_qualifying_expenses_from_credit(unit_credit),
        )
        imputed_tuition_count += 1

    for variable in (
        "is_pursuing_credential_for_american_opportunity_credit",
        "attends_eligible_educational_institution_for_american_opportunity_credit",
        "is_enrolled_at_least_half_time_for_american_opportunity_credit",
        "has_american_opportunity_credit_1098_t_or_exception",
    ):
        values = person_flags(variable)
        values[aotc_student] = True
        data[variable] = {time_period: values}

    for variable in (
        "has_completed_first_four_years_of_postsecondary_education",
        "has_felony_drug_conviction",
    ):
        values = person_flags(variable)
        values[aotc_student] = False
        data[variable] = {time_period: values}

    existing_prior_years = period_values(
        "american_opportunity_credit_claimed_prior_years"
    )
    prior_years = (
        np.asarray(existing_prior_years).copy()
        if existing_prior_years is not None
        else np.zeros(n_people, dtype=np.int8)
    )
    # Cap (never raise) the prior-year count for the marked students.
    prior_years[aotc_student] = np.minimum(prior_years[aotc_student], 3)
    data["american_opportunity_credit_claimed_prior_years"] = {
        time_period: prior_years
    }
    data["qualified_tuition_expenses"] = {time_period: tuition}
    logger.info(
        "AOTC eligibility imputation populated inputs for %d people "
        "across %d tax units and filled tuition for %d people",
        int(aotc_student.sum()),
        int(positive_credit.sum()),
        imputed_tuition_count,
    )
    return data

@classmethod
def _rename_imputed_to_inputs(cls, data):
"""Rename QRF-imputed formula vars to their leaf inputs.
Expand Down
75 changes: 75 additions & 0 deletions tests/unit/test_extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,81 @@ def test_positive_mortgage_input_detects_positive_deductible_interest(self):
assert ExtendedCPS._has_positive_mortgage_input(data, 2024) is True


class TestAOTCEligibilityInputImputation:
    """Checks for ``ExtendedCPS._impute_aotc_eligibility_inputs``."""

    # Flags the imputation is expected to switch on for AOTC students.
    ENABLED_FLAGS = (
        "is_pursuing_credential_for_american_opportunity_credit",
        "attends_eligible_educational_institution_for_american_opportunity_credit",
        "is_enrolled_at_least_half_time_for_american_opportunity_credit",
        "has_american_opportunity_credit_1098_t_or_exception",
    )
    # Flags the imputation is expected to switch off for AOTC students.
    DISABLED_FLAGS = (
        "has_completed_first_four_years_of_postsecondary_education",
        "has_felony_drug_conviction",
    )

    @staticmethod
    def _for_2024(columns):
        """Wrap each array in the ``{period: array}`` layout for 2024."""
        return {name: {2024: values} for name, values in columns.items()}

    @staticmethod
    def _assert_2024_values(result, variables, expected):
        """Assert that every listed variable equals ``expected`` in 2024."""
        for variable in variables:
            np.testing.assert_array_equal(result[variable][2024], expected)

    def test_leaves_data_unchanged_without_positive_aotc_signal(self):
        # A zero credit must leave the inputs exactly as they were.
        data = self._for_2024(
            {
                "american_opportunity_credit": np.array([0.0]),
                "tax_unit_id": np.array([1]),
                "person_tax_unit_id": np.array([1]),
                "qualified_tuition_expenses": np.array([1_200.0]),
            }
        )

        result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024)

        assert "is_pursuing_credential_for_american_opportunity_credit" not in result
        self._assert_2024_values(
            result, ("qualified_tuition_expenses",), np.array([1_200.0])
        )

    def test_marks_tuition_members_in_positive_aotc_tax_units(self):
        # Unit 1 has a credit, so only its tuition-paying member is marked;
        # unit 2 has tuition but no credit and stays untouched.
        data = self._for_2024(
            {
                "american_opportunity_credit": np.array([1_000.0, 0.0]),
                "tax_unit_id": np.array([1, 2]),
                "person_tax_unit_id": np.array([1, 1, 2]),
                "qualified_tuition_expenses": np.array([1_200.0, 0.0, 1_200.0]),
                "is_full_time_college_student": np.array([False, True, True]),
            }
        )

        result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024)

        self._assert_2024_values(
            result, self.ENABLED_FLAGS, np.array([True, False, False])
        )
        self._assert_2024_values(result, self.DISABLED_FLAGS, np.zeros(3, bool))
        self._assert_2024_values(
            result,
            ("american_opportunity_credit_claimed_prior_years",),
            np.zeros(3, dtype=np.int8),
        )
        self._assert_2024_values(
            result,
            ("qualified_tuition_expenses",),
            np.array([1_200.0, 0.0, 1_200.0]),
        )

    def test_fills_tuition_when_positive_aotc_unit_has_no_tuition(self):
        # Nobody reports tuition, so the full-time student is chosen and
        # receives the expenses implied by the maximum credit.
        data = self._for_2024(
            {
                "american_opportunity_credit": np.array([2_500.0]),
                "tax_unit_id": np.array([1]),
                "person_tax_unit_id": np.array([1, 1]),
                "qualified_tuition_expenses": np.array([0.0, 0.0]),
                "is_full_time_college_student": np.array([False, True]),
            }
        )

        result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024)

        self._assert_2024_values(result, self.ENABLED_FLAGS, np.array([False, True]))
        self._assert_2024_values(
            result, ("qualified_tuition_expenses",), np.array([0.0, 4_000.0])
        )


class TestCloneChildcareDerivation:
"""Clone-half capped childcare should be derived deterministically."""

Expand Down
Loading