From 82bbfd717f087298d5bd0bf838b58e71235e97b6 Mon Sep 17 00:00:00 2001 From: Rahul Aggarwal Date: Thu, 12 Mar 2026 01:21:58 -0700 Subject: [PATCH 1/3] Add 2026 tax year support Update federal income withholding tables (single, married, HoH), social security wage limit, and valid tax year configuration for 2026. --- src/python_taxes/__init__.py | 2 +- src/python_taxes/federal/__init__.py | 4 +- .../income/tables/percentage/automated/hoh.py | 100 ++++++++++++++++++ .../tables/percentage/automated/married.py | 100 ++++++++++++++++++ .../tables/percentage/automated/single.py | 100 ++++++++++++++++++ src/python_taxes/federal/social_security.py | 1 + 6 files changed, 304 insertions(+), 3 deletions(-) diff --git a/src/python_taxes/__init__.py b/src/python_taxes/__init__.py index ea8818c..446b809 100644 --- a/src/python_taxes/__init__.py +++ b/src/python_taxes/__init__.py @@ -2,6 +2,6 @@ from pydantic import Field -CURRENT_TAX_YEAR = 2025 +CURRENT_TAX_YEAR = 2026 currency_field = Field(ge=Decimal("0.00"), decimal_places=2) diff --git a/src/python_taxes/federal/__init__.py b/src/python_taxes/federal/__init__.py index 0076989..d6d7395 100644 --- a/src/python_taxes/federal/__init__.py +++ b/src/python_taxes/federal/__init__.py @@ -14,6 +14,6 @@ # AfterValidator for tax_year def is_valid_tax_year(value: int) -> int: - if value in [2023, 2024, 2025]: + if value in [2023, 2024, 2025, 2026]: return value - raise ValueError("Invalid tax year. Valid tax years are 2023, 2024, and 2025.") + raise ValueError("Invalid tax year. 
Valid tax years are 2023, 2024, 2025, and 2026.") diff --git a/src/python_taxes/federal/income/tables/percentage/automated/hoh.py b/src/python_taxes/federal/income/tables/percentage/automated/hoh.py index 0741dd9..90c2335 100644 --- a/src/python_taxes/federal/income/tables/percentage/automated/hoh.py +++ b/src/python_taxes/federal/income/tables/percentage/automated/hoh.py @@ -153,6 +153,56 @@ percent=37, ), ], + 2026: [ + RateRow( + min=Decimal("0.00"), + max=Decimal("15549.99"), + withhold_amount=Decimal("0.00"), + percent=0, + ), + RateRow( + min=Decimal("15550.00"), + max=Decimal("33249.99"), + withhold_amount=Decimal("0.00"), + percent=10, + ), + RateRow( + min=Decimal("33250.00"), + max=Decimal("82999.99"), + withhold_amount=Decimal("1770.00"), + percent=12, + ), + RateRow( + min=Decimal("83000.00"), + max=Decimal("121249.99"), + withhold_amount=Decimal("7740.00"), + percent=22, + ), + RateRow( + min=Decimal("121250.00"), + max=Decimal("217299.99"), + withhold_amount=Decimal("16155.00"), + percent=24, + ), + RateRow( + min=Decimal("217300.00"), + max=Decimal("271749.99"), + withhold_amount=Decimal("39207.00"), + percent=32, + ), + RateRow( + min=Decimal("271750.00"), + max=Decimal("656149.99"), + withhold_amount=Decimal("56631.00"), + percent=35, + ), + RateRow( + min=Decimal("656150.00"), + max=MAX, + withhold_amount=Decimal("191171.00"), + percent=37, + ), + ], } multiple_jobs = { @@ -306,4 +356,54 @@ percent=37, ), ], + 2026: [ + RateRow( + min=Decimal("0.00"), + max=Decimal("12074.99"), + withhold_amount=Decimal("0.00"), + percent=0, + ), + RateRow( + min=Decimal("12075.00"), + max=Decimal("20924.99"), + withhold_amount=Decimal("0.00"), + percent=10, + ), + RateRow( + min=Decimal("20925.00"), + max=Decimal("45799.99"), + withhold_amount=Decimal("885.00"), + percent=12, + ), + RateRow( + min=Decimal("45800.00"), + max=Decimal("64924.99"), + withhold_amount=Decimal("3870.00"), + percent=22, + ), + RateRow( + min=Decimal("64925.00"), + 
max=Decimal("112949.99"), + withhold_amount=Decimal("8077.50"), + percent=24, + ), + RateRow( + min=Decimal("112950.00"), + max=Decimal("140174.99"), + withhold_amount=Decimal("19603.50"), + percent=32, + ), + RateRow( + min=Decimal("140175.00"), + max=Decimal("332374.99"), + withhold_amount=Decimal("28315.50"), + percent=35, + ), + RateRow( + min=Decimal("332375.00"), + max=MAX, + withhold_amount=Decimal("95585.50"), + percent=37, + ), + ], } diff --git a/src/python_taxes/federal/income/tables/percentage/automated/married.py b/src/python_taxes/federal/income/tables/percentage/automated/married.py index 8a247b0..21802bf 100644 --- a/src/python_taxes/federal/income/tables/percentage/automated/married.py +++ b/src/python_taxes/federal/income/tables/percentage/automated/married.py @@ -153,6 +153,56 @@ percent=37, ), ], + 2026: [ + RateRow( + min=Decimal("0.00"), + max=Decimal("19299.99"), + withhold_amount=Decimal("0.00"), + percent=0, + ), + RateRow( + min=Decimal("19300.00"), + max=Decimal("44099.99"), + withhold_amount=Decimal("0.00"), + percent=10, + ), + RateRow( + min=Decimal("44100.00"), + max=Decimal("120099.99"), + withhold_amount=Decimal("2480.00"), + percent=12, + ), + RateRow( + min=Decimal("120100.00"), + max=Decimal("230699.99"), + withhold_amount=Decimal("11600.00"), + percent=22, + ), + RateRow( + min=Decimal("230700.00"), + max=Decimal("422849.99"), + withhold_amount=Decimal("35932.00"), + percent=24, + ), + RateRow( + min=Decimal("422850.00"), + max=Decimal("531749.99"), + withhold_amount=Decimal("82048.00"), + percent=32, + ), + RateRow( + min=Decimal("531750.00"), + max=Decimal("787999.99"), + withhold_amount=Decimal("116896.00"), + percent=35, + ), + RateRow( + min=Decimal("788000.00"), + max=MAX, + withhold_amount=Decimal("206583.50"), + percent=37, + ), + ], } multiple_jobs = { @@ -306,4 +356,54 @@ percent=37, ), ], + 2026: [ + RateRow( + min=Decimal("0.00"), + max=Decimal("16099.99"), + withhold_amount=Decimal("0.00"), + percent=0, + ), + 
RateRow( + min=Decimal("16100.00"), + max=Decimal("28499.99"), + withhold_amount=Decimal("0.00"), + percent=10, + ), + RateRow( + min=Decimal("28500.00"), + max=Decimal("66499.99"), + withhold_amount=Decimal("1240.00"), + percent=12, + ), + RateRow( + min=Decimal("66500.00"), + max=Decimal("121799.99"), + withhold_amount=Decimal("5800.00"), + percent=22, + ), + RateRow( + min=Decimal("121800.00"), + max=Decimal("217874.99"), + withhold_amount=Decimal("17966.00"), + percent=24, + ), + RateRow( + min=Decimal("217875.00"), + max=Decimal("272324.99"), + withhold_amount=Decimal("41024.00"), + percent=32, + ), + RateRow( + min=Decimal("272325.00"), + max=Decimal("400449.99"), + withhold_amount=Decimal("58448.00"), + percent=35, + ), + RateRow( + min=Decimal("400450.00"), + max=MAX, + withhold_amount=Decimal("103291.75"), + percent=37, + ), + ], } diff --git a/src/python_taxes/federal/income/tables/percentage/automated/single.py b/src/python_taxes/federal/income/tables/percentage/automated/single.py index afe9db9..ae6c8d2 100644 --- a/src/python_taxes/federal/income/tables/percentage/automated/single.py +++ b/src/python_taxes/federal/income/tables/percentage/automated/single.py @@ -153,6 +153,56 @@ percent=37, ), ], + 2026: [ + RateRow( + min=Decimal("0.00"), + max=Decimal("7499.99"), + withhold_amount=Decimal("0.00"), + percent=0, + ), + RateRow( + min=Decimal("7500.00"), + max=Decimal("19899.99"), + withhold_amount=Decimal("0.00"), + percent=10, + ), + RateRow( + min=Decimal("19900.00"), + max=Decimal("57899.99"), + withhold_amount=Decimal("1240.00"), + percent=12, + ), + RateRow( + min=Decimal("57900.00"), + max=Decimal("113199.99"), + withhold_amount=Decimal("5800.00"), + percent=22, + ), + RateRow( + min=Decimal("113200.00"), + max=Decimal("209274.99"), + withhold_amount=Decimal("17966.00"), + percent=24, + ), + RateRow( + min=Decimal("209275.00"), + max=Decimal("263724.99"), + withhold_amount=Decimal("41024.00"), + percent=32, + ), + RateRow( + 
min=Decimal("263725.00"), + max=Decimal("648099.99"), + withhold_amount=Decimal("58448.00"), + percent=35, + ), + RateRow( + min=Decimal("648100.00"), + max=MAX, + withhold_amount=Decimal("192979.25"), + percent=37, + ), + ], } multiple_jobs = { @@ -306,4 +356,54 @@ percent=37, ), ], + 2026: [ + RateRow( + min=Decimal("0.00"), + max=Decimal("8049.99"), + withhold_amount=Decimal("0.00"), + percent=0, + ), + RateRow( + min=Decimal("8050.00"), + max=Decimal("14249.99"), + withhold_amount=Decimal("0.00"), + percent=10, + ), + RateRow( + min=Decimal("14250.00"), + max=Decimal("33249.99"), + withhold_amount=Decimal("620.00"), + percent=12, + ), + RateRow( + min=Decimal("33250.00"), + max=Decimal("60899.99"), + withhold_amount=Decimal("2900.00"), + percent=22, + ), + RateRow( + min=Decimal("60900.00"), + max=Decimal("108937.99"), + withhold_amount=Decimal("8983.00"), + percent=24, + ), + RateRow( + min=Decimal("108938.00"), + max=Decimal("136162.99"), + withhold_amount=Decimal("20512.00"), + percent=32, + ), + RateRow( + min=Decimal("136163.00"), + max=Decimal("328349.99"), + withhold_amount=Decimal("29224.00"), + percent=35, + ), + RateRow( + min=Decimal("328350.00"), + max=MAX, + withhold_amount=Decimal("96489.63"), + percent=37, + ), + ], } diff --git a/src/python_taxes/federal/social_security.py b/src/python_taxes/federal/social_security.py index 9a325d1..7a115be 100644 --- a/src/python_taxes/federal/social_security.py +++ b/src/python_taxes/federal/social_security.py @@ -14,6 +14,7 @@ 2023: Decimal("160200"), 2024: Decimal("168600"), 2025: Decimal("176100"), + 2026: Decimal("184500"), } From 6b758a920449d2636795cab84f053689065e975d Mon Sep 17 00:00:00 2001 From: Rahul Aggarwal Date: Thu, 12 Mar 2026 04:24:30 -0700 Subject: [PATCH 2/3] Fix 2025 married standard_schedule max values (.9 -> .99) Five bracket max values in the 2025 married standard_schedule were missing a trailing digit, e.g. 40949.9 instead of 40949.99. This caused bracket boundaries to be off by $0.09. 
--- .../income/tables/percentage/automated/married.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/python_taxes/federal/income/tables/percentage/automated/married.py b/src/python_taxes/federal/income/tables/percentage/automated/married.py index 21802bf..a434d90 100644 --- a/src/python_taxes/federal/income/tables/percentage/automated/married.py +++ b/src/python_taxes/federal/income/tables/percentage/automated/married.py @@ -112,31 +112,31 @@ ), RateRow( min=Decimal("17100.00"), - max=Decimal("40949.9"), + max=Decimal("40949.99"), withhold_amount=Decimal("0.00"), percent=10, ), RateRow( min=Decimal("40950.00"), - max=Decimal("114049.9"), + max=Decimal("114049.99"), withhold_amount=Decimal("2385.00"), percent=12, ), RateRow( min=Decimal("114050.00"), - max=Decimal("223799.9"), + max=Decimal("223799.99"), withhold_amount=Decimal("11157.00"), percent=22, ), RateRow( min=Decimal("223800.00"), - max=Decimal("411699.9"), + max=Decimal("411699.99"), withhold_amount=Decimal("35302.00"), percent=24, ), RateRow( min=Decimal("411700.00"), - max=Decimal("518149.9"), + max=Decimal("518149.99"), withhold_amount=Decimal("80398.00"), percent=32, ), From 631ba1eca6f613fd3ec3cc4ada7d8e844308a71c Mon Sep 17 00:00:00 2001 From: Rahul Aggarwal Date: Thu, 12 Mar 2026 04:24:39 -0700 Subject: [PATCH 3/3] Add update_tax_year tool and tests Automates adding new tax year data by parsing IRS Publication 15-T for withholding tables and fetching the SS wage base from the Federal Register API. Includes 50 tests covering parsing, validation, code generation, and integration against 2024/2025/2026 PDFs. 
--- pyproject.toml | 3 + tests/tools/__init__.py | 0 tests/tools/update_tax_year_test.py | 479 +++++++++++++++++++ tools/update_tax_year.py | 682 ++++++++++++++++++++++++++++ 4 files changed, 1164 insertions(+) create mode 100644 tests/tools/__init__.py create mode 100644 tests/tools/update_tax_year_test.py create mode 100644 tools/update_tax_year.py diff --git a/pyproject.toml b/pyproject.toml index 0e68862..939e3ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,9 @@ dev = [ coverage = [ "pytest-cov (>=7.0.0,<8.0.0)" ] +tools = [ + "pdfplumber (>=0.11.0,<1.0.0)", +] [tool.poetry] packages = [{include = "python_taxes", from = "src"}] diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/tools/update_tax_year_test.py b/tests/tools/update_tax_year_test.py new file mode 100644 index 0000000..105201b --- /dev/null +++ b/tests/tools/update_tax_year_test.py @@ -0,0 +1,479 @@ +"""Tests for tools/update_tax_year.py. + +Verifies the PDF extraction pipeline produces values that exactly match +the manually-verified data already committed in the codebase, for every +year where we have both a PDF and codebase data. 
+ +Usage: + uv run --group tools --group test python -m pytest tests/tools/ -v +""" + +import sys +import urllib.request +from decimal import Decimal +from pathlib import Path + +import pytest + +# Make the tools directory importable +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "tools")) + +import update_tax_year as tool # noqa: E402 + +from python_taxes.federal.income.tables.percentage import MAX # noqa: E402 +from python_taxes.federal.income.tables.percentage.automated import ( # noqa: E402 + hoh, + married, + single, +) +from python_taxes.federal.social_security import wage_limit # noqa: E402 + +# Map (filing_status, schedule_type) to codebase dicts +CODEBASE_TABLES = { + ("single", "standard"): single.standard_schedule, + ("single", "multiple_jobs"): single.multiple_jobs, + ("married", "standard"): married.standard_schedule, + ("married", "multiple_jobs"): married.multiple_jobs, + ("hoh", "standard"): hoh.standard_schedule, + ("hoh", "multiple_jobs"): hoh.multiple_jobs, +} + + +# --------------------------------------------------------------------------- +# PDF download helpers +# --------------------------------------------------------------------------- + +PDF_DIR = Path("/tmp/claude") + +PDF_URLS = { + 2024: "https://www.irs.gov/pub/irs-prior/p15t--2024.pdf", + 2025: "https://www.irs.gov/pub/irs-prior/p15t--2025.pdf", + 2026: "https://www.irs.gov/pub/irs-prior/p15t--2026.pdf", +} + +PDF_PATHS = { + 2024: PDF_DIR / "p15t_2024.pdf", + 2025: PDF_DIR / "p15t_2025.pdf", + 2026: PDF_DIR / "p15t_2026.pdf", +} + + +def _ensure_pdf(year: int) -> bool: + """Download the PDF for a given year if not already cached. 
Returns True on success.""" + path = PDF_PATHS.get(year) + if path is None: + return False + if path.exists(): + return True + url = PDF_URLS.get(year) + if url is None: + return False + try: + PDF_DIR.mkdir(parents=True, exist_ok=True) + urllib.request.urlretrieve(url, path) + return path.exists() + except Exception: + return False + + +def _pdf_available(year: int) -> bool: + return _ensure_pdf(year) + + +def _extract_tables(year: int) -> list[tool.TableData]: + """Extract tables from a year's PDF.""" + import pdfplumber + + pdf_path = str(PDF_PATHS[year]) + with pdfplumber.open(pdf_path) as pdf: + page, detected_year = tool.find_target_page(pdf) + assert detected_year == year + return tool.extract_tables(page) + + +# --------------------------------------------------------------------------- +# Parsing unit tests (no PDF needed) +# --------------------------------------------------------------------------- + + +class TestParseDollar: + def test_simple(self): + assert tool.parse_dollar("$0") == Decimal("0") + + def test_with_cents(self): + assert tool.parse_dollar("$2,480.00") == Decimal("2480.00") + + def test_large(self): + assert tool.parse_dollar("$206,583.50") == Decimal("206583.50") + + def test_no_dollar_sign(self): + # Shouldn't happen in practice, but the function strips $ + assert tool.parse_dollar("19300") == Decimal("19300") + + +class TestParsePercent: + def test_zero(self): + assert tool.parse_percent("0%") == 0 + + def test_nonzero(self): + assert tool.parse_percent("37%") == 37 + + +class TestParseDataLine: + """Test the core line-parsing logic.""" + + def test_normal_row_10_tokens(self): + # Married row 2 from 2026 PDF + tokens = [ + "$19,300", "$44,100", "$0.00", "10%", "$19,300", + "$16,100", "$28,500", "$0.00", "10%", "$16,100", + ] + left, right = tool.parse_data_line(tokens) + + assert left.min == Decimal("19300") + assert left.max == Decimal("44099.99") + assert left.withhold_amount == Decimal("0.00") + assert left.percent == 10 + + assert 
right.min == Decimal("16100") + assert right.max == Decimal("28499.99") + assert right.withhold_amount == Decimal("0.00") + assert right.percent == 10 + + def test_last_row_8_tokens(self): + # Married last row from 2026 PDF + tokens = [ + "$788,000", "$206,583.50", "37%", "$788,000", + "$400,450", "$103,291.75", "37%", "$400,450", + ] + left, right = tool.parse_data_line(tokens) + + assert left.min == Decimal("788000") + assert left.max == MAX + assert left.withhold_amount == Decimal("206583.50") + assert left.percent == 37 + + assert right.min == Decimal("400450") + assert right.max == MAX + assert right.withhold_amount == Decimal("103291.75") + assert right.percent == 37 + + def test_bad_token_count_raises(self): + with pytest.raises(ValueError, match="Expected 8 or 10 tokens"): + tool.parse_data_line(["$0", "$100", "10%"]) + + +class TestParseHalf: + def test_5_tokens(self): + row = tool._parse_half(["$0", "$7,500", "$0.00", "0%", "$0"]) + assert row.min == Decimal("0") + assert row.max == Decimal("7499.99") + assert row.withhold_amount == Decimal("0.00") + assert row.percent == 0 + + def test_4_tokens_last_row(self): + row = tool._parse_half(["$648,100", "$192,979.25", "37%", "$648,100"]) + assert row.min == Decimal("648100") + assert row.max == MAX + assert row.withhold_amount == Decimal("192979.25") + assert row.percent == 37 + + def test_bad_count_raises(self): + with pytest.raises(ValueError, match="Expected 4 or 5 tokens"): + tool._parse_half(["$0", "$100"]) + + +# --------------------------------------------------------------------------- +# Validation unit tests (no PDF needed) +# --------------------------------------------------------------------------- + + +class TestValidation: + def _make_table(self, filing_status, schedule_type, rows): + return tool.TableData(filing_status, schedule_type, rows) + + def _good_rows(self): + """Minimal valid 3-bracket table.""" + return [ + tool.BracketRow(Decimal("0"), Decimal("9999.99"), Decimal("0"), 0), + 
tool.BracketRow(Decimal("10000"), Decimal("49999.99"), Decimal("0"), 10), + tool.BracketRow(Decimal("50000"), MAX, Decimal("4000"), 22), + ] + + def _make_6_tables(self, rows=None): + if rows is None: + rows = self._good_rows() + tables = [] + for status in ["married", "single", "hoh"]: + for sched in ["standard", "multiple_jobs"]: + tables.append(self._make_table(status, sched, rows)) + return tables + + def test_valid_tables_pass(self): + tables = self._make_6_tables() + tool.validate_tables(tables, 9999) # Should not raise + + def test_wrong_table_count_raises(self): + tables = self._make_6_tables()[:5] + with pytest.raises(ValueError, match="Expected 6 tables"): + tool.validate_tables(tables, 9999) + + def test_missing_filing_status_raises(self): + rows = self._good_rows() + tables = [] + for status in ["married", "single", "single"]: + for sched in ["standard", "multiple_jobs"]: + tables.append(self._make_table(status, sched, rows)) + with pytest.raises(ValueError, match="Missing table combinations"): + tool.validate_tables(tables, 9999) + + def test_different_row_counts_raises(self): + tables = self._make_6_tables() + # Give one table an extra row + bad_rows = self._good_rows() + [ + tool.BracketRow(Decimal("0"), MAX, Decimal("0"), 50), + ] + tables[0] = self._make_table("married", "standard", bad_rows) + with pytest.raises(ValueError, match="different row counts"): + tool.validate_tables(tables, 9999) + + def test_different_percentages_raises(self): + tables = self._make_6_tables() + # Change one table's percentages + bad_rows = [ + tool.BracketRow(Decimal("0"), Decimal("9999.99"), Decimal("0"), 0), + tool.BracketRow(Decimal("10000"), Decimal("49999.99"), Decimal("0"), 15), + tool.BracketRow(Decimal("50000"), MAX, Decimal("4000"), 22), + ] + tables[0] = self._make_table("married", "standard", bad_rows) + with pytest.raises(ValueError, match="different percentage sequences"): + tool.validate_tables(tables, 9999) + + def 
test_first_row_nonzero_min_raises(self): + bad_rows = [ + tool.BracketRow(Decimal("100"), Decimal("9999.99"), Decimal("0"), 0), + tool.BracketRow(Decimal("10000"), Decimal("49999.99"), Decimal("0"), 10), + tool.BracketRow(Decimal("50000"), MAX, Decimal("4000"), 22), + ] + tables = self._make_6_tables(bad_rows) + with pytest.raises(ValueError, match="first row min should be 0"): + tool.validate_tables(tables, 9999) + + def test_non_increasing_percentages_raises(self): + bad_rows = [ + tool.BracketRow(Decimal("0"), Decimal("9999.99"), Decimal("0"), 0), + tool.BracketRow(Decimal("10000"), Decimal("49999.99"), Decimal("0"), 10), + tool.BracketRow(Decimal("50000"), MAX, Decimal("4000"), 10), # same as prev + ] + tables = self._make_6_tables(bad_rows) + with pytest.raises(ValueError, match="not increasing"): + tool.validate_tables(tables, 9999) + + def test_bracket_gap_raises(self): + bad_rows = [ + tool.BracketRow(Decimal("0"), Decimal("9999.99"), Decimal("0"), 0), + tool.BracketRow(Decimal("10001"), Decimal("49999.99"), Decimal("0"), 10), # gap + tool.BracketRow(Decimal("50000"), MAX, Decimal("4000"), 22), + ] + tables = self._make_6_tables(bad_rows) + with pytest.raises(ValueError, match="min=10001"): + tool.validate_tables(tables, 9999) + + def test_last_row_not_max_raises(self): + bad_rows = [ + tool.BracketRow(Decimal("0"), Decimal("9999.99"), Decimal("0"), 0), + tool.BracketRow(Decimal("10000"), Decimal("49999.99"), Decimal("0"), 10), + tool.BracketRow(Decimal("50000"), Decimal("99999.99"), Decimal("4000"), 22), + ] + tables = self._make_6_tables(bad_rows) + with pytest.raises(ValueError, match="last row max should be MAX"): + tool.validate_tables(tables, 9999) + + +# --------------------------------------------------------------------------- +# Code generation unit tests (no PDF needed) +# --------------------------------------------------------------------------- + + +class TestCodeGeneration: + def test_generate_year_block(self): + rows = [ + 
tool.BracketRow(Decimal("0"), Decimal("9999.99"), Decimal("0"), 0), + tool.BracketRow(Decimal("10000"), MAX, Decimal("500.50"), 10), + ] + code = tool.generate_year_block(rows, 2099) + assert ' 2099: [' in code + assert 'min=Decimal("0.00")' in code + assert 'max=Decimal("9999.99")' in code + assert 'max=MAX' in code + assert 'withhold_amount=Decimal("500.50")' in code + assert 'percent=0,' in code + assert 'percent=10,' in code + + def test_format_decimal_always_2dp(self): + assert tool.format_decimal(Decimal("0")) == "0.00" + assert tool.format_decimal(Decimal("1234")) == "1234.00" + assert tool.format_decimal(Decimal("99.5")) == "99.50" + assert tool.format_decimal(Decimal("206583.50")) == "206583.50" + + +# --------------------------------------------------------------------------- +# Social Security wage base integration tests (require network) +# --------------------------------------------------------------------------- + + +class TestSSWageBase: + """Verify SS wage base fetched from Federal Register matches codebase data.""" + + @pytest.mark.parametrize("year", [2024, 2025, 2026]) + def test_matches_codebase(self, year): + fetched = tool.fetch_ss_wage_base(year) + assert fetched == wage_limit[year], ( + f"SS wage base mismatch for {year}: fetched {fetched} != codebase {wage_limit[year]}" + ) + + +# --------------------------------------------------------------------------- +# PDF extraction integration tests (require downloaded PDFs) +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif( + not _pdf_available(2024), + reason="2024 PDF not available at /tmp/claude/p15t_2024.pdf", +) +class TestExtraction2024: + """Verify tool extraction matches every value in the 2024 codebase data.""" + + @pytest.fixture(scope="class") + def tables(self): + return _extract_tables(2024) + + def test_table_count(self, tables): + assert len(tables) == 6 + + def test_validates(self, tables): + tool.validate_tables(tables, 2024) + 
+ @pytest.mark.parametrize( + "filing_status,schedule_type", + [ + ("single", "standard"), + ("single", "multiple_jobs"), + ("married", "standard"), + ("married", "multiple_jobs"), + ("hoh", "standard"), + ("hoh", "multiple_jobs"), + ], + ) + def test_matches_codebase(self, tables, filing_status, schedule_type): + extracted = { + (t.filing_status, t.schedule_type): t.rows for t in tables + } + tool_rows = extracted[(filing_status, schedule_type)] + codebase_rows = CODEBASE_TABLES[(filing_status, schedule_type)][2024] + + assert len(tool_rows) == len(codebase_rows) + for i, (ext, cb) in enumerate(zip(tool_rows, codebase_rows)): + assert ext.min == cb.min, f"row {i} min: {ext.min} != {cb.min}" + assert ext.max == cb.max, f"row {i} max: {ext.max} != {cb.max}" + assert ext.withhold_amount == cb.withhold_amount, ( + f"row {i} withhold: {ext.withhold_amount} != {cb.withhold_amount}" + ) + assert ext.percent == cb.percent, ( + f"row {i} percent: {ext.percent} != {cb.percent}" + ) + + +@pytest.mark.skipif( + not _pdf_available(2025), + reason="2025 PDF not available at /tmp/claude/p15t_2025.pdf", +) +class TestExtraction2025: + """Verify tool extraction matches every value in the 2025 codebase data.""" + + @pytest.fixture(scope="class") + def tables(self): + return _extract_tables(2025) + + def test_table_count(self, tables): + assert len(tables) == 6 + + def test_validates(self, tables): + tool.validate_tables(tables, 2025) + + @pytest.mark.parametrize( + "filing_status,schedule_type", + [ + ("single", "standard"), + ("single", "multiple_jobs"), + ("married", "standard"), + ("married", "multiple_jobs"), + ("hoh", "standard"), + ("hoh", "multiple_jobs"), + ], + ) + def test_matches_codebase(self, tables, filing_status, schedule_type): + extracted = { + (t.filing_status, t.schedule_type): t.rows for t in tables + } + tool_rows = extracted[(filing_status, schedule_type)] + codebase_rows = CODEBASE_TABLES[(filing_status, schedule_type)][2025] + + assert len(tool_rows) == 
len(codebase_rows) + for i, (ext, cb) in enumerate(zip(tool_rows, codebase_rows)): + assert ext.min == cb.min, f"row {i} min: {ext.min} != {cb.min}" + assert ext.max == cb.max, f"row {i} max: {ext.max} != {cb.max}" + assert ext.withhold_amount == cb.withhold_amount, ( + f"row {i} withhold: {ext.withhold_amount} != {cb.withhold_amount}" + ) + assert ext.percent == cb.percent, ( + f"row {i} percent: {ext.percent} != {cb.percent}" + ) + + +@pytest.mark.skipif( + not _pdf_available(2026), + reason="2026 PDF not available at /tmp/claude/p15t.pdf", +) +class TestExtraction2026: + """Verify tool extraction matches every value in the 2026 codebase data.""" + + @pytest.fixture(scope="class") + def tables(self): + return _extract_tables(2026) + + def test_table_count(self, tables): + assert len(tables) == 6 + + def test_validates(self, tables): + tool.validate_tables(tables, 2026) + + @pytest.mark.parametrize( + "filing_status,schedule_type", + [ + ("single", "standard"), + ("single", "multiple_jobs"), + ("married", "standard"), + ("married", "multiple_jobs"), + ("hoh", "standard"), + ("hoh", "multiple_jobs"), + ], + ) + def test_matches_codebase(self, tables, filing_status, schedule_type): + extracted = { + (t.filing_status, t.schedule_type): t.rows for t in tables + } + tool_rows = extracted[(filing_status, schedule_type)] + codebase_rows = CODEBASE_TABLES[(filing_status, schedule_type)][2026] + + assert len(tool_rows) == len(codebase_rows) + for i, (ext, cb) in enumerate(zip(tool_rows, codebase_rows)): + assert ext.min == cb.min, f"row {i} min: {ext.min} != {cb.min}" + assert ext.max == cb.max, f"row {i} max: {ext.max} != {cb.max}" + assert ext.withhold_amount == cb.withhold_amount, ( + f"row {i} withhold: {ext.withhold_amount} != {cb.withhold_amount}" + ) + assert ext.percent == cb.percent, ( + f"row {i} percent: {ext.percent} != {cb.percent}" + ) diff --git a/tools/update_tax_year.py b/tools/update_tax_year.py new file mode 100644 index 0000000..a619bca --- /dev/null 
+++ b/tools/update_tax_year.py @@ -0,0 +1,682 @@ +"""Update federal income tax withholding tables from IRS Publication 15-T. + +Downloads the PDF, extracts the Percentage Method Tables for Automated Payroll +Systems, validates the data, and updates the source files. Also fetches the +Social Security wage base from the Federal Register. + +Usage: + uv run --group tools python tools/update_tax_year.py [OPTIONS] + +Options: + --year YEAR Tax year to add (auto-detected from PDF if omitted) + --pdf PATH_OR_URL Path to local PDF or URL (default: IRS website) + --dry-run Print extracted data without modifying files +""" + +import argparse +import json +import re +import sys +import tempfile +import urllib.request +from decimal import Decimal +from pathlib import Path +from typing import NamedTuple + +MAX = Decimal("999999999999.99") + +IRS_PDF_URL = "https://www.irs.gov/pub/irs-pdf/p15t.pdf" + +REPO_ROOT = Path(__file__).resolve().parent.parent +SINGLE_PATH = ( + REPO_ROOT + / "src/python_taxes/federal/income/tables/percentage/automated/single.py" +) +MARRIED_PATH = ( + REPO_ROOT + / "src/python_taxes/federal/income/tables/percentage/automated/married.py" +) +HOH_PATH = ( + REPO_ROOT + / "src/python_taxes/federal/income/tables/percentage/automated/hoh.py" +) +SOCIAL_SECURITY_PATH = REPO_ROOT / "src/python_taxes/federal/social_security.py" +INIT_PATH = REPO_ROOT / "src/python_taxes/__init__.py" +FEDERAL_INIT_PATH = REPO_ROOT / "src/python_taxes/federal/__init__.py" + +# Federal Register API for Social Security wage base +FEDERAL_REGISTER_SEARCH_URL = ( + "https://www.federalregister.gov/api/v1/documents.json" + "?conditions[term]=%22cost-of-living+increase+and+other+determinations%22" + "&conditions[agencies][]=social-security-administration" + "&per_page=10&order=newest" +) +WAGE_BASE_RE = re.compile( + r"OASDI contribution and benefit base is \$([\d,]+)" +) + +# Regex to find the target page by title +PAGE_TITLE_RE = re.compile( + r"(\d{4})\s+Percentage Method Tables for 
# NOTE(review): this chunk opens with the closing tail of a definition that
# starts above the visible region (the string ending in
# 'Automated Payroll Systems"' followed by ")"). Keep that tail with its
# statement; it is intentionally not reconstructed here.

# Regex to extract monetary values ($X,XXX.XX) and percentages (XX%)
TOKEN_RE = re.compile(r"\$[\d,]+(?:\.\d+)?|\d+%")

# Filing status patterns and their order on the page
FILING_STATUSES = [
    ("married", re.compile(r"Married Filing Jointly")),
    ("single", re.compile(r"Single or Married Filing Separately")),
    ("hoh", re.compile(r"Head of Household")),
]

# Upper bound, in seconds, for each blocking HTTP request made with urlopen.
HTTP_TIMEOUT_SECONDS = 30


class BracketRow(NamedTuple):
    """One withholding bracket: inclusive wage range, base amount and rate."""

    min: Decimal
    max: Decimal
    withhold_amount: Decimal
    percent: int


class TableData(NamedTuple):
    """All bracket rows for one (filing status, schedule type) combination."""

    filing_status: str
    schedule_type: str
    rows: list[BracketRow]


# ---------------------------------------------------------------------------
# PDF acquisition
# ---------------------------------------------------------------------------


def acquire_pdf(source: str) -> str:
    """Download PDF from URL or verify local path. Returns local file path.

    Args:
        source: An ``http(s)://`` URL or a path to a local PDF.

    Returns:
        Path of a local file containing the PDF.

    Raises:
        FileNotFoundError: If ``source`` is a local path that does not exist.
    """
    if source.startswith(("http://", "https://")):
        # Only the unique name is needed; close the handle right away so it is
        # not leaked and so the download can reopen the file on any platform.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp_path = tmp.name
        print(f"Downloading PDF from {source} ...")
        try:
            urllib.request.urlretrieve(source, tmp_path)
        except Exception:
            # Do not leave an empty/partial temp file behind on failure.
            Path(tmp_path).unlink(missing_ok=True)
            raise
        print(f"Saved to {tmp_path}")
        return tmp_path

    path = Path(source)
    if not path.exists():
        raise FileNotFoundError(f"PDF not found: {source}")
    return str(path)


# ---------------------------------------------------------------------------
# Page discovery
# ---------------------------------------------------------------------------


def find_target_page(pdf) -> tuple:
    """Find the page with the Percentage Method Tables.

    Returns (page_object, detected_year), where the year is the first capture
    group of ``PAGE_TITLE_RE`` on the first matching page.

    Raises:
        ValueError: If no page matches ``PAGE_TITLE_RE``.
    """
    for page in pdf.pages:
        # extract_text() may yield nothing for image-only pages.
        text = page.extract_text() or ""
        match = PAGE_TITLE_RE.search(text)
        if match:
            return page, int(match.group(1))
    raise ValueError(
        "Could not find 'Percentage Method Tables for Automated Payroll "
        "Systems' in any page of the PDF."
    )


# ---------------------------------------------------------------------------
# Parsing
# ---------------------------------------------------------------------------


def parse_dollar(token: str) -> Decimal:
    """Parse a dollar token like '$19,300' or '$2,480.00' into a Decimal."""
    return Decimal(token.replace("$", "").replace(",", ""))


def parse_percent(token: str) -> int:
    """Parse a percentage token like '10%' into an integer."""
    return int(token.replace("%", ""))


def parse_data_line(tokens: list[str]) -> tuple[BracketRow, BracketRow]:
    """Parse a line's tokens into left (standard) and right (multiple_jobs) rows.

    Normal rows have 10 tokens: A B C D% E | A B C D% E
    Last rows have 8 tokens:    A C D% E   | A C D% E

    Raises:
        ValueError: If the line has neither 8 nor 10 tokens.
    """
    n = len(tokens)
    if n not in (8, 10):
        raise ValueError(f"Expected 8 or 10 tokens per data line, got {n}: {tokens}")

    half = n // 2
    return _parse_half(tokens[:half]), _parse_half(tokens[half:])


def _parse_half(tokens: list[str]) -> BracketRow:
    """Parse 4 or 5 tokens for one side of a data line into a BracketRow.

    5 tokens: A, B, C, D%, E  (normal row)
    4 tokens: A, C, D%, E     (last row, no upper bound)

    The trailing E token is not used.

    Raises:
        ValueError: If ``tokens`` has neither 4 nor 5 entries.
    """
    if len(tokens) == 5:
        a, b, c, d, _e = tokens
        # The printed upper bound is exclusive; store an inclusive maximum.
        max_val = parse_dollar(b) - Decimal("0.01")
    elif len(tokens) == 4:
        a, c, d, _e = tokens
        max_val = MAX
    else:
        raise ValueError(f"Expected 4 or 5 tokens, got {len(tokens)}: {tokens}")

    return BracketRow(
        min=parse_dollar(a),
        max=max_val,
        # Ensure withhold_amount always has 2 decimal places
        withhold_amount=parse_dollar(c).quantize(Decimal("0.01")),
        percent=parse_percent(d),
    )


def extract_tables(page) -> list[TableData]:
    """Extract all 6 tax tables from the target page using text parsing.

    The page text has filing status headers followed by data lines for each
    filing status. Each data line contains both left (standard) and right
    (multiple_jobs) values as interleaved tokens. The last row of each table
    is detected by having 8 tokens (no upper bound) instead of 10.

    Returns:
        Tables ordered married, single, hoh; standard before multiple_jobs.

    Raises:
        ValueError: If a filing status never appears or a data line has an
            unexpected token count.
    """
    # Guard against pages without a text layer, as find_target_page does.
    text = page.extract_text() or ""

    tables: dict[str, dict[str, list[BracketRow]]] = {}
    current_status = None

    for line in text.split("\n"):
        # Check for filing status header
        for status_key, pattern in FILING_STATUSES:
            if pattern.search(line):
                current_status = status_key
                tables.setdefault(status_key, {"standard": [], "multiple_jobs": []})
                break

        # Check for data line (starts with $)
        if not current_status or not line.strip().startswith("$"):
            continue
        tokens = TOKEN_RE.findall(line)
        if not tokens:
            continue

        left_row, right_row = parse_data_line(tokens)
        tables[current_status]["standard"].append(left_row)
        tables[current_status]["multiple_jobs"].append(right_row)

        # Last row of a table has no upper bound (8 tokens instead of 10)
        if len(tokens) == 8:
            current_status = None

    # Build flat list of TableData
    result = []
    for status_key in ["married", "single", "hoh"]:
        if status_key not in tables:
            raise ValueError(f"Missing filing status: {status_key}")
        for schedule_type in ["standard", "multiple_jobs"]:
            result.append(
                TableData(status_key, schedule_type, tables[status_key][schedule_type])
            )

    return result


# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------


def validate_tables(tables: list[TableData], year: int) -> None:
    """Validate all 6 extracted tables against structural invariants.

    Does NOT hardcode the number of brackets or specific percentages,
    so the tool remains correct if Congress changes the tax structure.

    Raises:
        ValueError: On the first violated invariant.
    """
    if len(tables) != 6:
        raise ValueError(f"Expected 6 tables, got {len(tables)}")

    expected = {
        ("married", "standard"),
        ("married", "multiple_jobs"),
        ("single", "standard"),
        ("single", "multiple_jobs"),
        ("hoh", "standard"),
        ("hoh", "multiple_jobs"),
    }
    actual = {(t.filing_status, t.schedule_type) for t in tables}
    if actual != expected:
        raise ValueError(f"Missing table combinations: {expected - actual}")

    # All 6 tables must have the same number of rows
    row_counts = {f"{t.filing_status}/{t.schedule_type}": len(t.rows) for t in tables}
    unique_counts = set(row_counts.values())
    if len(unique_counts) != 1:
        raise ValueError(f"Tables have different row counts: {row_counts}")

    # All 6 tables must have the same percentage sequence
    pct_sequences = {
        f"{t.filing_status}/{t.schedule_type}": tuple(r.percent for r in t.rows)
        for t in tables
    }
    unique_pcts = set(pct_sequences.values())
    if len(unique_pcts) != 1:
        raise ValueError(f"Tables have different percentage sequences: {pct_sequences}")

    num_rows = unique_counts.pop()
    pct_sequence = list(unique_pcts.pop())
    print(f" {num_rows} brackets, percentages: {pct_sequence}")

    for table in tables:
        label = f"{table.filing_status}/{table.schedule_type}"
        rows = table.rows

        if len(rows) < 2:
            raise ValueError(f"{label}: need at least 2 rows, got {len(rows)}")

        # First row: starts at 0, no base withholding, 0% rate
        first = rows[0]
        if first.min != Decimal("0.00"):
            raise ValueError(f"{label}: first row min should be 0.00")
        if first.withhold_amount != Decimal("0.00"):
            raise ValueError(f"{label}: first row withhold should be 0.00")
        if first.percent != 0:
            raise ValueError(f"{label}: first row percent should be 0")

        # Percentages must be strictly increasing. This pass is kept separate
        # from the continuity pass so percentage errors are reported first.
        for i, (prev, cur) in enumerate(zip(rows, rows[1:]), start=1):
            if cur.percent <= prev.percent:
                raise ValueError(
                    f"{label}: percentages not increasing at row {i}: "
                    f"{prev.percent} -> {cur.percent}"
                )

        # Bracket continuity: each row's min == prev row's max + 0.01
        for i, (prev, cur) in enumerate(zip(rows, rows[1:]), start=1):
            if prev.max == MAX:
                raise ValueError(f"{label}: row {i-1} has MAX but is not last")
            expected_min = prev.max + Decimal("0.01")
            if cur.min != expected_min:
                raise ValueError(
                    f"{label}: row {i} min={cur.min} != expected "
                    f"{expected_min} (prev max={prev.max})"
                )

        # Last row must have no upper bound
        if rows[-1].max != MAX:
            raise ValueError(f"{label}: last row max should be MAX")

    print(f"All 6 tables validated for year {year}.")


# ---------------------------------------------------------------------------
# Code generation
# ---------------------------------------------------------------------------


def format_decimal(value: Decimal) -> str:
    """Format Decimal with exactly 2 decimal places."""
    return str(value.quantize(Decimal("0.01")))


def generate_year_block(rows: list[BracketRow], year: int) -> str:
    """Generate Python source for a year's RateRow list entry.

    The text is indented as a dict entry (4 columns for the key, 8 for each
    ``RateRow(``, 12 for its fields) so it matches the layout of the table
    modules it is inserted into. No trailing newline is included.
    """
    lines = [f"    {year}: ["]
    for row in rows:
        if row.max == MAX:
            max_expr = "MAX"
        else:
            max_expr = f'Decimal("{format_decimal(row.max)}")'

        lines.extend(
            [
                "        RateRow(",
                f'            min=Decimal("{format_decimal(row.min)}"),',
                f"            max={max_expr},",
                f'            withhold_amount=Decimal("{format_decimal(row.withhold_amount)}"),',
                f"            percent={row.percent},",
                "        ),",
            ]
        )
    lines.append("    ],")
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# File modification
# ---------------------------------------------------------------------------


def update_table_file(
    file_path: Path,
    standard_rows: list[BracketRow],
    multiple_jobs_rows: list[BracketRow],
    year: int,
    dry_run: bool,
) -> None:
    """Insert a new year's data into a table file (single.py, married.py, hoh.py).

    Does nothing when the file already has an entry for ``year``. With
    ``dry_run`` the new content is computed but not written.

    Raises:
        ValueError: If either dict's closing brace cannot be located.
    """
    content = file_path.read_text(encoding="utf-8")

    if f" {year}: [" in content:
        print(f" {file_path.name}: year {year} already exists, skipping.")
        return

    standard_code = generate_year_block(standard_rows, year)
    multiple_code = generate_year_block(multiple_jobs_rows, year)

    # Insert into standard_schedule: find the closing "}" before "multiple_jobs"
    pattern_std = re.compile(
        r"(standard_schedule\s*=\s*\{.*?)(^\})",
        re.MULTILINE | re.DOTALL,
    )
    match = pattern_std.search(content)
    if not match:
        raise ValueError(f"Could not find standard_schedule closing brace in {file_path.name}")
    brace = match.start(2)
    content = content[:brace] + standard_code + "\n" + content[brace:]

    # Insert into multiple_jobs: find the closing "}" at end of file.
    # Searched on the already-updated content so offsets stay valid.
    pattern_mj = re.compile(
        r"(multiple_jobs\s*=\s*\{.*?)(^\})\s*$",
        re.MULTILINE | re.DOTALL,
    )
    match = pattern_mj.search(content)
    if not match:
        raise ValueError(f"Could not find multiple_jobs closing brace in {file_path.name}")
    brace = match.start(2)
    content = content[:brace] + multiple_code + "\n" + content[brace:]

    if dry_run:
        print(f" {file_path.name}: would insert standard_schedule[{year}] and multiple_jobs[{year}]")
    else:
        file_path.write_text(content, encoding="utf-8")
        print(f" {file_path.name}: updated.")


def update_current_tax_year(year: int, dry_run: bool) -> None:
    """Update CURRENT_TAX_YEAR in __init__.py.

    Raises:
        ValueError: If no ``CURRENT_TAX_YEAR = <digits>`` assignment is found.
    """
    content = INIT_PATH.read_text(encoding="utf-8")
    pattern = re.compile(r"CURRENT_TAX_YEAR\s*=\s*\d+")
    new_content, replaced = pattern.subn(f"CURRENT_TAX_YEAR = {year}", content)
    if not replaced:
        raise ValueError("Could not find CURRENT_TAX_YEAR in __init__.py")

    if dry_run:
        print(f" __init__.py: would set CURRENT_TAX_YEAR = {year}")
    else:
        INIT_PATH.write_text(new_content, encoding="utf-8")
        print(f" __init__.py: set CURRENT_TAX_YEAR = {year}")


def fetch_ss_wage_base(year: int) -> Decimal:
    """Fetch the Social Security wage base for a given year from the Federal Register.

    Searches for the SSA's annual "Cost-of-Living Increase and Other Determinations"
    notice and extracts the OASDI contribution and benefit base.

    Raises:
        ValueError: If no notice for ``year`` is found, the notice has no raw
            text URL, or ``WAGE_BASE_RE`` does not match its text.
    """
    print(f" Fetching SS wage base for {year} from Federal Register ...")

    # Search for the COLA notice for this year
    req = urllib.request.Request(
        FEDERAL_REGISTER_SEARCH_URL,
        headers={"Accept": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_SECONDS) as resp:
        data = json.loads(resp.read())

    # Find the document for our target year
    results = data.get("results", [])
    needle = f"for {year}".lower()
    doc = next(
        # "or ''" tolerates a JSON null title.
        (r for r in results if needle in (r.get("title") or "").lower()),
        None,
    )
    if doc is None:
        raise ValueError(
            f"Could not find Federal Register COLA notice for {year}. "
            f"Searched {len(results)} results."
        )

    doc_number = doc["document_number"]
    print(f" Found document: {doc['title']} ({doc_number})")

    # Fetch the document details to get the raw text URL
    detail_url = (
        f"https://www.federalregister.gov/api/v1/documents/{doc_number}.json"
        f"?fields[]=raw_text_url"
    )
    detail_req = urllib.request.Request(
        detail_url, headers={"Accept": "application/json"}
    )
    with urllib.request.urlopen(detail_req, timeout=HTTP_TIMEOUT_SECONDS) as resp:
        detail = json.loads(resp.read())

    raw_text_url = detail.get("raw_text_url")
    if not raw_text_url:
        raise ValueError(f"No raw_text_url for document {doc_number}")

    # Fetch and parse the raw text
    with urllib.request.urlopen(raw_text_url, timeout=HTTP_TIMEOUT_SECONDS) as resp:
        text = resp.read().decode("utf-8")

    match = WAGE_BASE_RE.search(text)
    if not match:
        raise ValueError(
            f"Could not find OASDI contribution and benefit base in document {doc_number}"
        )

    wage_base = Decimal(match.group(1).replace(",", ""))
    print(f" SS wage base for {year}: ${wage_base}")
    return wage_base


def update_ss_wage_base(year: int, wage_base: Decimal, dry_run: bool) -> None:
    """Add the SS wage base for a year to social_security.py.

    Does nothing when an entry for ``year`` already exists.

    Raises:
        ValueError: If the ``wage_limit`` dict's closing brace is not found.
    """
    content = SOCIAL_SECURITY_PATH.read_text(encoding="utf-8")

    if f" {year}: Decimal(" in content:
        print(f" social_security.py: year {year} already exists, skipping.")
        return

    # Find the closing "}" of the wage_limit dict
    pattern = re.compile(
        r"(wage_limit\s*=\s*\{.*?)(^\})",
        re.MULTILINE | re.DOTALL,
    )
    match = pattern.search(content)
    if not match:
        raise ValueError("Could not find wage_limit closing brace in social_security.py")

    # Indented as a dict entry; the wage base is written as a whole number.
    new_entry = f'    {year}: Decimal("{int(wage_base)}"),\n'
    brace = match.start(2)
    content = content[:brace] + new_entry + content[brace:]

    if dry_run:
        print(f" social_security.py: would add wage_limit[{year}] = ${wage_base}")
    else:
        SOCIAL_SECURITY_PATH.write_text(content, encoding="utf-8")
        print(f" social_security.py: added wage_limit[{year}] = ${wage_base}")


def _add_year_to_validator_source(content: str, year: int) -> str:
    """Return ``content`` with ``year`` added to the is_valid_tax_year list and message.

    Returns ``content`` unchanged when the year is already listed.

    Raises:
        ValueError: If the ``if value in [...]:`` year list is not found.
    """
    # Update the year list: "if value in [2023, 2024, 2025, 2026]:"
    list_match = re.search(r"(if value in \[)([\d, ]+)(\]:)", content)
    if not list_match:
        raise ValueError("Could not find year list in is_valid_tax_year")

    # Pull the numbers out directly so spacing variants ("2023,2024", a
    # trailing comma or space) can neither hide an existing year nor yield a
    # malformed list when the new one is appended.
    years = re.findall(r"\d+", list_match.group(2))
    if str(year) in years:
        return content

    new_years = ", ".join(years + [str(year)])
    content = content[: list_match.start(2)] + new_years + content[list_match.end(2) :]

    # Update error message: "Valid tax years are 2023, 2024, 2025, and 2026."
    # Best effort: a message in any other shape is left untouched.
    msg_match = re.search(
        r"(Valid tax years are )([\d, ]+),\s+and\s+(\d+)\.", content
    )
    if msg_match:
        prefix_years = msg_match.group(2) + ", " + msg_match.group(3)
        new_msg = f"{msg_match.group(1)}{prefix_years}, and {year}."
        content = content[: msg_match.start()] + new_msg + content[msg_match.end() :]

    return content


def update_valid_tax_years(year: int, dry_run: bool) -> None:
    """Add year to is_valid_tax_year in federal/__init__.py.

    Raises:
        ValueError: If the year list in is_valid_tax_year cannot be found.
    """
    content = FEDERAL_INIT_PATH.read_text(encoding="utf-8")
    updated = _add_year_to_validator_source(content, year)

    if updated == content:
        print(f" federal/__init__.py: year {year} already present, skipping.")
        return

    if dry_run:
        print(f" federal/__init__.py: would add {year} to is_valid_tax_year")
    else:
        FEDERAL_INIT_PATH.write_text(updated, encoding="utf-8")
        print(f" federal/__init__.py: added {year} to is_valid_tax_year")


# ---------------------------------------------------------------------------
# Display
# ---------------------------------------------------------------------------


def print_tables(tables: list[TableData]) -> None:
    """Pretty-print extracted tables."""
    for table in tables:
        print(f"\n {table.filing_status} / {table.schedule_type}:")
        for row in table.rows:
            max_str = "MAX" if row.max == MAX else f"${format_decimal(row.max):>15}"
            print(
                f" min=${format_decimal(row.min):>12} "
                f"max={max_str:>16} "
                f"withhold=${format_decimal(row.withhold_amount):>12} "
                f"pct={row.percent:>2}%"
            )


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


def parse_args() -> argparse.Namespace:
    """Parse the command-line options (--year, --pdf, --dry-run)."""
    parser = argparse.ArgumentParser(
        description="Update tax year tables from IRS Publication 15-T"
    )
    parser.add_argument(
        "--year",
        type=int,
        default=None,
        help="Tax year to add (auto-detected from PDF if omitted)",
    )
    parser.add_argument(
        "--pdf",
        type=str,
        default=IRS_PDF_URL,
        help="Path to local PDF or URL to download",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print extracted data without modifying files",
    )
    return parser.parse_args()


def main() -> None:
    """Run the update: extract and validate the tables, then patch the sources.

    Everything that can fail on external input (PDF parsing, validation, the
    wage-base lookup) runs before the first file is written, so a failure
    cannot leave the package half-updated.
    """
    args = parse_args()

    try:
        import pdfplumber
    except ImportError:
        print(
            "Error: pdfplumber is required. Install with:\n"
            " uv sync --group tools",
            file=sys.stderr,
        )
        sys.exit(1)

    # Step 1: Acquire PDF
    pdf_path = acquire_pdf(args.pdf)

    # Steps 2-3 stay inside the "with": the page object is only used while
    # the PDF it belongs to is still open.
    with pdfplumber.open(pdf_path) as pdf:
        # Step 2: Find target page
        page, detected_year = find_target_page(pdf)

        year = args.year or detected_year
        print(f"Tax year: {year}")

        if args.year and args.year != detected_year:
            print(
                f"WARNING: specified year ({args.year}) differs from "
                f"PDF year ({detected_year})"
            )

        # Step 3: Extract tables
        tables = extract_tables(page)

    # Step 4: Validate
    validate_tables(tables, year)

    if args.dry_run:
        print("\n--- Extracted data (dry run) ---")
        print_tables(tables)
        print()

    # Organize by filing status
    by_status: dict[str, dict[str, list[BracketRow]]] = {}
    for t in tables:
        by_status.setdefault(t.filing_status, {})[t.schedule_type] = t.rows

    # Step 5: Fetch the Social Security wage base (network) before any file
    # is modified.
    print("\nSocial Security wage base:")
    wage_base = fetch_ss_wage_base(year)

    # Step 6: Update files
    print("\nUpdating files:")
    file_map = {
        "single": SINGLE_PATH,
        "married": MARRIED_PATH,
        "hoh": HOH_PATH,
    }
    for status, path in file_map.items():
        update_table_file(
            path,
            by_status[status]["standard"],
            by_status[status]["multiple_jobs"],
            year,
            args.dry_run,
        )

    update_ss_wage_base(year, wage_base, args.dry_run)
    update_current_tax_year(year, args.dry_run)
    update_valid_tax_years(year, args.dry_run)

    if not args.dry_run:
        print(f"\nDone! Tax year {year} has been added.")
        print("Next steps:")
        print(" 1. Review changes: git diff")
        print(" 2. Run tests: uv run --group test python -m pytest tests/")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary: report any failure as one line and exit non-zero.
        print(f"ERROR: {e}", file=sys.stderr)
        sys.exit(1)