From f4c892e6d9ee6b94592d161ba9127f6a98a4bc73 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:53:43 +0200
Subject: [PATCH 01/18] feat: add code_system discriminator field to NUTSResult

---
 app/models.py     | 7 +++++++
 tests/test_api.py | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/app/models.py b/app/models.py
index ee141db..e378914 100644
--- a/app/models.py
+++ b/app/models.py
@@ -6,6 +6,13 @@
 class NUTSResult(BaseModel):
     postal_code: str = Field(description="The queried postal code (normalized)")
     country_code: str = Field(description="ISO 3166-1 alpha-2 country code")
+    code_system: Literal["NUTS", "ITL"] = Field(
+        default="NUTS",
+        description=(
+            "Territorial coding scheme of the nuts1/2/3 fields. 'NUTS' for GISCO-sourced "
+            "EU/EFTA/candidate data; 'ITL' for UK data from the ONS NSPL."
+        ),
+    )
     match_type: Literal["exact", "estimated", "approximate"] = Field(
         description="How the result was determined"
     )
diff --git a/tests/test_api.py b/tests/test_api.py
index de9bcd6..49f7cc8 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -17,6 +17,11 @@ def test_200_cache_header(self, client):
         resp = client.get("/lookup", params={"postal_code": "10115", "country": "DE"})
         assert "public" in resp.headers.get("cache-control", "")
 
+    def test_response_includes_code_system_nuts(self, client):
+        resp = client.get("/lookup", params={"postal_code": "10115", "country": "DE"})
+        assert resp.status_code == 200
+        assert resp.json()["code_system"] == "NUTS"
+
     def test_400_unsupported_country(self, client):
         resp = client.get("/lookup", params={"postal_code": "12345", "country": "ZZ"})
         assert resp.status_code == 400

From 665f21179696a0b9702350e7bf321c334d3672fd Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:54:49 +0200
Subject: [PATCH 02/18] feat: add UK postcode regex and bump patterns version
 to 1.3

---
 app/postal_patterns.json      |  7 ++++++-
 tests/test_postal_patterns.py | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/app/postal_patterns.json b/app/postal_patterns.json
index 7e2fdd3..a0ea06c 100644
--- a/app/postal_patterns.json
+++ b/app/postal_patterns.json
@@ -1,5 +1,5 @@
 {
-  "_meta": { "version": "1.2", "date": "2026-07-02" },
+  "_meta": { "version": "1.3", "date": "2026-07-03" },
   "AL": {
     "regex": "^(?:AL[\\s\\-–—.]*)?([0-9]{4})$",
     "example": "1001, AL-1001, AL 1001"
@@ -182,5 +182,10 @@
     "regex": "^(?:TR[\\s\\-\u2013\u2014.]*)?(\\d{5})$",
     "example": "06100, TR-06100, TR 06100",
     "expected_digits": 5
+  },
+  "UK": {
+    "regex": "^([A-Z]{1,2}[0-9][0-9A-Z]?\\s?[0-9][A-Z]{2})$",
+    "example": "SW1A 2AA, EC1A 1BB, M1 1AA, B33 8TH",
+    "tercet_map": "outward_only"
   }
 }
diff --git a/tests/test_postal_patterns.py b/tests/test_postal_patterns.py
index 76fe1cd..f121d02 100644
--- a/tests/test_postal_patterns.py
+++ b/tests/test_postal_patterns.py
@@ -1,6 +1,13 @@
 """Tests for postal_patterns.py — preprocessing, tercet_map, extraction."""
 
-from app.postal_patterns import _apply_tercet_map, _preprocess, extract_postal_code
+import pytest
+
+from app.postal_patterns import (
+    PATTERNS_META,
+    _apply_tercet_map,
+    _preprocess,
+    extract_postal_code,
+)
 
 
 # ── _preprocess tests ─────────────────────────────────────────────────────────
@@ -177,3 +184,27 @@ def test_lowercase_prefix(self):
     def test_three_digit_not_matched_as_four(self):
         # Too short: regex requires exactly 4 digits; must NOT become a 4-digit code.
         assert extract_postal_code("AL", "100") != "1000"
+
+
+class TestUKExtraction:
+    @pytest.mark.parametrize(
+        "raw, expected",
+        [
+            ("SW1A 2AA", "SW1A2AA"),
+            ("sw1a 2aa", "SW1A2AA"),
+            ("SW1A2AA", "SW1A2AA"),
+            ("M1 1AA", "M11AA"),
+            ("B33 8TH", "B338TH"),
+            ("W1A 1HQ", "W1A1HQ"),
+            ("CR2 6XH", "CR26XH"),
+            ("DN55 1PT", "DN551PT"),
+            ("EC1A 1BB", "EC1A1BB"),
+        ],
+    )
+    def test_uk_regex_extracts_normalized_full_postcode(self, raw, expected):
+        assert extract_postal_code("UK", raw) == expected
+
+
+def test_patterns_meta_version_bumped():
+    # Adding UK is an additive coverage change; minor version bump.
+    assert PATTERNS_META["version"] == "1.3"

From c033ff9bdaef3dd83780509179b1d0f4ead2db36 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:55:26 +0200
Subject: [PATCH 03/18] feat: add outward_only action and extract_outward
 helper

---
 app/postal_patterns.py        | 30 +++++++++++++++++++++++++++---
 tests/test_postal_patterns.py | 27 +++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/app/postal_patterns.py b/app/postal_patterns.py
index 31e5833..c4725d3 100644
--- a/app/postal_patterns.py
+++ b/app/postal_patterns.py
@@ -5,9 +5,11 @@
   - example:         Human-readable format examples
   - tercet_map:      Optional transform to align extracted code with TERCET lookup key.
                      Supported actions:
-                       truncate:N  — keep only the first N characters
-                       prepend:XX  — prepend string XX to the extracted code
-                       keep_alpha  — keep only leading alphabetic characters
+                       truncate:N   — keep only the first N characters
+                       prepend:XX   — prepend string XX to the extracted code
+                       keep_alpha   — keep only leading alphabetic characters
+                       outward_only — marker: country supports outward-code
+                                      fallback (lookup Tier 3.5); no key transform
   - expected_digits: Expected number of digits for all-numeric postal codes.
                      Used by _preprocess() to restore leading zeros lost in Excel/CSV
                      exports (e.g. "8461" → "08461" for ES with expected_digits=5).
@@ -77,9 +79,31 @@ def _apply_tercet_map(code: str, rule: str) -> str:
     if action == "keep_alpha":
         m = re.match(r"^([A-Z]+)", code)
         return m.group(1) if m else code
+    if action == "outward_only":
+        # Marker: the country supports outward-code-only fallback (lookup Tier 3.5).
+        # It does not transform the Tier 1 key; see extract_outward().
+        return code
     return code
 
 
+def extract_outward(country_code: str, raw_input: str) -> str | None:
+    """Return the outward (district) portion for countries flagged outward_only.
+
+    For UK postcodes, the outward portion is the normalised code minus its last
+    three characters (the inward code). Input of 4 chars or fewer after
+    normalisation is treated as already being an outward code (e.g. bare "SW1A").
+
+    Returns None for countries that do not declare tercet_map="outward_only".
+    """
+    entry = POSTAL_PATTERNS.get(country_code)
+    if not entry or entry.get("tercet_map") != "outward_only":
+        return None
+    normalised = normalize_postal_code(raw_input)
+    if len(normalised) <= 4:
+        return normalised
+    return normalised[:-3]
+
+
 def extract_postal_code(country_code: str, raw_input: str) -> str:
     """Extract and normalize postal code using country-specific pattern.
 
diff --git a/tests/test_postal_patterns.py b/tests/test_postal_patterns.py
index f121d02..4f57392 100644
--- a/tests/test_postal_patterns.py
+++ b/tests/test_postal_patterns.py
@@ -6,6 +6,7 @@
     PATTERNS_META,
     _apply_tercet_map,
     _preprocess,
+    extract_outward,
     extract_postal_code,
 )
 
@@ -208,3 +209,29 @@ def test_uk_regex_extracts_normalized_full_postcode(self, raw, expected):
 def test_patterns_meta_version_bumped():
     # Adding UK is an additive coverage change; minor version bump.
     assert PATTERNS_META["version"] == "1.3"
+
+
+class TestExtractOutward:
+    @pytest.mark.parametrize(
+        "raw, expected_outward",
+        [
+            ("SW1A 2AA", "SW1A"),
+            ("sw1a2aa", "SW1A"),
+            ("M1 1AA", "M1"),
+            ("B33 8TH", "B33"),
+            ("EC1A 1BB", "EC1A"),
+            ("DN55 1PT", "DN55"),
+            ("SW1A", "SW1A"),  # outward-only input
+            ("M1", "M1"),
+        ],
+    )
+    def test_extract_outward_for_uk(self, raw, expected_outward):
+        assert extract_outward("UK", raw) == expected_outward
+
+    def test_returns_none_for_country_without_flag(self):
+        # AT does not declare outward_only; outward extraction is undefined.
+        assert extract_outward("AT", "1010") is None
+
+    def test_extract_postal_code_unaffected_by_outward_only_flag(self):
+        # Tier 1 lookup for UK must still yield the full normalised postcode.
+        assert extract_postal_code("UK", "SW1A 2AA") == "SW1A2AA"

From 3ac8260d9d3f60cc1ce2b5b1bfd5768a746b0cef Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:56:28 +0200
Subject: [PATCH 04/18] feat: recognise NSPL pcds/itl columns in
 _parse_csv_content

---
 app/data_loader.py        | 17 +++++++++++++----
 tests/test_data_loader.py | 11 +++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/app/data_loader.py b/app/data_loader.py
index a3720bc..915af80 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -247,16 +247,25 @@ def _parse_csv_content(text: str, country_code: str, *, overwrite: bool = False)
         reader = csv.DictReader(io.StringIO(text), delimiter=delimiter)
     fieldnames = [f.strip().upper() for f in (reader.fieldnames or [])]
 
-    # Find the postal code column
+    # Find the postal code column ("PCDS" is the NSPL formatted-postcode column)
     pc_col = None
-    for candidate in ("CODE", "PC", "POSTAL_CODE", "POSTCODE", "PC_FMT"):
+    for candidate in ("CODE", "PC", "POSTAL_CODE", "POSTCODE", "PC_FMT", "PCDS"):
         if candidate in fieldnames:
             pc_col = candidate
             break
 
-    # Find the NUTS3 column — prefer current version, never fall back to old versions
+    # Find the NUTS3 column — prefer current version, never fall back to old versions.
+    # ITL* candidates cover the UK NSPL dataset (ITL3 codes are NUTS3-equivalent).
     nuts3_col = None
-    for candidate in (f"NUTS3_{settings.nuts_version}", "NUTS3", "NUTS_ID", "NUTS"):
+    for candidate in (
+        f"NUTS3_{settings.nuts_version}",
+        "NUTS3",
+        "NUTS_ID",
+        "NUTS",
+        "ITL3CD",
+        "ITL3",
+        "ITL",
+    ):
         if candidate in fieldnames:
             nuts3_col = candidate
             break
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index a43d951..fd96ca9 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -1,5 +1,6 @@
 """Tests for data_loader.py — normalize functions and lookup tiers."""
 
+from app import data_loader
 from app.data_loader import lookup, normalize_country, normalize_postal_code
 
 
@@ -287,3 +288,13 @@ def test_sample_codes_resolve_estimated(self):
             assert result["nuts3"] in self.VALID_AL_NUTS3
             assert result["nuts2"] == result["nuts3"][:4]
             assert result["nuts1"] == "AL0"
+
+
+class TestNSPLColumnParsing:
+    def test_parse_csv_recognises_nspl_columns(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+        nspl_csv = "pcds,itl,doterm\nSW1A 2AA,TLI32,\nEC1A 1BB,TLI32,\n"
+        rows = data_loader._parse_csv_content(nspl_csv, "UK")
+        assert rows == 2
+        assert data_loader._lookup[("UK", "SW1A2AA")] == "TLI32"
+        assert data_loader._lookup[("UK", "EC1A1BB")] == "TLI32"

From 2f9f78018070706b32d1f984a0715f5d78d16652 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:57:07 +0200
Subject: [PATCH 05/18] feat: add skip_terminated flag to filter NSPL doterm
 rows

---
 app/data_loader.py        | 26 ++++++++++++++++++++++++--
 tests/test_data_loader.py | 18 ++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/app/data_loader.py b/app/data_loader.py
index 915af80..837ac0d 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -232,8 +232,19 @@ def _sniff_dialect(text: str) -> csv.Dialect | None:
         return None
 
 
-def _parse_csv_content(text: str, country_code: str, *, overwrite: bool = False) -> int:
-    """Parse CSV/TSV content and populate the lookup table. Returns row count."""
+def _parse_csv_content(
+    text: str,
+    country_code: str,
+    *,
+    overwrite: bool = False,
+    skip_terminated: bool = False,
+) -> int:
+    """Parse CSV/TSV content and populate the lookup table. Returns row count.
+
+    When skip_terminated is True (used for the NSPL dataset), rows with a
+    non-blank DOTERM (date of termination) column are skipped so only live
+    postcodes are loaded.
+    """
     count = 0
     skipped = 0
 
@@ -285,17 +296,28 @@ def _parse_csv_content(text: str, country_code: str, *, overwrite: bool = False)
             cc_col = candidate
             break
 
+    # Detect optional DOTERM column for live-only filtering (NSPL)
+    doterm_col = None
+    if skip_terminated:
+        for candidate in ("DOTERM", "DOT", "DATE_OF_TERMINATION"):
+            if candidate in fieldnames:
+                doterm_col = candidate
+                break
+
     # Map back to original-case field names from DictReader
     orig_fields = list(reader.fieldnames or [])
     pc_orig = orig_fields[fieldnames.index(pc_col)]
     nuts3_orig = orig_fields[fieldnames.index(nuts3_col)]
     cc_orig = orig_fields[fieldnames.index(cc_col)] if cc_col else None
+    doterm_orig = orig_fields[fieldnames.index(doterm_col)] if doterm_col else None
 
     if not country_code and cc_col is None:
         logger.warning("No country code available (not in URL or CSV columns), skipping file")
         return 0
 
     for row in reader:
+        if doterm_orig and row.get(doterm_orig, "").strip():
+            continue
         pc = row.get(pc_orig, "")
         nuts3 = row.get(nuts3_orig, "").strip()
         if not pc or not nuts3:
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index fd96ca9..189b374 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -298,3 +298,21 @@ def test_parse_csv_recognises_nspl_columns(self, monkeypatch):
         assert rows == 2
         assert data_loader._lookup[("UK", "SW1A2AA")] == "TLI32"
         assert data_loader._lookup[("UK", "EC1A1BB")] == "TLI32"
+
+    def test_skip_terminated_filters_doterm_rows(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+        nspl_csv = (
+            "pcds,itl,doterm\n"
+            "SW1A 2AA,TLI32,\n"
+            "M1 9NS,TLD46,202312\n"  # terminated, skip
+            "EC1A 1BB,TLI32,\n"
+        )
+        rows = data_loader._parse_csv_content(nspl_csv, "UK", skip_terminated=True)
+        assert rows == 2
+        assert ("UK", "M19NS") not in data_loader._lookup
+
+    def test_skip_terminated_default_false_keeps_all_rows(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+        nspl_csv = "pcds,itl,doterm\nSW1A 2AA,TLI32,\nM1 9NS,TLD46,202312\n"
+        rows = data_loader._parse_csv_content(nspl_csv, "UK")
+        assert rows == 2

From 50c543fdccce05a733b2e1333e9b4ff1cba9b48e Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:58:14 +0200
Subject: [PATCH 06/18] feat: add NSPL URL and ITL names URLs to settings

---
 app/config.py        | 10 ++++++++++
 app/settings.json    |  1 +
 tests/test_config.py | 24 ++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/app/config.py b/app/config.py
index 4a5de69..533d724 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,6 +31,9 @@ class Settings(BaseSettings):
     cache_max_age: int = _defaults.get("cache_max_age", 3600)
     startup_timeout: int = 300
     docs_enabled: bool = True
+    # NSPL (UK postcode → ITL3) — optional, no-op when unset (TERCET-only deployment)
+    nspl_url: str = _defaults.get("nspl_url", "")
+    itl_names_urls: str = ""
     photon_url: str = ""
     photon_timeout_seconds: float = 5.0
     nuts_geojson_url: str = (
@@ -70,6 +73,13 @@ def extra_source_urls(self) -> list[str]:
             return []
         return [u.strip() for u in self.extra_sources.split(",") if u.strip()]
 
+    @property
+    def itl_names_url_list(self) -> list[str]:
+        """Parse PC2NUTS_ITL_NAMES_URLS comma-separated string into a URL list."""
+        if not self.itl_names_urls.strip():
+            return []
+        return [u.strip() for u in self.itl_names_urls.split(",") if u.strip()]
+
     @property
     def trusted_tokens(self) -> frozenset[str]:
         """Parse PC2NUTS_TRUSTED_TOKENS comma-separated list into a frozenset.
diff --git a/app/settings.json b/app/settings.json
index 4f47c75..d3b478f 100644
--- a/app/settings.json
+++ b/app/settings.json
@@ -1,5 +1,6 @@
 {
   "tercet_base_url": "https://gisco-services.ec.europa.eu/tercet/NUTS-2024/",
+  "nspl_url": "",
   "countries": [
     "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL", "ES",
     "FI", "FR", "HR", "HU", "IE", "IT", "LT", "LU", "LV", "MT",
diff --git a/tests/test_config.py b/tests/test_config.py
index fe3bf40..8af0aaf 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -62,3 +62,27 @@ def test_interval_negative_is_rejected(self, monkeypatch):
 def test_synthetic_nuts_fallback_has_fo():
     from app.config import settings
     assert settings.synthetic_nuts_fallback.get("FO") == "FO000"
+
+
+class TestNSPLSettings:
+    def test_nspl_url_defaults_empty(self):
+        assert Settings().nspl_url == ""
+
+    def test_itl_names_urls_defaults_empty(self):
+        assert Settings().itl_names_urls == ""
+
+    def test_itl_names_url_list_parses_csv(self):
+        s = Settings(itl_names_urls="https://a/x.csv, https://b/y.csv ,")
+        assert s.itl_names_url_list == ["https://a/x.csv", "https://b/y.csv"]
+
+    def test_itl_names_url_list_empty_when_unset(self):
+        assert Settings().itl_names_url_list == []
+
+    def test_nspl_url_from_env(self, monkeypatch):
+        monkeypatch.setenv("PC2NUTS_NSPL_URL", "https://ons/nspl.zip")
+        assert Settings().nspl_url == "https://ons/nspl.zip"
+
+    def test_uk_not_in_settings_countries(self):
+        """Regression guard: UK must not appear in the GISCO country list —
+        it would trigger wasted GISCO URL guesses (Codex review, PR #52)."""
+        assert "UK" not in Settings().countries

From dd54c28665a491012f3cdce258fbea673f62f1be Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 21:59:02 +0200
Subject: [PATCH 07/18] feat: add conditional GET wrapper for cached ZIP
 downloads

---
 app/data_loader.py        | 17 +++++++++++++++++
 tests/test_data_loader.py | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/app/data_loader.py b/app/data_loader.py
index 837ac0d..cbe4c09 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -347,6 +347,23 @@ def _parse_csv_content(
     return count
 
 
+def _download_zip_conditional(
+    client: httpx.Client, url: str, cached_meta: dict
+) -> httpx.Response:
+    """Download with conditional-GET headers; returns the raw httpx.Response.
+
+    cached_meta keys: 'etag' and 'last_modified' (either may be absent). The
+    caller handles 200 (re-parse), 304 (keep cache), and error statuses. Applies
+    to both TERCET and NSPL so an unchanged upstream ZIP is not re-fetched.
+    """
+    headers = {}
+    if cached_meta.get("etag"):
+        headers["If-None-Match"] = cached_meta["etag"]
+    if cached_meta.get("last_modified"):
+        headers["If-Modified-Since"] = cached_meta["last_modified"]
+    return client.get(url, headers=headers, timeout=60, follow_redirects=True)
+
+
 def _download_zip(client: httpx.Client, url: str) -> bytes | None:
     """Download a ZIP with one retry on transient network errors.
 
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index 189b374..be0ae79 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -1,5 +1,7 @@
 """Tests for data_loader.py — normalize functions and lookup tiers."""
 
+import httpx
+
 from app import data_loader
 from app.data_loader import lookup, normalize_country, normalize_postal_code
 
@@ -316,3 +318,36 @@ def test_skip_terminated_default_false_keeps_all_rows(self, monkeypatch):
         nspl_csv = "pcds,itl,doterm\nSW1A 2AA,TLI32,\nM1 9NS,TLD46,202312\n"
         rows = data_loader._parse_csv_content(nspl_csv, "UK")
         assert rows == 2
+
+
+class TestConditionalGet:
+    def test_sends_conditional_headers_when_etag_known(self):
+        captured = {}
+
+        def handler(request: httpx.Request) -> httpx.Response:
+            captured["headers"] = dict(request.headers)
+            return httpx.Response(304)
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        cached_meta = {
+            "etag": '"abc123"',
+            "last_modified": "Wed, 01 Jan 2025 00:00:00 GMT",
+        }
+        result = data_loader._download_zip_conditional(
+            client, "https://example.com/foo.zip", cached_meta
+        )
+        assert result.status_code == 304
+        assert captured["headers"]["if-none-match"] == '"abc123"'
+        assert captured["headers"]["if-modified-since"] == "Wed, 01 Jan 2025 00:00:00 GMT"
+
+    def test_omits_headers_when_meta_empty(self):
+        captured = {}
+
+        def handler(request: httpx.Request) -> httpx.Response:
+            captured["headers"] = dict(request.headers)
+            return httpx.Response(200, content=b"x")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        data_loader._download_zip_conditional(client, "https://example.com/foo.zip", {})
+        assert "if-none-match" not in captured["headers"]
+        assert "if-modified-since" not in captured["headers"]

From 2ef5762ca1fa390411f5c18bfa051bf0e68da2bc Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:02:23 +0200
Subject: [PATCH 08/18] feat: implement NSPL loader with isolated failure
 handling

---
 app/data_loader.py        | 68 +++++++++++++++++++++++++++++++++++++++
 tests/test_api.py         |  2 +-
 tests/test_data_loader.py | 53 ++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/app/data_loader.py b/app/data_loader.py
index cbe4c09..6ed8985 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -24,6 +24,10 @@
 
 _MAX_UNCOMPRESSED_SIZE = 100 * 1024 * 1024  # 100 MB
 
+# The NSPL postcode CSV (~1.79M live rows) is far larger than a TERCET file; it
+# needs its own, higher extraction cap. Source is operator-configured (trusted).
+_MAX_NSPL_UNCOMPRESSED_SIZE = 1024 * 1024 * 1024  # 1 GB
+
 logger = logging.getLogger(__name__)
 
 # postal_code -> NUTS3 code, keyed by (country_code, normalized_postal_code)
@@ -463,6 +467,64 @@ def _download_and_parse_zip(
     return total
 
 
+def _load_nspl(client: httpx.Client, url: str, cache_dir: Path) -> int:
+    """Fetch the NSPL ZIP and load UK postcode → ITL3 entries into _lookup.
+
+    Returns the number of rows added. Returns 0 when url is empty or on any
+    error (network, ZIP, or CSV parsing) — an NSPL failure must never block
+    TERCET-only operation. Terminated postcodes (non-blank DOTERM) are skipped.
+    UK joins the loaded country set automatically as its rows land in _lookup.
+    """
+    if not url:
+        return 0
+    cache_path = cache_dir / "nspl.zip"
+    try:
+        resp = _download_zip_conditional(client, url, {})
+        if resp.status_code == 304:
+            # Unchanged upstream — nothing to (re)load this run.
+            return 0
+        resp.raise_for_status()
+        content = resp.content
+        if not zipfile.is_zipfile(io.BytesIO(content)):
+            logger.warning("NSPL response from %s is not a valid ZIP, skipping", url)
+            return 0
+        try:
+            cache_path.write_bytes(content)
+        except OSError as exc:
+            logger.warning("Failed to cache NSPL ZIP: %s", exc)
+
+        total = 0
+        with zipfile.ZipFile(io.BytesIO(content)) as zf:
+            for name in zf.namelist():
+                # The postcode CSV is the "NSPL*.csv" (real releases ship it under
+                # Data/); other bundled CSVs (user guide, column lookups) lack the
+                # pcds/itl columns and are ignored by _parse_csv_content anyway.
+                if not name.lower().endswith(".csv") or "nspl" not in name.lower():
+                    continue
+                file_size = zf.getinfo(name).file_size
+                if file_size > _MAX_NSPL_UNCOMPRESSED_SIZE:
+                    logger.warning(
+                        "Skipping %s: uncompressed size %d exceeds NSPL limit %d",
+                        name,
+                        file_size,
+                        _MAX_NSPL_UNCOMPRESSED_SIZE,
+                    )
+                    continue
+                raw = zf.read(name)
+                for enc in ("utf-8-sig", "utf-8", "latin-1"):
+                    try:
+                        text = raw.decode(enc)
+                        break
+                    except UnicodeDecodeError:
+                        continue
+                total += _parse_csv_content(text, "UK", overwrite=False, skip_terminated=True)
+        logger.info("NSPL loaded: %d live UK postcodes", total)
+        return total
+    except Exception as exc:  # noqa: BLE001 - isolation boundary: NSPL must never abort load
+        logger.warning("NSPL load failed: %s", exc)
+        return 0
+
+
 def _db_path() -> Path:
     """Return the path for the SQLite cache DB, scoped by NUTS version."""
     return Path(settings.data_dir) / f"postalcode2nuts_NUTS-{settings.nuts_version}.db"
@@ -1007,6 +1069,12 @@ def load_data() -> None:
                 if extra_count:
                     logger.info("Extra sources added %d entries (overwrite mode)", extra_count)
 
+            # NSPL (UK postcodes via ITL) — optional, no-op when nspl_url unset
+            if not timed_out and settings.nspl_url:
+                nspl_count = _load_nspl(client, settings.nspl_url, cache_dir)
+                if nspl_count > 0:
+                    logger.info("Loaded %d entries for UK from NSPL", nspl_count)
+
             # NUTS region names
             if not timed_out:
                 _download_nuts_names(client)
diff --git a/tests/test_api.py b/tests/test_api.py
index 49f7cc8..5852924 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -195,7 +195,7 @@ def test_includes_patterns_version(self, client):
         resp = client.get("/health")
         data = resp.json()
         assert "patterns_version" in data
-        assert data["patterns_version"] == "1.2"
+        assert data["patterns_version"] == "1.3"
 
     def test_includes_nuts_names(self, client):
         resp = client.get("/health")
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index be0ae79..a5dd2fb 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -320,6 +320,59 @@ def test_skip_terminated_default_false_keeps_all_rows(self, monkeypatch):
         assert rows == 2
 
 
+class TestLoadNSPL:
+    @staticmethod
+    def _zip_bytes(csv_text, arcname="NSPL.csv"):
+        import io as _io
+        import zipfile
+
+        buf = _io.BytesIO()
+        with zipfile.ZipFile(buf, "w") as zf:
+            zf.writestr(arcname, csv_text)
+        return buf.getvalue()
+
+    def test_populates_lookup_from_zip(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+        csv_text = (
+            "pcds,itl,doterm\n"
+            "SW1A 2AA,TLI32,\n"
+            "EC1A 1BB,TLI32,\n"
+            "M1 9NS,TLD46,202312\n"  # terminated
+        )
+        content = self._zip_bytes(csv_text)
+
+        def handler(request):
+            return httpx.Response(200, content=content, headers={"ETag": '"v1"'})
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        count = data_loader._load_nspl(client, "https://example.com/NSPL.zip", tmp_path)
+        assert count == 2
+        assert data_loader._lookup[("UK", "SW1A2AA")] == "TLI32"
+        assert ("UK", "M19NS") not in data_loader._lookup
+
+    def test_returns_zero_when_url_unset(self, tmp_path):
+        client = httpx.Client(transport=httpx.MockTransport(lambda r: httpx.Response(404)))
+        assert data_loader._load_nspl(client, "", tmp_path) == 0
+
+    def test_swallows_exceptions(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+
+        def handler(request):
+            raise httpx.ConnectError("boom")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        assert data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path) == 0
+
+    def test_non_zip_response_returns_zero(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+
+        def handler(request):
+            return httpx.Response(200, content=b"not a zip")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        assert data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path) == 0
+
+
 class TestConditionalGet:
     def test_sends_conditional_headers_when_etag_known(self):
         captured = {}

From 2678d06da0a4d83782eec3ed56ffc7fb790f5848 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:05:05 +0200
Subject: [PATCH 09/18] feat: build outward-code majority-vote index for Tier
 3.5

---
 app/data_loader.py        | 30 ++++++++++++++++++++++++++++++
 tests/conftest.py         | 16 ++++++++++++++++
 tests/test_data_loader.py | 26 ++++++++++++++++++++++++++
 3 files changed, 72 insertions(+)

diff --git a/app/data_loader.py b/app/data_loader.py
index 6ed8985..c784db5 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -57,6 +57,10 @@
 # NUTS region names: nuts_id -> name_latn
 _nuts_names: dict[str, str] = {}
 
+# Outward-code index for lookup Tier 3.5 (UK): (country_code, outward) ->
+# (nuts3, agreement_ratio). Built from _lookup by majority vote at load time.
+_outward_lookup: dict[tuple[str, str], tuple[str, float]] = {}
+
 # Staleness tracking
 _data_stale: bool = False
 _data_loaded_at: str = ""
@@ -825,6 +829,29 @@ def _build_prefix_index() -> None:
         )
 
 
+def _build_outward_index(country_code: str) -> None:
+    """Populate _outward_lookup for one country by majority vote per outward code.
+
+    Outward = the full normalised postcode minus its last three characters (UK
+    convention). Codes of four characters or fewer are skipped: they are already
+    outward-only (no inward part to strip). Used by lookup Tier 3.5 fallback.
+    """
+    groups: dict[str, list[str]] = {}
+    for (cc, code), nuts3 in _lookup.items():
+        if cc != country_code or len(code) <= 4:
+            continue
+        outward = code[:-3]
+        groups.setdefault(outward, []).append(nuts3)
+
+    for outward, nuts3_list in groups.items():
+        counts = Counter(nuts3_list)
+        winner, count = counts.most_common(1)[0]
+        agreement = count / len(nuts3_list)
+        _outward_lookup[(country_code, outward)] = (winner, agreement)
+    if groups:
+        logger.info("Built outward index for %s: %d outward codes", country_code, len(groups))
+
+
 def _estimate_by_prefix(cc: str, postal_code: str) -> dict | None:
     """Runtime estimation via longest prefix match + majority vote.
 
@@ -996,6 +1023,7 @@ def load_data() -> None:
         _lookup.clear()
         _estimates.clear()
         _nuts_names.clear()
+        _outward_lookup.clear()
         _data_stale = False
         _extra_source_count = len(settings.extra_source_urls)
 
@@ -1017,6 +1045,7 @@ def load_data() -> None:
             _revalidate_estimates()
             _load_nuts_names_from_db(db)
             _build_prefix_index()
+            _build_outward_index("UK")
             return
 
         _lookup.clear()
@@ -1109,6 +1138,7 @@ def load_data() -> None:
             logger.warning("TERCET refresh failed — serving stale cache")
 
         _build_prefix_index()
+        _build_outward_index("UK")
 
 
 def _build_result(match_type: str, nuts3: str, nuts1: str = "", nuts2: str = "", **confidence) -> dict:
diff --git a/tests/conftest.py b/tests/conftest.py
index d93e956..0c57fe9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -29,6 +29,11 @@
     ("YY", "1002"): "YY111",
     ("YY", "1003"): "YY111",
     ("YY", "2001"): "YY112",
+    # UK (ITL via NSPL): SW1A → TLI32 majority, EC1A → TLI32, M1 → TLD45
+    ("UK", "SW1A2AA"): "TLI32",
+    ("UK", "SW1A1AA"): "TLI32",
+    ("UK", "EC1A1BB"): "TLI32",
+    ("UK", "M11AA"): "TLD45",
 }
 
 MOCK_ESTIMATES = {
@@ -76,6 +81,12 @@
     "AL0": "Shqipëria",
     "AL02": "Qender",
     "AL022": "Tiranë",
+    "TLI": "London",
+    "TLI3": "Inner London - East",
+    "TLI32": "Tower Hamlets and Newham",
+    "TLD": "North West (England)",
+    "TLD4": "Greater Manchester",
+    "TLD45": "Manchester",
 }
 
 
@@ -94,6 +105,7 @@ def mock_data():
     orig_single = data_loader._single_nuts3.copy()
     orig_synthetic = data_loader._synthetic_nuts.copy()
     orig_fallback = data_loader._country_fallback.copy()
+    orig_outward = data_loader._outward_lookup.copy()
 
     # Populate
     data_loader._lookup.clear()
@@ -103,6 +115,8 @@ def mock_data():
     data_loader._nuts_names.clear()
     data_loader._nuts_names.update(MOCK_NUTS_NAMES)
     data_loader._build_prefix_index()
+    data_loader._outward_lookup.clear()
+    data_loader._build_outward_index("UK")
 
     yield
 
@@ -121,6 +135,8 @@ def mock_data():
     data_loader._synthetic_nuts.update(orig_synthetic)
     data_loader._country_fallback.clear()
     data_loader._country_fallback.update(orig_fallback)
+    data_loader._outward_lookup.clear()
+    data_loader._outward_lookup.update(orig_outward)
 
 
 @pytest.fixture()
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index a5dd2fb..4a52356 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -1,6 +1,7 @@
 """Tests for data_loader.py — normalize functions and lookup tiers."""
 
 import httpx
+import pytest
 
 from app import data_loader
 from app.data_loader import lookup, normalize_country, normalize_postal_code
@@ -373,6 +374,31 @@ def handler(request):
         assert data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path) == 0
 
 
+class TestBuildOutwardIndex:
+    def test_majority_vote(self, monkeypatch):
+        monkeypatch.setattr(
+            data_loader,
+            "_lookup",
+            {
+                ("UK", "SW1A2AA"): "TLI32",
+                ("UK", "SW1A1AA"): "TLI32",
+                ("UK", "SW1A0AA"): "TLI31",  # minority
+                ("UK", "M11AA"): "TLD45",
+                ("UK", "M11AB"): "TLD45",
+            },
+        )
+        monkeypatch.setattr(data_loader, "_outward_lookup", {})
+        data_loader._build_outward_index("UK")
+        assert data_loader._outward_lookup[("UK", "SW1A")] == ("TLI32", pytest.approx(2 / 3))
+        assert data_loader._outward_lookup[("UK", "M1")] == ("TLD45", pytest.approx(1.0))
+
+    def test_skips_short_codes(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {("UK", "AB1"): "TLC11"})
+        monkeypatch.setattr(data_loader, "_outward_lookup", {})
+        data_loader._build_outward_index("UK")
+        assert data_loader._outward_lookup == {}
+
+
 class TestConditionalGet:
     def test_sends_conditional_headers_when_etag_known(self):
         captured = {}

From 3487042611f2ace4aef0799428cfde0cc2eb6dc7 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:05:46 +0200
Subject: [PATCH 10/18] feat: alias GB to UK for ISO 3166-1 input compatibility

---
 app/data_loader.py        | 12 ++++++++++--
 tests/test_api.py         |  7 +++++++
 tests/test_data_loader.py |  9 +++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/app/data_loader.py b/app/data_loader.py
index c784db5..ef17803 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -82,9 +82,17 @@ def normalize_postal_code(code: str) -> str:
 
 
 def normalize_country(country_code: str) -> str:
-    """Normalize a country code: uppercase + map GR→EL (ISO vs GISCO convention)."""
+    """Normalize a country code: uppercase + map non-canonical aliases.
+
+    GR → EL  (ISO vs GISCO convention)
+    GB → UK  (ISO vs NSPL/internal convention)
+    """
     cc = country_code.strip().upper()
-    return "EL" if cc == "GR" else cc
+    if cc == "GR":
+        return "EL"
+    if cc == "GB":
+        return "UK"
+    return cc
 
 
 def get_lookup_table() -> dict[tuple[str, str], str]:
diff --git a/tests/test_api.py b/tests/test_api.py
index 5852924..b42bb24 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -22,6 +22,13 @@ def test_response_includes_code_system_nuts(self, client):
         assert resp.status_code == 200
         assert resp.json()["code_system"] == "NUTS"
 
+    def test_lookup_accepts_gb_alias(self, client):
+        resp_uk = client.get("/lookup", params={"country": "UK", "postal_code": "SW1A 2AA"})
+        resp_gb = client.get("/lookup", params={"country": "GB", "postal_code": "SW1A 2AA"})
+        assert resp_uk.status_code == 200
+        assert resp_gb.status_code == 200
+        assert resp_uk.json() == resp_gb.json()
+
     def test_400_unsupported_country(self, client):
         resp = client.get("/lookup", params={"postal_code": "12345", "country": "ZZ"})
         assert resp.status_code == 400
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index 4a52356..b2b8c06 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -46,6 +46,15 @@ def test_strips_whitespace(self):
     def test_el_stays_el(self):
         assert normalize_country("EL") == "EL"
 
+    def test_gb_to_uk(self):
+        assert normalize_country("GB") == "UK"
+
+    def test_gb_lowercase(self):
+        assert normalize_country("gb") == "UK"
+
+    def test_uk_stays_uk(self):
+        assert normalize_country("UK") == "UK"
+
 
 # ── lookup tests (all 5 tiers) ──────────────────────────────────────────────
 

From 5a9f7187ba370dc234a656283aa5a8d5e18a078b Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:08:08 +0200
Subject: [PATCH 11/18] feat: add outward-code lookup tier to lookup waterfall
 (UK)

---
 app/data_loader.py        | 24 +++++++++++++++++++++++-
 tests/test_api.py         |  7 +++++++
 tests/test_data_loader.py | 27 +++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/app/data_loader.py b/app/data_loader.py
index ef17803..6645380 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -1192,6 +1192,8 @@ def lookup(country_code: str, postal_code: str) -> dict | None:
     1. Exact TERCET match → confidence 1.0
     2. Pre-computed estimate → stored confidence per level
     2b. Albania block map → district-block NUTS3, match_type='estimated' (#118)
+    3.5. Outward-code lookup (UK) → majority-vote ITL3 for the outward code,
+       match_type='estimated', medium confidence (before generic prefix)
     3. Runtime prefix-based estimation → calculated confidence
     4. Country-level majority vote → unanimous NUTS1/2, dominant NUTS3 (e.g. MT)
     5. Single-NUTS3 country fallback → confidence 1.0 (e.g. LI, CY, LU)
@@ -1200,7 +1202,7 @@ def lookup(country_code: str, postal_code: str) -> dict | None:
 
     Returns a dict with nuts1/2/3, match_type, and per-level confidence, or None.
     """
-    from app.postal_patterns import extract_postal_code
+    from app.postal_patterns import extract_outward, extract_postal_code
 
     cc = normalize_country(country_code)
 
@@ -1240,6 +1242,26 @@ def lookup(country_code: str, postal_code: str) -> dict | None:
                 nuts3_confidence=conf["nuts3"],
             )
 
+    # Tier 3.5: Outward-code lookup (UK and any country flagged outward_only).
+    # Placed before generic prefix estimation because the outward code is the
+    # meaningful UK boundary: a curated majority vote over the whole outward
+    # beats an arbitrary prefix match, and it yields match_type='estimated' with
+    # medium confidence. extract_outward returns None for non-outward countries,
+    # so this tier is inert for everything except UK.
+    outward = extract_outward(cc, postal_code)
+    if outward is not None:
+        outward_hit = _outward_lookup.get((cc, outward))
+        if outward_hit is not None:
+            o_nuts3, _agreement = outward_hit
+            conf = settings.confidence_map["medium"]
+            return _build_result(
+                "estimated",
+                o_nuts3,
+                nuts1_confidence=conf["nuts1"],
+                nuts2_confidence=conf["nuts2"],
+                nuts3_confidence=conf["nuts3"],
+            )
+
     # Tier 3: Runtime prefix-based estimation
     approx = _estimate_by_prefix(cc, extracted)
     if approx is not None:
diff --git a/tests/test_api.py b/tests/test_api.py
index b42bb24..aa116d8 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -29,6 +29,13 @@ def test_lookup_accepts_gb_alias(self, client):
         assert resp_gb.status_code == 200
         assert resp_uk.json() == resp_gb.json()
 
+    def test_uk_outward_only_input_returns_estimated(self, client):
+        resp = client.get("/lookup", params={"country": "UK", "postal_code": "SW1A"})
+        assert resp.status_code == 200
+        body = resp.json()
+        assert body["match_type"] == "estimated"
+        assert body["nuts3"] == "TLI32"
+
     def test_400_unsupported_country(self, client):
         resp = client.get("/lookup", params={"postal_code": "12345", "country": "ZZ"})
         assert resp.status_code == 400
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index b2b8c06..b95a32a 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -383,6 +383,33 @@ def handler(request):
         assert data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path) == 0
 
 
+class TestUKOutwardLookup:
+    def test_outward_only_input_returns_estimated(self, mock_data):
+        # "SW1A" has no inward part; resolves via the outward majority-vote tier.
+        result = lookup("UK", "SW1A")
+        assert result is not None
+        assert result["nuts3"] == "TLI32"
+        assert result["match_type"] == "estimated"
+        assert result["nuts1_confidence"] == pytest.approx(0.90)
+        assert result["nuts2_confidence"] == pytest.approx(0.80)
+        assert result["nuts3_confidence"] == pytest.approx(0.70)
+
+    def test_full_postcode_still_exact(self, mock_data):
+        result = lookup("UK", "SW1A 2AA")
+        assert result["match_type"] == "exact"
+        assert result["nuts3"] == "TLI32"
+
+    def test_unlisted_full_postcode_resolves_via_outward(self, mock_data):
+        # Valid-format UK postcode not in the data → outward "SW1A" still resolves.
+        result = lookup("UK", "SW1A 9ZZ")
+        assert result is not None
+        assert result["nuts3"] == "TLI32"
+        assert result["match_type"] == "estimated"
+
+    def test_unknown_outward_returns_none(self, mock_data):
+        assert lookup("UK", "ZZ99") is None
+
+
 class TestBuildOutwardIndex:
     def test_majority_vote(self, monkeypatch):
         monkeypatch.setattr(

From 95b85aba62e0937b44d4a2820606f8f284138cd1 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:09:41 +0200
Subject: [PATCH 12/18] feat: tag UK lookups with code_system=ITL via code
 prefix

---
 app/data_loader.py        | 6 ++++++
 tests/test_api.py         | 5 +++++
 tests/test_data_loader.py | 8 ++++++++
 3 files changed, 19 insertions(+)

diff --git a/app/data_loader.py b/app/data_loader.py
index 6645380..53fe21f 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -1154,10 +1154,16 @@ def _build_result(match_type: str, nuts3: str, nuts1: str = "", nuts2: str = "",
 
     If nuts1/nuts2 are not provided, they are derived from nuts3.
     Confidence keys: nuts1_confidence, nuts2_confidence, nuts3_confidence.
+
+    code_system is derived from the code itself: ITL codes are the UK's
+    NUTS successor and uniquely carry the "TL" prefix (no NUTS country code is
+    "TL"), so every "TL…" result is tagged "ITL" and all others "NUTS".
     """
     n1 = nuts1 or nuts3[:3]
     n2 = nuts2 or nuts3[:4]
+    code_system = "ITL" if nuts3[:2] == "TL" else "NUTS"
     return {
+        "code_system": code_system,
         "match_type": match_type,
         "nuts1": n1,
         "nuts1_confidence": confidence.get("nuts1_confidence", 1.0),
diff --git a/tests/test_api.py b/tests/test_api.py
index aa116d8..41f05e4 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -36,6 +36,11 @@ def test_uk_outward_only_input_returns_estimated(self, client):
         assert body["match_type"] == "estimated"
         assert body["nuts3"] == "TLI32"
 
+    def test_uk_response_has_code_system_itl(self, client):
+        resp = client.get("/lookup", params={"country": "UK", "postal_code": "SW1A 2AA"})
+        assert resp.status_code == 200
+        assert resp.json()["code_system"] == "ITL"
+
     def test_400_unsupported_country(self, client):
         resp = client.get("/lookup", params={"postal_code": "12345", "country": "ZZ"})
         assert resp.status_code == 400
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index b95a32a..fe7a440 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -409,6 +409,14 @@ def test_unlisted_full_postcode_resolves_via_outward(self, mock_data):
     def test_unknown_outward_returns_none(self, mock_data):
         assert lookup("UK", "ZZ99") is None
 
+    def test_uk_result_tagged_itl(self, mock_data):
+        assert lookup("UK", "SW1A 2AA")["code_system"] == "ITL"
+        assert lookup("UK", "SW1A")["code_system"] == "ITL"
+
+    def test_non_uk_result_tagged_nuts(self, mock_data):
+        assert lookup("AT", "1010")["code_system"] == "NUTS"
+        assert lookup("DE", "10118")["code_system"] == "NUTS"
+
 
 class TestBuildOutwardIndex:
     def test_majority_vote(self, monkeypatch):

From 34c28cd66579fd8a43b63be5c54c404f4a0f53ca Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:10:33 +0200
Subject: [PATCH 13/18] feat: load ITL region names from ONS Names-and-Codes
 CSVs

---
 app/data_loader.py        | 49 +++++++++++++++++++++++++++++++++++++++
 tests/test_data_loader.py | 36 ++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/app/data_loader.py b/app/data_loader.py
index 53fe21f..814dce6 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -725,6 +725,51 @@ def _download_nuts_names(client: httpx.Client) -> int:
     return count
 
 
+def _load_itl_names(client: httpx.Client, urls: list[str]) -> int:
+    """Fetch ONS ITL "Names and Codes" CSVs and merge them into _nuts_names.
+
+    NSPL carries ITL codes but not names. Header names vary by release year
+    (e.g. ITL321CD/ITL321NM), so columns are matched by "ITL" + CD/NM suffix.
+    Returns the rows merged; fetch/parse failures are logged and skipped.
+    """
+    if not urls:
+        return 0
+    total = 0
+    for url in urls:
+        try:
+            resp = client.get(url, timeout=30, follow_redirects=True)
+            resp.raise_for_status()
+            text = resp.text
+        except httpx.HTTPError as exc:
+            logger.warning("ITL names fetch failed for %s: %s", url, exc)
+            continue
+        try:
+            reader = csv.DictReader(io.StringIO(text))
+            fieldnames = [f.strip() for f in (reader.fieldnames or [])]
+            reader.fieldnames = fieldnames  # rows must be keyed by the stripped names
+            code_col = next(
+                (f for f in fieldnames if f.upper().endswith("CD") and "ITL" in f.upper()),
+                None,
+            )
+            name_col = next(
+                (f for f in fieldnames if f.upper().endswith("NM") and "ITL" in f.upper()),
+                None,
+            )
+            if not code_col or not name_col:
+                logger.warning("No ITL CD/NM columns in %s; headers=%s", url, fieldnames)
+                continue
+            for row in reader:
+                code = (row.get(code_col) or "").strip().upper()
+                name = (row.get(name_col) or "").strip()
+                if code and name:
+                    _nuts_names[code] = name
+                    total += 1
+        except csv.Error as exc:
+            logger.warning("ITL names parse failed for %s: %s", url, exc)
+    logger.info("ITL names loaded: %d entries from %d URLs", total, len(urls))
+    return total
+
+
 def _load_nuts_names_from_db(db: Path) -> bool:
     """Load NUTS region names from SQLite cache. Graceful if table is missing."""
     try:
@@ -1112,6 +1157,10 @@ def load_data() -> None:
                 if nspl_count > 0:
                     logger.info("Loaded %d entries for UK from NSPL", nspl_count)
 
+            # ITL region names (ONS Names-and-Codes) — optional, no-op when unset
+            if not timed_out and settings.itl_names_url_list:
+                _load_itl_names(client, settings.itl_names_url_list)
+
             # NUTS region names
             if not timed_out:
                 _download_nuts_names(client)
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index fe7a440..fe8731b 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -443,6 +443,42 @@ def test_skips_short_codes(self, monkeypatch):
         assert data_loader._outward_lookup == {}
 
 
+class TestLoadITLNames:
+    def test_populates_nuts_names(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_nuts_names", {})
+
+        def handler(request):
+            body = "ITL321CD,ITL321NM\nTLI32,Tower Hamlets\nTLI31,Hackney and Newham\n"
+            return httpx.Response(200, content=body.encode())
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        count = data_loader._load_itl_names(client, ["https://example.com/itl3.csv"])
+        assert count == 2
+        assert data_loader._nuts_names["TLI32"] == "Tower Hamlets"
+
+    def test_empty_url_list_no_op(self):
+        client = httpx.Client(transport=httpx.MockTransport(lambda r: httpx.Response(404)))
+        assert data_loader._load_itl_names(client, []) == 0
+
+    def test_missing_columns_skipped(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_nuts_names", {})
+
+        def handler(request):
+            return httpx.Response(200, content=b"foo,bar\n1,2\n")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        assert data_loader._load_itl_names(client, ["https://example.com/x.csv"]) == 0
+
+    def test_http_error_swallowed(self, monkeypatch):
+        monkeypatch.setattr(data_loader, "_nuts_names", {})
+
+        def handler(request):
+            raise httpx.ConnectError("boom")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        assert data_loader._load_itl_names(client, ["https://example.com/x.csv"]) == 0
+
+
 class TestConditionalGet:
     def test_sends_conditional_headers_when_etag_known(self):
         captured = {}

From 2741093ac63ff33f7bc07e1318ca0aa84d5b48bf Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:10:54 +0200
Subject: [PATCH 14/18] test: confirm NSPL failure does not block TERCET
 serving

---
 tests/test_data_loader.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index fe8731b..349e4e6 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -382,6 +382,21 @@ def handler(request):
         client = httpx.Client(transport=httpx.MockTransport(handler))
         assert data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path) == 0
 
+    def test_nspl_failure_does_not_block_tercet(self, tmp_path, monkeypatch):
+        """If NSPL is unreachable, previously-loaded TERCET data must still serve."""
+        monkeypatch.setattr(data_loader, "_lookup", {("AT", "1010"): "AT130"})
+
+        def handler(request):
+            raise httpx.ConnectError("ons unavailable")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        nspl_count = data_loader._load_nspl(client, "https://ons.invalid/nspl.zip", tmp_path)
+        assert nspl_count == 0
+        # AT lookup must still work (TERCET data untouched)
+        result = data_loader.lookup("AT", "1010")
+        assert result is not None
+        assert result["nuts3"] == "AT130"
+
 
 class TestUKOutwardLookup:
     def test_outward_only_input_returns_estimated(self, mock_data):

From 63bb1a3dbc2d45d4116b44af40ec9aa9322d83de Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:14:00 +0200
Subject: [PATCH 15/18] docs: document UK/ITL support, outward-code tier, OGL
 attribution

---
 README.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f8824c0..8d6a165 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,16 @@ Faroe Islands (FO) — not part of NUTS; synthetic result.
 
 > **Faroe Islands** is an autonomous Danish territory with no NUTS coverage and no GISCO TERCET file. Lookups for FO are served by a synthetic single-region fallback (Tier 6) configured via `synthetic_nuts_fallback` in `app/settings.json`, returning `FO0` / `FO00` / `FO000` with `match_type="approximate"` and capped confidence (`0.90` / `0.85` / `0.80`) for any well-formed 3-digit code. The code is fabricated, not derived from a real NUTS dataset — contrast Montenegro's `ME000`, which is a genuine single-region NUTS code.
 
+### United Kingdom (ITL)
+
+The UK left the EU, so it is no longer part of NUTS. Its successor classification, **ITL (International Territorial Level)**, is published by the ONS and mapped to postcodes via the [National Statistics Postcode Lookup (NSPL)](https://geoportal.statistics.gov.uk/). When configured (see `PC2NUTS_NSPL_URL`), the service accepts UK postcodes (`country=UK`, or `country=GB` as an alias) and returns ITL1/2/3 codes in the same `nuts1/2/3` fields, with `code_system: "ITL"` to distinguish them.
+
+ITL is **not** a drop-in for NUTS-2016 UK: it diverges at L2 (41 vs 40 regions) and L3 (179 vs 174), and ONS discontinued the bidirectional NUTS↔ITL lookups in 2023. Branch on `code_system` when comparing UK results against historical NUTS-UK data.
+
+UK coverage is **optional and operator-configured** — the ~178 MB NSPL ZIP is not bundled. When `PC2NUTS_NSPL_URL` is unset (the default), UK is unsupported and returns the standard `400`. Outward-code-only input (e.g. `SW1A`) resolves to the majority ITL3 for that outward code with `estimated`/medium confidence.
+
+> **Out of scope:** Crown Dependencies (Jersey JE, Guernsey GG, Isle of Man IM) and Gibraltar (GI) use UK-style postcodes but are not in ITL geography or NSPL, and are not supported — lookups for those country codes return `400`.
+
 ## Deployment tiers
 
 PostalCode2NUTS runs in one of two tiers, chosen at deploy time by a single config
@@ -157,6 +167,7 @@ GET /lookup?country=AT&postal_code=A-1010
 {
   "postal_code": "A-1010",
   "country_code": "AT",
+  "code_system": "NUTS",
   "match_type": "exact",
   "nuts1": "AT1",
   "nuts1_name": "Ostösterreich",
@@ -193,10 +204,37 @@ GET /lookup?country=AT&postal_code=1012
 }
 ```
 
+**Example — UK postcode (ITL):**
+
+```
+GET /lookup?country=UK&postal_code=SW1A%202AA
+```
+
+```json
+{
+  "postal_code": "SW1A2AA",
+  "country_code": "UK",
+  "code_system": "ITL",
+  "match_type": "exact",
+  "nuts1": "TLI",
+  "nuts1_name": "London",
+  "nuts1_confidence": 1.0,
+  "nuts2": "TLI3",
+  "nuts2_name": "Inner London - West",
+  "nuts2_confidence": 1.0,
+  "nuts3": "TLI32",
+  "nuts3_name": "Westminster",
+  "nuts3_confidence": 1.0
+}
+```
+
+`country=GB` is accepted as an alias for `UK`. See [United Kingdom (ITL)](#united-kingdom-itl) for the NUTS-vs-ITL distinction.
+
 Every response includes:
 
 | Field | Description |
 |-------|-------------|
+| `code_system` | Territorial scheme of the `nuts{1,2,3}` fields: `NUTS` for EU/EFTA/candidate data, `ITL` for UK data (see [United Kingdom (ITL)](#united-kingdom-itl)) |
 | `match_type` | How the result was determined: `exact`, `estimated`, or `approximate` |
 | `nuts{1,2,3}_name` | Human-readable region name (Latin script), or `null` if unavailable |
 | `nuts{1,2,3}_confidence` | Confidence score (0.0–1.0) for each NUTS level |
@@ -432,6 +470,7 @@ User input: "Traiskirchen"
 | SI | 4 digits | SI- | `1000`, `SI-1000` |
 | SK | 3 digits + optional space + 2 digits | SK- | `81101`, `811 01`, `SK-81101` |
 | TR | 5 digits | TR- | `06100`, `TR-06100`, `34000` |
+| UK | 1–2 letters + digit + optional letter/digit + optional space + digit + 2 letters (ITL via NSPL; requires `PC2NUTS_NSPL_URL`) | GB accepted as alias | `SW1A 2AA`, `EC1A 1BB`, `M1 1AA`, `B33 8TH`, `SW1A` (outward only) |
 
 ## Configuration
 
@@ -444,6 +483,8 @@ All settings are overridable via environment variables prefixed with `PC2NUTS_`:
 | `PC2NUTS_DB_CACHE_TTL_DAYS` | `30` | Days between automatic TERCET data refreshes. If the refresh fails, the service falls back to the previous data and sets `data_stale: true` in the health endpoint. |
 | `PC2NUTS_ESTIMATES_CSV` | `./tercet_missing_codes.csv` | Path to the estimates CSV. Loaded automatically at startup if the file exists. |
 | `PC2NUTS_EXTRA_SOURCES` | *(empty)* | Comma-separated list of ZIP URLs containing additional postal code data. Loaded after TERCET; entries overwrite TERCET data. |
+| `PC2NUTS_NSPL_URL` | *(empty)* | URL to the latest [NSPL](https://geoportal.statistics.gov.uk/) ZIP from the ONS Open Geography Portal. Enables UK (ITL) support; when unset, UK is unsupported. The URL changes each quarterly release, so update it accordingly. |
+| `PC2NUTS_ITL_NAMES_URLS` | *(empty)* | Comma-separated list of ONS "Names and Codes" CSV URLs (one per ITL level) that supply UK region names. Loaded after NSPL. |
 | `PC2NUTS_RATE_LIMIT` | `120/minute` | Rate limit for `/lookup` and `/pattern` endpoints. Uses [slowapi](https://github.com/laurentS/slowapi) syntax (e.g. `100/minute`, `5/second`). `/health` is exempt. The default leaves comfortable headroom under the measured aggregate ceiling (~30 RPS) — see [`docs/performance.md`](docs/performance.md) for the rationale. |
 | `PC2NUTS_RATE_LIMIT_HEADERS` | `true` | When `true`, `429` responses include `Retry-After` and `X-RateLimit-Limit` / `X-RateLimit-Remaining` headers. |
 | `PC2NUTS_CACHE_MAX_AGE` | `3600` | `Cache-Control: public, max-age=<n>` (seconds) set on `/lookup`, `/pattern`, and `/` responses. |
@@ -648,6 +689,10 @@ Each estimate carries a confidence label (high / medium / low) that is mapped to
 
 Confidence is higher at coarser NUTS levels because neighbouring codes are more likely to share the same NUTS1 region than the same NUTS3 region.
 
+### UK outward-code lookup (`match_type: "estimated"`) — UK only
+
+For UK postcodes (loaded from NSPL — see [United Kingdom (ITL)](#united-kingdom-itl)), when the full postcode is not an exact match, the service looks up the **outward code** — everything before the final three characters, e.g. `SW1A` for `SW1A 2AA`. An index built at load time maps each outward code to the majority-vote ITL3 among all postcodes sharing it. This runs ahead of the generic prefix approximation below because the outward code is the meaningful UK boundary. It also handles outward-only input (`SW1A` submitted alone). Confidence uses the medium tier (NUTS1 0.90 / NUTS2 0.80 / NUTS3 0.70), since one outward code can straddle two adjacent ITL3 regions in dense urban areas.
+
 ### Tier 3: Runtime approximation (`match_type: "approximate"`)
 
 If neither an exact match nor a pre-computed estimate exists, the service performs a runtime estimation using prefix matching against all known TERCET codes for that country.
@@ -1010,7 +1055,7 @@ Optional `tercet_map` field for countries where the TERCET key differs from the
 }
 ```
 
-Supported `tercet_map` actions: `truncate:N`, `prepend:XX`, `keep_alpha`.
+Supported `tercet_map` actions: `truncate:N`, `prepend:XX`, `keep_alpha`, `outward_only` (marks a country for outward-code fallback, as used by UK).
 
 ### 3. `README.md` — update coverage section
 
@@ -1023,10 +1068,14 @@ Add the country to the appropriate group (EU, EFTA, or candidate) and add a row
 
 No Python code changes are required.
 
+> **Non-GISCO sources** (currently only the UK via NSPL) are different: they require a dedicated loader path and configuration (a source ZIP URL and any names files), not just a JSON edit — and must **not** be added to `settings.json` `countries`, or the GISCO loader would waste requests guessing non-existent TERCET URLs. See `_load_nspl` and `_load_itl_names` in `app/data_loader.py` for the NSPL precedent.
+
 ## Data sources & attribution
 
 **Postal code → NUTS (both tiers).** [GISCO TERCET flat files](https://ec.europa.eu/eurostat/web/gisco/geodata/administrative-units/postal-codes) ([download](https://gisco-services.ec.europa.eu/tercet/flat-files)), &copy; European Union &ndash; GISCO, licensed [CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/). Albanian NUTS3 assignments come from the country's official postal-code block-allocation scheme (Posta Shqiptare), cross-validated against [GeoNames](https://www.geonames.org/) admin1 tagging ([CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)).
 
+**UK postal code → ITL (optional).** [ONS National Statistics Postcode Lookup (NSPL)](https://geoportal.statistics.gov.uk/) and the ONS ITL "Names and Codes" files, &copy; Crown copyright and database right, licensed under the [Open Government Licence v3.0](https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/). Contains OS data &copy; Crown copyright and database right. Loaded only when `PC2NUTS_NSPL_URL` is configured.
+
 The [EU Open Data Portal dataset](https://data.europa.eu/data/datasets/postcodes-and-nuts-nomenclature-of-territorial-units-for-statistics) was also considered as a data source. However, its refresh cycle lags behind the GISCO TERCET flat files, so direct sourcing from GISCO was chosen for more up-to-date coverage.
 
 **Address → geocode → NUTS (Full tier only).** The optional geocoding tier relies on:

From 6088941b1696b8c85eb1051a5006306741d3f331 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:15:38 +0200
Subject: [PATCH 16/18] chore: release v1.1.0 (UK/ITL support #7)

---
 CHANGELOG.md    | 20 ++++++++++++++++++++
 app/__init__.py |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2135a0f..0f76374 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,8 +6,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/).
 
 ## [Unreleased]
 
+## [1.1.0] - 2026-07-03
+
 ### Added
 
+- **United Kingdom (ITL) support** (#7): the service can now resolve UK postcodes
+  to [ITL](https://www.ons.gov.uk/methodology/geography/ukgeographies/eurostat)
+  (International Territorial Level) codes — the UK's post-Brexit successor to
+  NUTS. Sourced from the ONS [National Statistics Postcode Lookup
+  (NSPL)](https://geoportal.statistics.gov.uk/), loaded only when
+  `PC2NUTS_NSPL_URL` is configured (the ~178 MB dataset is not bundled). UK is
+  treated as a parallel data channel: it reuses the same in-memory lookup, SQLite
+  cache, and waterfall as TERCET, and an NSPL failure never blocks TERCET serving.
+  - New response field **`code_system`** (`"NUTS"` | `"ITL"`) on `/lookup`
+    (additive, non-breaking) marks which scheme the `nuts1/2/3` fields carry.
+    ITL diverges from NUTS-2016 UK at L2/L3, so consumers should branch on `code_system`.
+  - **`country=GB` accepted** as an alias for `UK` (like `GR → EL`).
+  - **Outward-code lookup**: outward-only input (e.g. `SW1A`) or an unlisted
+    full postcode resolves to the majority-vote ITL3 for that outward code with
+    `match_type="estimated"` and medium confidence.
+  - New config: `PC2NUTS_NSPL_URL`, `PC2NUTS_ITL_NAMES_URLS`. `patterns_version`
+    bumped to `1.3`. Crown Dependencies (JE/GG/IM) and Gibraltar (GI) are out of
+    scope; lookups for those country codes return HTTP `400`.
 - **Albania coverage completeness** (#118): AL postal codes now resolve via the
   official postal-code block-allocation scheme (`app/albania_blocks.py`) instead
   of the incomplete GeoNames estimates. A code maps to its NUTS3 region by its
diff --git a/app/__init__.py b/app/__init__.py
index 5becc17..6849410 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -1 +1 @@
-__version__ = "1.0.0"
+__version__ = "1.1.0"

From 03c267f02c565177f28f5da7ab64439cde9ad3c8 Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:17:35 +0200
Subject: [PATCH 17/18] style: apply ruff format to data_loader

---
 app/data_loader.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/data_loader.py b/app/data_loader.py
index 814dce6..e5528dc 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -363,9 +363,7 @@ def _parse_csv_content(
     return count
 
 
-def _download_zip_conditional(
-    client: httpx.Client, url: str, cached_meta: dict
-) -> httpx.Response:
+def _download_zip_conditional(client: httpx.Client, url: str, cached_meta: dict) -> httpx.Response:
     """Download with conditional-GET headers; returns the raw httpx.Response.
 
     cached_meta keys: 'etag' and 'last_modified' (either may be absent). The

From 9050047b387404b9fef6216392393734d9a23adc Mon Sep 17 00:00:00 2001
From: bk86a <michal@osmenda.com>
Date: Fri, 3 Jul 2026 22:25:27 +0200
Subject: [PATCH 18/18] fix: address Codex review on UK/ITL (#7)

- outward miss for outward_only countries no longer falls through to generic
  prefix estimation (would answer from an arbitrary 1-2 char prefix)
- reuse cached nspl.zip when the NSPL fetch transiently fails, so an ONS
  outage does not silently drop UK support on a rebuild
- bust the SQLite fast-path cache when NSPL/ITL-names config changes, so
  enabling/disabling/swapping the URL takes effect without waiting for TTL
---
 .gitignore                    |   3 +
 app/data_loader.py            | 118 ++++++++++++++++++++++++----------
 tests/test_api.py             |  12 +---
 tests/test_config.py          |   1 +
 tests/test_data_loader.py     |  65 +++++++++++++++++--
 tests/test_postal_patterns.py |   3 +
 6 files changed, 154 insertions(+), 48 deletions(-)

diff --git a/.gitignore b/.gitignore
index 119d6e8..1a4ca0e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,6 @@ local-data/
 # Private planning / SDD artifacts — infra context + operator token; never commit or push
 docs/superpowers/
 .superpowers/
+
+# Local SQLite cache artifacts
+postalcode2nuts_NUTS-*.db
diff --git a/app/data_loader.py b/app/data_loader.py
index e5528dc..b81d4b3 100644
--- a/app/data_loader.py
+++ b/app/data_loader.py
@@ -155,6 +155,20 @@ def _extra_sources_hash() -> str:
     return hashlib.sha256(joined.encode()).hexdigest()[:16]
 
 
+def _nspl_config_hash() -> str:
+    """SHA-256 hash (16 hex chars) of the NSPL / ITL-names configuration.
+
+    Returns empty string when NSPL is unconfigured, so a TERCET-only deployment's
+    cache stays valid. Enabling, disabling, or changing PC2NUTS_NSPL_URL /
+    PC2NUTS_ITL_NAMES_URLS changes the hash, busting the fast-path cache so UK
+    rows are added (or dropped) on the next load instead of after TTL expiry.
+    """
+    if not settings.nspl_url and not settings.itl_names_url_list:
+        return ""
+    joined = settings.nspl_url + "|" + ",".join(settings.itl_names_url_list)
+    return hashlib.sha256(joined.encode()).hexdigest()[:16]
+
+
 def _load_extra_sources(client: httpx.Client, cache_dir: Path, *, deadline: float = 0) -> int:
     """Download and parse extra data source ZIPs. Returns total entries written."""
     global _extra_source_count
@@ -477,12 +491,47 @@ def _download_and_parse_zip(
     return total
 
 
+def _parse_nspl_zip(content: bytes) -> int:
+    """Parse NSPL ZIP bytes and load live UK postcode → ITL3 rows into _lookup.
+
+    Returns the number of rows added. Raises zipfile.BadZipFile for invalid input.
+    """
+    total = 0
+    with zipfile.ZipFile(io.BytesIO(content)) as zf:
+        for name in zf.namelist():
+            # The postcode CSV is the "NSPL*.csv" (real releases ship it under
+            # Data/); other bundled CSVs (user guide, column lookups) lack the
+            # pcds/itl columns and are ignored by _parse_csv_content anyway.
+            if not name.lower().endswith(".csv") or "nspl" not in name.lower():
+                continue
+            file_size = zf.getinfo(name).file_size
+            if file_size > _MAX_NSPL_UNCOMPRESSED_SIZE:
+                logger.warning(
+                    "Skipping %s: uncompressed size %d exceeds NSPL limit %d",
+                    name,
+                    file_size,
+                    _MAX_NSPL_UNCOMPRESSED_SIZE,
+                )
+                continue
+            raw = zf.read(name)
+            for enc in ("utf-8-sig", "utf-8", "latin-1"):
+                try:
+                    text = raw.decode(enc)
+                    break
+                except UnicodeDecodeError:
+                    continue
+            total += _parse_csv_content(text, "UK", overwrite=False, skip_terminated=True)
+    return total
+
+
 def _load_nspl(client: httpx.Client, url: str, cache_dir: Path) -> int:
     """Fetch the NSPL ZIP and load UK postcode → ITL3 entries into _lookup.
 
-    Returns the number of rows added. Returns 0 when url is empty or any error
-    occurs — an NSPL failure must never block TERCET-only operation. Terminated
-    postcodes (non-blank DOTERM) are filtered out. UK is registered in the loaded
+    Returns the number of rows added. Returns 0 when url is empty. An NSPL
+    failure must never block TERCET-only operation, so on a fetch/parse failure
+    the previously-cached nspl.zip is reused when present (a transient ONS outage
+    must not silently drop UK support for a configured deployment). Terminated
+    postcodes (non-blank DOTERM) are filtered out. UK registers in the loaded
     country set automatically because its rows land in _lookup.
     """
     if not url:
@@ -491,47 +540,38 @@ def _load_nspl(client: httpx.Client, url: str, cache_dir: Path) -> int:
     try:
         resp = _download_zip_conditional(client, url, {})
         if resp.status_code == 304:
-            # Unchanged upstream — nothing to (re)load this run.
-            return 0
+            # Unchanged upstream — reload from the cached copy if we have one.
+            return _load_nspl_from_cache(cache_path)
         resp.raise_for_status()
         content = resp.content
         if not zipfile.is_zipfile(io.BytesIO(content)):
-            logger.warning("NSPL response from %s is not a valid ZIP, skipping", url)
-            return 0
+            logger.warning("NSPL response from %s is not a valid ZIP", url)
+            return _load_nspl_from_cache(cache_path)
         try:
             cache_path.write_bytes(content)
         except OSError as exc:
             logger.warning("Failed to cache NSPL ZIP: %s", exc)
-
-        total = 0
-        with zipfile.ZipFile(io.BytesIO(content)) as zf:
-            for name in zf.namelist():
-                # The postcode CSV is the "NSPL*.csv" (real releases ship it under
-                # Data/); other bundled CSVs (user guide, column lookups) lack the
-                # pcds/itl columns and are ignored by _parse_csv_content anyway.
-                if not name.lower().endswith(".csv") or "nspl" not in name.lower():
-                    continue
-                file_size = zf.getinfo(name).file_size
-                if file_size > _MAX_NSPL_UNCOMPRESSED_SIZE:
-                    logger.warning(
-                        "Skipping %s: uncompressed size %d exceeds NSPL limit %d",
-                        name,
-                        file_size,
-                        _MAX_NSPL_UNCOMPRESSED_SIZE,
-                    )
-                    continue
-                raw = zf.read(name)
-                for enc in ("utf-8-sig", "utf-8", "latin-1"):
-                    try:
-                        text = raw.decode(enc)
-                        break
-                    except UnicodeDecodeError:
-                        continue
-                total += _parse_csv_content(text, "UK", overwrite=False, skip_terminated=True)
+        total = _parse_nspl_zip(content)
         logger.info("NSPL loaded: %d live UK postcodes", total)
         return total
     except (httpx.HTTPError, zipfile.BadZipFile, OSError) as exc:
-        logger.warning("NSPL load failed: %s", exc)
+        logger.warning("NSPL fetch failed (%s); trying cached copy", exc)
+        return _load_nspl_from_cache(cache_path)
+
+
+def _load_nspl_from_cache(cache_path: Path) -> int:
+    """Load UK rows from a previously-cached nspl.zip. Returns 0 if unavailable."""
+    if not cache_path.is_file():
+        return 0
+    try:
+        content = cache_path.read_bytes()
+        if not zipfile.is_zipfile(io.BytesIO(content)):
+            return 0
+        total = _parse_nspl_zip(content)
+        logger.info("NSPL loaded from cache: %d live UK postcodes", total)
+        return total
+    except (zipfile.BadZipFile, OSError) as exc:
+        logger.warning("Cached NSPL ZIP unusable: %s", exc)
         return 0
 
 
@@ -564,6 +604,10 @@ def _db_is_valid(db: Path) -> bool:
         if stored_hash != _extra_sources_hash():
             logger.info("Extra sources configuration changed, will rebuild")
             return False
+        # Check if NSPL / ITL-names configuration changed (enable/disable/URL swap)
+        if meta.get("nspl_config_hash", "") != _nspl_config_hash():
+            logger.info("NSPL configuration changed, will rebuild")
+            return False
         return True
     except (sqlite3.Error, KeyError, ValueError) as exc:
         logger.info("DB cache unusable (%s), will rebuild", exc)
@@ -1038,6 +1082,7 @@ def _save_to_db(db: Path) -> None:
                     ("estimate_count", str(len(_estimates))),
                     ("nuts_names_count", str(len(_nuts_names))),
                     ("extra_sources_hash", _extra_sources_hash()),
+                    ("nspl_config_hash", _nspl_config_hash()),
                 ],
             )
             con.commit()
@@ -1314,6 +1359,11 @@ def lookup(country_code: str, postal_code: str) -> dict | None:
                 nuts2_confidence=conf["nuts2"],
                 nuts3_confidence=conf["nuts3"],
             )
+        # Outward is the authoritative boundary for outward_only countries. A
+        # miss means the code isn't in NSPL — stop here rather than fall through
+        # to generic prefix estimation, which would answer from an arbitrary 1–2
+        # character prefix (e.g. "SW" for an unknown SW99, mixing distinct ITL3s).
+        return None
 
     # Tier 3: Runtime prefix-based estimation
     approx = _estimate_by_prefix(cc, extracted)
diff --git a/tests/test_api.py b/tests/test_api.py
index 41f05e4..8aac62d 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -454,9 +454,7 @@ def test_200_on_successful_refresh(self, trusted_client, monkeypatch):
         )
 
         async def fake_refresh(client=None):
-            return RefreshResult(
-                status="refreshed", previous_count=7000, new_count=7042, skipped_rows=0
-            )
+            return RefreshResult(status="refreshed", previous_count=7000, new_count=7042, skipped_rows=0)
 
         monkeypatch.setattr(estimates_refresh, "refresh_estimates_once", fake_refresh)
 
@@ -483,9 +481,7 @@ def test_200_on_unchanged_refresh(self, trusted_client, monkeypatch):
         )
 
         async def fake_refresh(client=None):
-            return RefreshResult(
-                status="unchanged", previous_count=7000, new_count=7000, skipped_rows=0
-            )
+            return RefreshResult(status="unchanged", previous_count=7000, new_count=7000, skipped_rows=0)
 
         monkeypatch.setattr(estimates_refresh, "refresh_estimates_once", fake_refresh)
 
@@ -510,9 +506,7 @@ def test_502_on_failed_refresh(self, trusted_client, monkeypatch):
         )
 
         async def fake_refresh(client=None):
-            return RefreshResult(
-                status="failed", previous_count=7000, new_count=7000, reason="http=503"
-            )
+            return RefreshResult(status="failed", previous_count=7000, new_count=7000, reason="http=503")
 
         monkeypatch.setattr(estimates_refresh, "refresh_estimates_once", fake_refresh)
 
diff --git a/tests/test_config.py b/tests/test_config.py
index 8af0aaf..cdbb8ae 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -61,6 +61,7 @@ def test_interval_negative_is_rejected(self, monkeypatch):
 
 def test_synthetic_nuts_fallback_has_fo():
     from app.config import settings
+
     assert settings.synthetic_nuts_fallback.get("FO") == "FO000"
 
 
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index 349e4e6..7166c92 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -183,6 +183,7 @@ def test_tier6_fo_rejects_two_digit(self, mock_data):
 
     def test_tier6_fo_in_loaded_countries(self, mock_data):
         from app.data_loader import get_loaded_countries
+
         assert "FO" in get_loaded_countries()
 
 
@@ -272,8 +273,18 @@ def test_al_stays_in_loaded_countries(self):
 
 class TestBundledAlbaniaData:
     VALID_AL_NUTS3 = {
-        "AL011", "AL012", "AL013", "AL014", "AL015", "AL021",
-        "AL022", "AL031", "AL032", "AL033", "AL034", "AL035",
+        "AL011",
+        "AL012",
+        "AL013",
+        "AL014",
+        "AL015",
+        "AL021",
+        "AL022",
+        "AL031",
+        "AL032",
+        "AL033",
+        "AL034",
+        "AL035",
     }
 
     def test_no_al_rows_remain_in_estimates_csv(self):
@@ -382,6 +393,19 @@ def handler(request):
         client = httpx.Client(transport=httpx.MockTransport(handler))
         assert data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path) == 0
 
+    def test_transient_failure_reuses_cached_zip(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(data_loader, "_lookup", {})
+        # Seed the on-disk cache from a prior successful run.
+        (tmp_path / "nspl.zip").write_bytes(self._zip_bytes("pcds,itl,doterm\nSW1A 2AA,TLI32,\n"))
+
+        def handler(request):
+            raise httpx.ConnectError("ons down")
+
+        client = httpx.Client(transport=httpx.MockTransport(handler))
+        count = data_loader._load_nspl(client, "https://example.com/x.zip", tmp_path)
+        assert count == 1
+        assert data_loader._lookup[("UK", "SW1A2AA")] == "TLI32"
+
     def test_nspl_failure_does_not_block_tercet(self, tmp_path, monkeypatch):
         """If NSPL is unreachable, previously-loaded TERCET data must still serve."""
         monkeypatch.setattr(data_loader, "_lookup", {("AT", "1010"): "AT130"})
@@ -424,6 +448,11 @@ def test_unlisted_full_postcode_resolves_via_outward(self, mock_data):
     def test_unknown_outward_returns_none(self, mock_data):
         assert lookup("UK", "ZZ99") is None
 
+    def test_outward_miss_does_not_fall_through_to_prefix(self, mock_data):
+        # "SW99 9ZZ" shares the "SW" prefix with SW1A… but SW99 is not a known
+        # outward; must NOT return an arbitrary prefix-based ITL3 — stop instead.
+        assert lookup("UK", "SW99 9ZZ") is None
+
     def test_uk_result_tagged_itl(self, mock_data):
         assert lookup("UK", "SW1A 2AA")["code_system"] == "ITL"
         assert lookup("UK", "SW1A")["code_system"] == "ITL"
@@ -494,6 +523,34 @@ def handler(request):
         assert data_loader._load_itl_names(client, ["https://example.com/x.csv"]) == 0
 
 
+class TestNSPLConfigHash:
+    def test_empty_when_unconfigured(self, monkeypatch):
+        monkeypatch.setattr(data_loader.settings, "nspl_url", "", raising=False)
+        monkeypatch.setattr(data_loader.settings, "itl_names_urls", "", raising=False)
+        assert data_loader._nspl_config_hash() == ""
+
+    def test_changes_when_url_set(self, monkeypatch):
+        monkeypatch.setattr(data_loader.settings, "nspl_url", "", raising=False)
+        monkeypatch.setattr(data_loader.settings, "itl_names_urls", "", raising=False)
+        empty = data_loader._nspl_config_hash()
+        monkeypatch.setattr(data_loader.settings, "nspl_url", "https://ons/nspl.zip", raising=False)
+        assert data_loader._nspl_config_hash() != empty
+        assert data_loader._nspl_config_hash() != ""
+
+    def test_db_invalidated_when_nspl_config_changes(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(data_loader.settings, "nspl_url", "", raising=False)
+        monkeypatch.setattr(data_loader.settings, "itl_names_urls", "", raising=False)
+        monkeypatch.setattr(data_loader, "_lookup", {("AT", "1010"): "AT130"})
+        monkeypatch.setattr(data_loader, "_estimates", {})
+        monkeypatch.setattr(data_loader, "_nuts_names", {})
+        db = tmp_path / "cache.db"
+        data_loader._save_to_db(db)
+        assert data_loader._db_is_valid(db) is True
+        # Operator now enables NSPL → cache must be considered invalid.
+        monkeypatch.setattr(data_loader.settings, "nspl_url", "https://ons/nspl.zip", raising=False)
+        assert data_loader._db_is_valid(db) is False
+
+
 class TestConditionalGet:
     def test_sends_conditional_headers_when_etag_known(self):
         captured = {}
@@ -507,9 +564,7 @@ def handler(request: httpx.Request) -> httpx.Response:
             "etag": '"abc123"',
             "last_modified": "Wed, 01 Jan 2025 00:00:00 GMT",
         }
-        result = data_loader._download_zip_conditional(
-            client, "https://example.com/foo.zip", cached_meta
-        )
+        result = data_loader._download_zip_conditional(client, "https://example.com/foo.zip", cached_meta)
         assert result.status_code == 304
         assert captured["headers"]["if-none-match"] == '"abc123"'
         assert captured["headers"]["if-modified-since"] == "Wed, 01 Jan 2025 00:00:00 GMT"
diff --git a/tests/test_postal_patterns.py b/tests/test_postal_patterns.py
index 4f57392..918497e 100644
--- a/tests/test_postal_patterns.py
+++ b/tests/test_postal_patterns.py
@@ -151,10 +151,12 @@ def test_me_with_space_prefix(self):
 class TestFaroeIslands:
     def test_bare_three_digits(self):
         from app.postal_patterns import extract_postal_code
+
         assert extract_postal_code("FO", "100") == "100"
 
     def test_prefixed_variants(self):
         from app.postal_patterns import extract_postal_code
+
         assert extract_postal_code("FO", "FO-100") == "100"
         assert extract_postal_code("FO", "FO 100") == "100"
         assert extract_postal_code("FO", "FO100") == "100"
@@ -163,6 +165,7 @@ def test_rejects_non_three_digit(self):
         # No regex match → falls back to normalize_postal_code(cleaned),
         # which is NOT a clean 3-digit extraction.
         from app.postal_patterns import _COMPILED
+
         pat = _COMPILED["FO"]
         assert pat.match("1234") is None
         assert pat.match("ABC") is None