From ce3b6195704dfc084668dcf68a85a99881095f0b Mon Sep 17 00:00:00 2001 From: mcarans Date: Wed, 10 Jun 2026 15:29:49 +1200 Subject: [PATCH] Add sector mappings --- CLAUDE.md | 58 +++++++++++++++++++ .../pipelineutils/sector_configuration.yaml | 17 ++++++ 2 files changed, 75 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..c56e93c --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,58 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +**hdx-python-pipelineutils** is a shared utility library for HDX data pipeline scrapers. It provides reusable components for reading data, resolving admin levels, and looking up sector and org-type codes from YAML-driven configuration. + +## Commands + +Install dependencies: +```bash +uv sync +``` + +Run tests: +```bash +uv run pytest +``` + +Run a single test file: +```bash +uv run pytest tests/test_lookup.py +``` + +Lint check: +```bash +pre-commit run --all-files +``` + +## Architecture + +Source lives under `src/hdx/pipelineutils/`: + +- **`reader.py`** — `Read` (subclass of `hdx-python-utilities` `Retrieve`): downloads and caches tabular data (CSV, XLS, XLSX), with save/use-saved modes, date extraction from time periods, and HDX dataset/resource helpers. +- **`lookup.py`** — `Lookup`: base class for YAML-driven code lookups. Loads a configuration YAML, builds normalised name→code mappings, and resolves codes via `hdx-python-country`'s fuzzy matching. +- **`org_type.py`** — `OrgType(Lookup)`: resolves organisation-type codes using `org_type_configuration.yaml`. +- **`sector.py`** — `Sector(Lookup)`: resolves sector codes using `sector_configuration.yaml`. +- **`hapi_admins.py`** — `complete_admins()`: populates adm-level names from p-codes and vice-versa using `hdx-python-country` `AdminLevel` objects; returns the resolved level and a list of warnings. 
+- **`__init__.py`** — shared helpers: `string_params_to_dict`, `match_template` (extracts `{{...}}` placeholders), `get_startend_dates_from_time_period`. + +## Testing + +Tests are in `tests/` and use `pytest`. No live HDX connection is required — data is mocked or loaded from `tests/fixtures/`. Coverage is written to `coverage.lcov` and JUnit XML to `test-results.xml`. + +## Code Style + +Formatted and linted with `ruff` (rules: E, F, I, UP; line-length not enforced). Python ≥ 3.10. Use `X | Y` union syntax (PEP 604), not `Optional`/`Union`. Google-style docstrings with `Args:` and `Returns:` sections. + +## Collaboration Style + +- Be objective, not agreeable. Act as a partner, not a sycophant. Push back when you disagree, flag tradeoffs honestly, and don't sugarcoat problems. +- Keep explanations brief and to the point. +- Don't rely on recalled knowledge for facts that could be stale (API behaviour, library versions, external systems). Search or read the actual source first. + +## Scope of Changes + +When fixing a bug or addressing PR feedback, change only what is necessary to resolve the specific issue. Do not refactor surrounding code, rename variables, adjust formatting, or make improvements in the same commit unless they are directly required by the fix. 
diff --git a/src/hdx/pipelineutils/sector_configuration.yaml b/src/hdx/pipelineutils/sector_configuration.yaml index d3c3552..44a786c 100644 --- a/src/hdx/pipelineutils/sector_configuration.yaml +++ b/src/hdx/pipelineutils/sector_configuration.yaml @@ -32,6 +32,7 @@ initial_lookup: agriculture: "FSC" agua saneamiento e higiene: "WSH" all: "Intersectoral" + alojamiento: "SHL" alojamiento de emergencia: "SHL" alojamiento de emergencia shelter: "SHL" alojamiento energía y enseres: "SHL" @@ -46,6 +47,7 @@ initial_lookup: cccm: "CCM" ccs: "CCM" cluster coordination: "CCM" + coord log support services: "CCM" coord services support: "CCM" coordinacion informacion: "CCM" coord support services: "CCM" @@ -55,6 +57,7 @@ initial_lookup: cp: "PRO-CPN" css: "CCM" eah: "WSH" + early recovery livelihoods: "ERY" eau: "WSH" eau assainissement et hygiene: "WSH" eau hygiene: "WSH" @@ -75,6 +78,7 @@ initial_lookup: food: "FSC" food safety: "FSC" food security and agriculture: "FSC" + food security and agriculture working group: "FSC" food security and livelihoods: "FSC" food security and nutrition: "FSC" food security livelihood: "FSC" @@ -84,6 +88,7 @@ initial_lookup: gbv: "PRO-GBV" general protection: "PRO" global protection: "PRO" + grupo de trabajo de infraestructura y alojamientos temporales: "SHL" hlp: "PRO-HLP" housing land property: "PRO-HLP" humanitaire: "Hum" @@ -106,6 +111,13 @@ initial_lookup: nutricion: "NUT" nutrition: "NUT" operatioanl presence water sanitation hygiene: "WSH" + operational capacity education in emergencies: "EDU" + operational capacity emergency shelter non food items: "SHL" + operational capacity food security agriculture: "FSC" + operational capacity health: "HEA" + operational capacity nutrition: "NUT" + operational capacity protection: "PRO" + operational capacity water sanitation hygiene: "WSH" operational presence education in emergencies: "EDU" operational presence emergency shelter non food items: "SHL" operational presence food security 
agriculture: "FSC" @@ -115,6 +127,7 @@ initial_lookup: pro cpm: "PRO-CPN" pronna: "PRO-CPN" propg: "PRO" + proteccion accion contra minas: "PRO-MIN" proteccion infantil: "PRO-CPN" proteccion ninos ninas adolescentes: "PRO-CPN" proteccion violencia de genero: "PRO-GBV" @@ -151,6 +164,7 @@ initial_lookup: same: "FSC" samv: "FSC" sante: "HEA" + secal: "FSC" securite alimentaire: "FSC" securite alimentaire et moyen dexistence: "FSC" seguridad alimentaria: "FSC" @@ -160,6 +174,7 @@ initial_lookup: shelter: "SHL" shelter nfi: "SHL" shelter nfis: "SHL" + shelter non food items: "SHL" shelter and nfi: "SHL" shelter and nfis: "SHL" shelter and non food items: "SHL" @@ -170,7 +185,9 @@ initial_lookup: telecommunications: "TEL" telecommunications durgence: "TEL" telecomunicaciones de emergencia: "TEL" + transferencias monetarias multiproposito: "Cash" transversal: "Multi" + transversal peas: "PRO-GBV" vbg: "PRO-GBV" violences basees sur le genre: "PRO-GBV" violence basee sur le genre: "PRO-GBV"