diff --git a/DESCRIPTION b/DESCRIPTION index 291d514..46c169c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: link Title: Stream Network Habitat Interpretation (Experimental) -Version: 0.40.5 -Date: 2026-05-26 +Version: 0.41.0 +Date: 2026-05-27 Authors@R: c( person("Allan", "Irvine", , "airvine@newgraphenvironment.com", role = c("aut", "cre"), @@ -30,12 +30,12 @@ Imports: yaml Remotes: NewGraphEnvironment/crate, - NewGraphEnvironment/fresh@v0.31.0 + NewGraphEnvironment/fresh@v0.32.0 Suggests: bcdata, digest, dplyr, - fresh (>= 0.31.0), + fresh (>= 0.32.0), lintr, mockery, sf, diff --git a/NAMESPACE b/NAMESPACE index 1d40ce6..877b309 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(lnk_source) export(lnk_stamp) export(lnk_stamp_finish) export(lnk_thresholds) +export(lnk_wsg_resolve) import(DBI) importFrom(RPostgres,Postgres) importFrom(utils,read.csv) diff --git a/NEWS.md b/NEWS.md index ed03e0e..2ee4c7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# link 0.41.0 + +New exported function `lnk_wsg_resolve()` — the bundle-aware "what WSGs should we model?" resolver ([#207](https://github.com/NewGraphEnvironment/link/issues/207)). Composes the FWA drainage closure (now a fresh primitive: `fresh::frs_wsg_drainage()`, [NewGraphEnvironment/fresh#211](https://github.com/NewGraphEnvironment/fresh/pull/212) / fresh v0.32.0) with the bundle's `wsg_species_presence` filter (link#157). Three call patterns dispatched by `(wsgs, expand)`: province mode (`wsgs = NULL` → all bundle-species WSGs, sorted alphabetically), closure mode (`wsgs = c(...), expand = TRUE` → focal + drainage closure, DS-first preserved), strict mode (`wsgs = c(...), expand = FALSE` → species-filter input verbatim). 
Validation mirrors `lnk_pipeline_species`; closure mode opens its own DB conn via `lnk_db_conn()` with `on.exit` cleanup; closure + strict modes emit `message()` listing any species-less WSGs dropped from the result (parity with the previous inline diagnostic). New `@family wsg` — pre-stages a `lnk_wsg_*` family for follow-on topology helpers (e.g. cross-host DS-first bucketing). + +`data-raw/study_area_wsgs.R` shrinks 76 → 33 lines — pure CLI shim now, delegating to `lnk_wsg_resolve()`. Stdout is **byte-identical** for the regression baseline (`PARS,BULK` → the exact 15-WSG closure `KISP, KLUM, LKEL, LSKE, MSKE, USKE, BULK, FINA, LBTN, LPCE, MORR, PARA, PCEA, UPCE, PARS`), so `data-raw/study_area_run.sh` and downstream consumers are unchanged. fresh dependency pin: `Remotes: NewGraphEnvironment/fresh@v0.31.0 → @v0.32.0`. 13 tests (22 expectations) added (`tests/testthat/test-lnk_wsg_resolve.R`): arg validation, stub-based province/strict (stub deliberately non-alphabetical so `sort()` is load-bearing), live-DB closure + province (gated on `skip_if_no_db()`). + # link 0.40.5 Tunnel-free per-segment `mapping_code` parity for the 3 FWCP study areas ([#175](https://github.com/NewGraphEnvironment/link/issues/175)) — 50 drainage-closed WSGs across Peace / Fraser / Skeena, authoritative median match 99.66% / mean 99.11% / 130 of 148 rows ≥99%. Built around a new `lnk_access()` export ([#205](https://github.com/NewGraphEnvironment/link/issues/205)) — the portable access builder that's the missing twin of `lnk_mapping_code`. Its `merge = TRUE` mode is the cheap post-consolidate recompute: rebuild only access + mapping_code from persisted streams/habitat/barriers (no streams segmentation or habitat classification re-derived), ~8× faster than the full-pipeline path (FINA 11.9 s wall vs ~90 s, identical bcfp parity). 
Methodology is now correctness-regardless-of-bucketing — distribute (any bucketing) → consolidate → recompute → compare — with the recompute as the correctness guarantee, bucketing as a speed knob. diff --git a/R/lnk_wsg_resolve.R b/R/lnk_wsg_resolve.R new file mode 100644 index 0000000..bf55379 --- /dev/null +++ b/R/lnk_wsg_resolve.R @@ -0,0 +1,126 @@ +#' Resolve the Set of Watershed Groups to Model +#' +#' Bundle-aware WSG resolver. Given a config + loaded overrides and an +#' optional focal set, returns the character vector of WSG codes that +#' should be modelled — composing FWA drainage closure (via +#' [fresh::frs_wsg_drainage()]) with the bundle's species-presence +#' filter (link#157). +#' +#' Three call patterns dispatched by `wsgs` + `expand`: +#' +#' - `wsgs = NULL` — *province mode*: every WSG in +#' `loaded$wsg_species_presence` that has at least one of +#' `cfg$species` flagged present. +#' - `wsgs = c(...)` + `expand = TRUE` (default) — *closure mode*: +#' expand the focal set to its drainage closure (focal + every WSG +#' they flow through, ordered downstream-first), then species-filter. +#' Opens a connection via [lnk_db_conn()] and closes it on exit. +#' - `wsgs = c(...)` + `expand = FALSE` — *strict mode*: species-filter +#' the input verbatim, no closure expansion, no DB. +#' +#' Species filter: a WSG is kept if *any* of `tolower(cfg$species)` +#' columns in `loaded$wsg_species_presence` carries `"t"` (or `"TRUE"` / +#' `TRUE`, defensively). DS-first ordering from the closure is preserved. +#' +#' @param cfg An `lnk_config` object from [lnk_config()]. +#' @param loaded Named list of tibbles from [lnk_load_overrides()]. +#' Must carry `wsg_species_presence`. +#' @param wsgs Character vector of focal WSG codes, or `NULL` (default) +#' for province mode. Codes are upper-cased internally before use. +#' @param expand Logical. 
When `wsgs` is non-`NULL`, `TRUE` (default)
+#' closure-expands via [fresh::frs_wsg_drainage()]; `FALSE` uses the
+#' input as-is (species-filter only).
+#'
+#' @return Character vector of WSG codes. Province mode returns the
+#' species-filtered set sorted alphabetically; closure mode preserves the
+#' downstream-first order from [fresh::frs_wsg_drainage()]; strict mode
+#' preserves the caller-provided focal order. WSGs dropped by the
+#' species filter (closure / strict modes) are reported via `message()`.
+#'
+#' @family wsg
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' cfg <- lnk_config("bcfishpass")
+#' loaded <- lnk_load_overrides(cfg)
+#'
+#' # Province mode — all bundle-species WSGs
+#' lnk_wsg_resolve(cfg, loaded)
+#'
+#' # Study-area mode — focal + drainage closure (default)
+#' lnk_wsg_resolve(cfg, loaded, wsgs = c("PARS", "BULK"))
+#' #> [1] "KISP" "KLUM" "LKEL" "LSKE" "MSKE" "USKE" "BULK" "FINA"
+#' #> "LBTN" "LPCE" "MORR" "PARA" "PCEA" "UPCE" "PARS"
+#'
+#' # Strict mode — exactly these, species-filtered, no closure
+#' lnk_wsg_resolve(cfg, loaded, wsgs = c("BBAR", "BULK"), expand = FALSE)
+#' }
+lnk_wsg_resolve <- function(cfg, loaded, wsgs = NULL, expand = TRUE) {
+  # -- Argument validation -------------------------------------------------
+  if (!inherits(cfg, "lnk_config")) {
+    stop("cfg must be an lnk_config object (from lnk_config())",
+         call. = FALSE)
+  }
+  if (!is.list(loaded)) {
+    stop("loaded must be a named list (from lnk_load_overrides())",
+         call. = FALSE)
+  }
+  if (!is.null(wsgs)) {
+    bad <- !is.character(wsgs) || length(wsgs) == 0L ||
+      anyNA(wsgs) || !all(nzchar(wsgs))
+    if (bad) {
+      stop("wsgs must be NULL or a non-empty character vector free of NA",
+           call. = FALSE)
+    }
+  }
+  if (!is.logical(expand) || length(expand) != 1L || is.na(expand)) {
+    stop("expand must be a single logical (TRUE or FALSE)", call. = FALSE)
+  }
+
+  # -- Species-presence table ----------------------------------------------
+  wp <- loaded$wsg_species_presence
+  # is.data.frame() guard: nrow() of a non-data-frame is NULL, and `!NULL`
+  # is logical(0), which would make `||` fail with an unrelated error.
+  if (is.null(wp) || !is.data.frame(wp) || nrow(wp) == 0L) {
+    stop("loaded$wsg_species_presence is missing or empty — ",
+         "did `lnk_load_overrides(cfg)` populate it?", call. = FALSE)
+  }
+  # Species come from the config; when cfg$species is NULL, fall back to the
+  # species codes listed in loaded$parameters_fresh.
+  spp_cols <- tolower(cfg$species %||%
+                        unique(loaded$parameters_fresh$species_code))
+  missing_cols <- setdiff(spp_cols, names(wp))
+  if (length(missing_cols) > 0L) {
+    stop("loaded$wsg_species_presence missing species columns: ",
+         paste(missing_cols, collapse = ", "), call. = FALSE)
+  }
+  # Test each species column on its own, then OR the results row-wise.
+  # apply() is avoided on purpose: it coerces the frame via as.matrix(),
+  # which format()-pads logical columns that sit beside character ones
+  # (TRUE becomes " TRUE"), so a logical presence flag would be missed.
+  present <- c("t", "TRUE")
+  has_spp <- Reduce(
+    `|`,
+    lapply(spp_cols, function(col) as.character(wp[[col]]) %in% present),
+    rep(FALSE, nrow(wp))
+  )
+  modelable <- wp$watershed_group_code[has_spp]
+
+  # Province mode --------------------------------------------------------
+  if (is.null(wsgs)) return(sort(modelable))
+
+  focal <- toupper(wsgs)
+
+  # Strict mode ----------------------------------------------------------
+  if (!expand) {
+    kept <- focal[focal %in% modelable]
+    dropped <- setdiff(focal, kept)
+    if (length(dropped) > 0L) {
+      message("lnk_wsg_resolve: dropped ", length(dropped),
+              " species-less WSG(s): ", paste(dropped, collapse = ", "))
+    }
+    return(kept)
+  }
+
+  # Closure mode ---------------------------------------------------------
+  conn <- lnk_db_conn()
+  on.exit(try(DBI::dbDisconnect(conn), silent = TRUE), add = TRUE)
+  closure <- fresh::frs_wsg_drainage(conn, focal)
+  # Preserve DS-first order from frs_wsg_drainage by indexing closure,
+  # not the modelable set
+  kept <- closure[closure %in% modelable]
+  dropped <- setdiff(closure, kept)
+  if (length(dropped) > 0L) {
+    message("lnk_wsg_resolve: dropped ", length(dropped),
+            " species-less closure WSG(s): ", paste(dropped, collapse = ", "))
+  }
+  kept
+}
diff --git a/data-raw/study_area_wsgs.R b/data-raw/study_area_wsgs.R index eb1ae9c..6b04c7b 100755 --- a/data-raw/study_area_wsgs.R +++ b/data-raw/study_area_wsgs.R @@ -2,21 +2,12 @@ # study_area_wsgs.R — given a set of FOCAL watershed groups, print the # drainage-CLOSED, MODELABLE set in DOWNSTREAM-FIRST order (one comma line). # -# Closure: every WSG whose outlet wscode_ltree is an ancestor of (== at or -# downstream of) any focal WSG's outlet — i.e. the WSGs a focal WSG's water -# drains through. 
DS-first: ordered by outlet ltree depth ascending, so the -# most-downstream WSGs come first. Running a host's bucket in this order -# persists downstream dam barriers before upstream WSGs compute access, which -# is what makes cross-WSG `;DAM` correct from the per-host run (no recompute). +# Thin CLI shim around [link::lnk_wsg_resolve()] — see `?lnk_wsg_resolve` +# for the methodology (FWA drainage closure via fresh::frs_wsg_drainage() +# composed with the bundle's wsg_species_presence filter, link#157). # -# MODELABLE filter (link#157, mirrors data-raw/wsgs_run_host.R): drop closure -# WSGs with no bundle-species presence. lnk_pipeline_run errors hard ("No -# species resolved for AOI") on a species-less WSG (e.g. lower-mainstem groups -# pulled in by closure), which would abort the whole host run. bcfp doesn't -# model those WSGs either, so excluding them matches the proven methodology. -# -# Sources of truth: public.wsg_outlet (closure) + loaded$wsg_species_presence -# (modelable), both in fwapg / the bundle. +# Stdout: one line — comma-separated WSG codes (DS-first). Used by +# `data-raw/study_area_run.sh` to seed per-host buckets. # # Usage: [LNK_LOAD=loadall] Rscript study_area_wsgs.R [config] @@ -33,42 +24,11 @@ if (identical(Sys.getenv("LNK_LOAD"), "loadall")) { } else { suppressPackageStartupMessages(library(link)) } -suppressPackageStartupMessages({ - library(DBI); library(RPostgres) -}) -conn <- DBI::dbConnect(RPostgres::Postgres(), host = "localhost", port = 5432, - dbname = "fwapg", user = "postgres", password = "postgres") -on.exit(try(DBI::dbDisconnect(conn), silent = TRUE), add = TRUE) - -# 1. Drainage closure, DS-first. 
-focal_lit <- paste(DBI::dbQuoteLiteral(conn, focal), collapse = ", ") -q <- sprintf(" - SELECT DISTINCT w.wsg, nlevel(w.outlet) AS depth - FROM public.wsg_outlet w - JOIN public.wsg_outlet f ON f.wsg IN (%s) - WHERE f.outlet <@ w.outlet - ORDER BY depth ASC, w.wsg ASC", focal_lit) -res <- DBI::dbGetQuery(conn, q) -if (nrow(res) == 0L) { - stop("no closure found — are the focal WSGs present in public.wsg_outlet?", - call. = FALSE) -} -# 2. Modelable filter (link#157): keep only WSGs with bundle-species presence. -cfg <- lnk_config(config) -loaded <- lnk_load_overrides(cfg) -spp_cols <- tolower(cfg$species) -wp <- loaded$wsg_species_presence -has_spp <- apply(wp[, spp_cols, drop = FALSE], 1, - function(r) any(r %in% c("t", "TRUE", TRUE))) -modelable <- wp$watershed_group_code[has_spp] +cfg <- lnk_config(config) +loaded <- lnk_load_overrides(cfg) +keep <- lnk_wsg_resolve(cfg, loaded, wsgs = focal) -keep <- res$wsg[res$wsg %in% modelable] # preserves DS-first order -dropped <- setdiff(res$wsg, keep) -if (length(dropped) > 0L) { - message(sprintf("[study_area_wsgs] dropped %d species-less closure WSG(s): %s", - length(dropped), paste(dropped, collapse = ","))) -} if (length(keep) == 0L) { stop("no modelable WSGs after species-presence filter", call. = FALSE) } diff --git a/man/lnk_wsg_resolve.Rd b/man/lnk_wsg_resolve.Rd new file mode 100644 index 0000000..5a2a81a --- /dev/null +++ b/man/lnk_wsg_resolve.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lnk_wsg_resolve.R +\name{lnk_wsg_resolve} +\alias{lnk_wsg_resolve} +\title{Resolve the Set of Watershed Groups to Model} +\usage{ +lnk_wsg_resolve(cfg, loaded, wsgs = NULL, expand = TRUE) +} +\arguments{ +\item{cfg}{An \code{lnk_config} object from \code{\link[=lnk_config]{lnk_config()}}.} + +\item{loaded}{Named list of tibbles from \code{\link[=lnk_load_overrides]{lnk_load_overrides()}}. 
+Must carry \code{wsg_species_presence}.} + +\item{wsgs}{Character vector of focal WSG codes, or \code{NULL} (default) +for province mode. Codes are upper-cased internally before use.} + +\item{expand}{Logical. When \code{wsgs} is non-\code{NULL}, \code{TRUE} (default) +closure-expands via \code{\link[fresh:frs_wsg_drainage]{fresh::frs_wsg_drainage()}}; \code{FALSE} uses the +input as-is (species-filter only).} +} +\value{ +Character vector of WSG codes. Province mode returns the +species-filtered set sorted alphabetically; closure mode preserves the +downstream-first order from \code{\link[fresh:frs_wsg_drainage]{fresh::frs_wsg_drainage()}}; strict mode +preserves the caller-provided focal order. WSGs dropped by the +species filter (closure / strict modes) are reported via \code{message()}. +} +\description{ +Bundle-aware WSG resolver. Given a config + loaded overrides and an +optional focal set, returns the character vector of WSG codes that +should be modelled — composing FWA drainage closure (via +\code{\link[fresh:frs_wsg_drainage]{fresh::frs_wsg_drainage()}}) with the bundle's species-presence +filter (link#157). +} +\details{ +Three call patterns dispatched by \code{wsgs} + \code{expand}: +\itemize{ +\item \code{wsgs = NULL} — \emph{province mode}: every WSG in +\code{loaded$wsg_species_presence} that has at least one of +\code{cfg$species} flagged present. +\item \code{wsgs = c(...)} + \code{expand = TRUE} (default) — \emph{closure mode}: +expand the focal set to its drainage closure (focal + every WSG +they flow through, ordered downstream-first), then species-filter. +Opens a connection via \code{\link[=lnk_db_conn]{lnk_db_conn()}} and closes it on exit. +\item \code{wsgs = c(...)} + \code{expand = FALSE} — \emph{strict mode}: species-filter +the input verbatim, no closure expansion, no DB. 
+} + +Species filter: a WSG is kept if \emph{any} of \code{tolower(cfg$species)} +columns in \code{loaded$wsg_species_presence} carries \code{"t"} (or \code{"TRUE"} / +\code{TRUE}, defensively). DS-first ordering from the closure is preserved. +} +\examples{ +\dontrun{ +cfg <- lnk_config("bcfishpass") +loaded <- lnk_load_overrides(cfg) + +# Province mode — all bundle-species WSGs +lnk_wsg_resolve(cfg, loaded) + +# Study-area mode — focal + drainage closure (default) +lnk_wsg_resolve(cfg, loaded, wsgs = c("PARS", "BULK")) +#> [1] "KISP" "KLUM" "LKEL" "LSKE" "MSKE" "USKE" "BULK" "FINA" +#> "LBTN" "LPCE" "MORR" "PARA" "PCEA" "UPCE" "PARS" + +# Strict mode — exactly these, species-filtered, no closure +lnk_wsg_resolve(cfg, loaded, wsgs = c("BBAR", "BULK"), expand = FALSE) +} +} +\concept{wsg} diff --git a/planning/archive/2026-05-issue-207-lnk-wsg-resolve/.gitkeep b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/planning/archive/2026-05-issue-207-lnk-wsg-resolve/README.md b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/README.md new file mode 100644 index 0000000..fa94c38 --- /dev/null +++ b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/README.md @@ -0,0 +1,11 @@ +# Issue #207 — lnk_wsg_resolve + study_area_wsgs.R → CLI shim + +## Outcome + +Added `lnk_wsg_resolve(cfg, loaded, wsgs = NULL, expand = TRUE)` — the bundle-aware WSG resolver that composes `fresh::frs_wsg_drainage()` (the FWA drainage-closure primitive from NewGraphEnvironment/fresh#211 / v0.32.0) with the bundle's `wsg_species_presence` filter (link#157). Three call patterns: province (`wsgs = NULL`, sorted alphabetically), closure (`wsgs + expand = TRUE`, focal + drainage DS-first), strict (`wsgs + expand = FALSE`, species-filter input verbatim). Closure + strict modes emit `message()` listing any species-less WSGs dropped — preserving the diagnostic the old script had inline. 
New `@family wsg` pre-stages the family for future `lnk_wsg_*` helpers. + +`data-raw/study_area_wsgs.R` shrank from 76 → 33 lines — pure CLI shim now, delegating to `lnk_wsg_resolve()`. **Byte-identical stdout** for the regression baseline (`PARS,BULK` → the 15-WSG closure `KISP, KLUM, LKEL, LSKE, MSKE, USKE, BULK, FINA, LBTN, LPCE, MORR, PARA, PCEA, UPCE, PARS`), so `data-raw/study_area_run.sh` is unaffected. fresh dependency pin bumped `Remotes: fresh@v0.31.0 → @v0.32.0`. 13 tests (22 expectations) added (`tests/testthat/test-lnk_wsg_resolve.R`); /code-check ran 2 rounds on the function (3 findings → all fixed: undocumented province ordering, silent strict-mode drops, silent closure-mode drops) and 2 rounds on the tests (2 findings → stub deliberately reordered so `sort()` is load-bearing; misleading test name renamed). + +Released as **v0.41.0**. + +Closed by: commits `196fd63` (Phase 1: fresh dep bump), `c7ae248` (Phase 2: function), `9a95081` (Phase 3: tests), `bb1a6ab` (Phase 4: shim), `c0735f3` (Release v0.41.0). PR forthcoming via `/gh-pr-push`. diff --git a/planning/archive/2026-05-issue-207-lnk-wsg-resolve/findings.md b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/findings.md new file mode 100644 index 0000000..cf7c5cf --- /dev/null +++ b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/findings.md @@ -0,0 +1,116 @@ +# Findings — lnk_wsg_resolve (#207) + +## Issue context + +### Problem + +`data-raw/study_area_wsgs.R` does three things inline: +1. Read `public.wsg_outlet`, compute drainage closure (DS-first) +2. Species-filter via the bundle's `wsg_species_presence` (#157) +3. Print a comma list to stdout + +It's callable only from bash. Not testable. Not reusable from R sessions, vignettes, or other drivers. The "what WSGs should we model?" decision is captured in a script when it should be a function. 
+ +### Proposed + +```r +lnk_wsg_resolve(cfg, loaded, wsgs = NULL, expand = TRUE) +``` + +| Param | Role | +|---|---| +| `cfg` | `lnk_config()` manifest | +| `loaded` | `lnk_load_overrides(cfg)` — carries `wsg_species_presence` for the #157 filter (consistent with the rest of link's API) | +| `wsgs` | character vector of seed WSGs; `NULL` = all bundle-species WSGs (province mode) | +| `expand` | when `wsgs` is non-NULL: `TRUE` (default) = closure-expand via `fresh::frs_wsg_drainage`; `FALSE` = use as-is (species-filter only) | + +Returns: character vector of WSG codes, DS-first ordered when expanded. + +### Three call patterns + +```r +lnk_wsg_resolve(cfg, loaded) # province (all bundle-species WSGs) +lnk_wsg_resolve(cfg, loaded, wsgs = c("PARS","BULK")) # study-area + drainage closure (default) +lnk_wsg_resolve(cfg, loaded, wsgs = c("BBAR","BULK"), + expand = FALSE) # exactly these, species-filtered, no closure +``` + +### Acceptance + +- [ ] `lnk_wsg_resolve(cfg, loaded, wsgs = c("PARS","BULK"))` reproduces the current `study_area_wsgs.R` output: `KISP,KLUM,LKEL,LSKE,MSKE,USKE,BULK,FINA,LBTN,LPCE,MORR,PARA,PCEA,UPCE,PARS` (15 WSGs) +- [ ] `lnk_wsg_resolve(cfg, loaded)` returns the full bundle-species province list +- [ ] `expand = FALSE` returns input verbatim (after species-filter) +- [ ] Tests for all three call patterns +- [ ] Runnable `@example` +- [ ] `data-raw/study_area_wsgs.R` rewritten as CLI shim; `study_area_run.sh` adapted, interface unchanged + +### Blocked on / Composes with + +- NewGraphEnvironment/fresh#211 (`frs_wsg_drainage`) — **SHIPPED v0.32.0** (now unblocked) +- #157 (species-presence filter — the rule this function applies) + +## Codebase exploration + +### `lnk_config()` shape (`R/lnk_config.R`) + +Returns manifest with `cfg$species` (UPPERCASE character vector from rules.yaml keys, line 121: `rules_species <- names(yaml::read_yaml(rules_path))`), `cfg$rules`, `cfg$dimensions`, `cfg$files`, `cfg$pipeline`, `cfg$provenance`, `cfg$extends`. 
Class: `c("lnk_config", "list")`. + +### `loaded$wsg_species_presence` shape + +Tibble columns: +- `watershed_group_code` — UPPERCASE WSG identifier (e.g. "BULK") +- Per-species columns LOWERCASE: `bt, ch, cm, co, ct, dv, pk, rb, sk, st, wct` +- Optional `notes` +- Values: `"t"` (present) / `"f"` (absent) as STRINGS + +### Species filter idiom (`study_area_wsgs.R:60-64` and `wsgs_run_host.R:91-96` — two callers) + +```r +spp_cols <- tolower(cfg$species) +wp <- loaded$wsg_species_presence +has_spp <- apply(wp[, spp_cols, drop = FALSE], 1, + function(r) any(r %in% c("t", "TRUE", TRUE))) +modelable <- wp$watershed_group_code[has_spp] +``` + +Defensive against format drift (matches `"t"`, `"TRUE"` string, or `TRUE` boolean). + +### Closest sibling: `lnk_pipeline_species(cfg, loaded, aoi)` (`R/lnk_pipeline_species.R:41`) + +Same `cfg` + `loaded` validation pattern. Uses helper `.lnk_wsg_species_present(row)` from `R/utils.R:135` which works on ONE row (one WSG). The new function needs the vectorized form (all rows), which matches the `apply()` idiom in the inline scripts above. + +### Tests + +`tests/testthat/test-lnk_pipeline_species.R` uses both inline live `lnk_config("bcfishpass")` + `lnk_load_overrides(cfg)` AND stub fixtures: + +```r +cfg_stub <- structure(list( + species = c("BT", "CH", "CO", "SK", "ST", "WCT") +), class = c("lnk_config", "list")) +loaded_stub <- list( + wsg_species_presence = data.frame( + watershed_group_code = "ELKR", + bt = "t", ch = "f", cm = "f", co = "f", ct = "f", dv = "f", + pk = "f", rb = "f", sk = "f", st = "f", wct = "t", + stringsAsFactors = FALSE + ) +) +``` + +`skip_if_no_db()` helper (in `tests/testthat/setup.R`) gates live tests. + +### fresh dep + import idiom + +DESCRIPTION line 33: `Remotes: NewGraphEnvironment/fresh@v0.31.0` → needs bump to `@v0.32.0`. Convention: qualified calls (`fresh::frs_wsg_drainage()`) rather than `@importFrom`. 
+ +### CLI shim (`study_area_wsgs.R`) + +Args parsing (lines 23-29): `args <- commandArgs(trailingOnly = TRUE)`; focal WSGs from `args[1]` (comma-separated, uppercased); config from `args[2]` (default `"bcfishpass"`). LNK_LOAD env idiom (lines 31-35): `loadall` → `pkgload::load_all`, else `library(link)`. Lines 39-41 open conn directly to `localhost:5432/fwapg postgres/postgres`. Lines 43-74 = inline closure + filter (the part being replaced). + +### `study_area_run.sh` interface + +`DISP_BUCKET=$(Rscript data-raw/study_area_wsgs.R "${FOCAL_ARR[0]}")` — captures stdout; bash `set -euo pipefail` means non-zero exit aborts. Stderr (warnings, messages) goes to logs but doesn't break the script. Implication: fresh#211's `warning()` on unmatched focals will appear in logs but not break anything; stdout must remain a single comma-separated WSG line. + +### Naming / family + +No existing `lnk_wsg_*` exports (only `lnk_compare_wsg` which is `@family compare`). New `@family wsg` recommended — pre-stages the family per issue body. diff --git a/planning/archive/2026-05-issue-207-lnk-wsg-resolve/progress.md b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/progress.md new file mode 100644 index 0000000..3fbf37e --- /dev/null +++ b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/progress.md @@ -0,0 +1,14 @@ +# Progress — lnk_wsg_resolve (#207) + +## Session 2026-05-27 + +- Plan-mode exploration — phases approved by user +- Archived #175 PWF (commit `f964537`, pushed to main) +- Created branch `207-lnk-wsg-resolve-bundle-aware-wsg-resolve` off main +- Scaffolded PWF baseline from issue #207 with approved 5-phase plan +- **Phase 1 complete:** DESCRIPTION bumped `fresh@v0.31.0 → @v0.32.0` (Remotes + Suggests); fresh v0.32.0 installed via pak (pkg sha `5e7fa81` matches fresh main); smoke test passed — `fresh::frs_wsg_drainage(conn, c("PARS","BULK"))` returns the exact 15-WSG closure from inside link's session. 
+- **Phase 2 complete:** Wrote `R/lnk_wsg_resolve.R` — signature `lnk_wsg_resolve(cfg, loaded, wsgs = NULL, expand = TRUE)`; validation mirrors `lnk_pipeline_species`; 3-branch dispatch (province/closure/strict); composes `fresh::frs_wsg_drainage()` for closure expansion; species filter inline. `/code-check` Round 1 caught 3 issues — (a) undocumented province ordering → now sorted alphabetically + documented in `@return`; (b) silent strict-mode drops → now `message()` with dropped list; (c) silent closure-mode drops → now `message()` (parity with `study_area_wsgs.R:67-71`); Round 2 Clean. Smoke-validated all four behaviors against live fwapg. Commit `196fd63`. Function commit pending. +- **Phase 3 complete:** Wrote `tests/testthat/test-lnk_wsg_resolve.R` — 13 test_that blocks / 22 expectations. Code-check Round 1 caught stub-was-pre-sorted bug (`sort()` not exercised) → reordered stub to `c("CCCC","AAAA","BBBB")` so positives are NOT in alpha order; Round 2 caught misleading test name → renamed. 22/22 PASS against live fwapg. Commit `c7ae248` (function); test commit pending. +- **Phase 4 complete:** `data-raw/study_area_wsgs.R` shrunk 76 → 33 lines; closure + filter + ordering block replaced with single `lnk_wsg_resolve()` call. Byte-identical stdout vs pre-#207 (76 bytes for `PARS,BULK` regression baseline). Stderr unchanged. `/code-check` Round 1 Clean. Commit `9a95081` (tests); shim commit pending. +- **Phase 5 release-prep:** NEWS.md `# link 0.41.0` section added (two paragraphs matching v0.40.x style); DESCRIPTION bumped 0.40.5 → 0.41.0, Date 2026-05-27. Lintr installed + run; 2 indent findings on R/lnk_wsg_resolve.R + test file → fixed by extracting `bad` predicate and lifting `empty_wp` out of nested `expect_error`. All 3 changed files lint clean. Tests still pass. Commit `bb1a6ab` (shim); Release commit pending. 
+- Next: Release v0.41.0 commit, then `/planning-archive` + `/gh-pr-push` diff --git a/planning/archive/2026-05-issue-207-lnk-wsg-resolve/task_plan.md b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/task_plan.md new file mode 100644 index 0000000..1c99611 --- /dev/null +++ b/planning/archive/2026-05-issue-207-lnk-wsg-resolve/task_plan.md @@ -0,0 +1,85 @@ +# Task: lnk_wsg_resolve — bundle-aware WSG resolver (closure + species filter); study_area_wsgs.R → CLI shim (#207) + +## Problem + +`data-raw/study_area_wsgs.R` does three things inline: +1. Read `public.wsg_outlet`, compute drainage closure (DS-first) +2. Species-filter via the bundle's `wsg_species_presence` (#157) +3. Print a comma list to stdout + +It's callable only from bash. Not testable. Not reusable from R sessions, vignettes, or other drivers. The "what WSGs should we model?" decision is captured in a script when it should be a function. + +The closure piece just landed in fresh as `frs_wsg_drainage()` (NewGraphEnvironment/fresh#211 / v0.32.0). With that primitive in place, the remaining composition belongs in a link function. + +## Approach + +New exported function `lnk_wsg_resolve(cfg, loaded, wsgs = NULL, expand = TRUE)` returning a character vector of WSG codes. Three call patterns dispatched by the `wsgs` + `expand` args: + +- `wsgs = NULL` → province mode: every bundle-species WSG +- `wsgs = c(...) + expand = TRUE` (default) → focal + drainage closure via `fresh::frs_wsg_drainage`, then species-filter, DS-first ordering preserved +- `wsgs = c(...) + expand = FALSE` → strict mode: species-filter input verbatim (no closure) + +`study_area_wsgs.R` becomes a thin CLI shim around this function. 
+ +### Critical files + +- **NEW** `R/lnk_wsg_resolve.R` — function + roxygen + `\dontrun{}` example +- **NEW** `tests/testthat/test-lnk_wsg_resolve.R` — arg-validation (stub) + live-DB (gated) +- **EDIT** `data-raw/study_area_wsgs.R` — shrink to CLI shim (~30 lines) +- **EDIT** `DESCRIPTION` — `Remotes: fresh@v0.31.0 → @v0.32.0`; `Suggests: fresh (>= 0.31.0) → (>= 0.32.0)`; `Version: 0.40.5 → 0.41.0` +- **EDIT** `NAMESPACE` + `man/lnk_wsg_resolve.Rd` — regenerated by `devtools::document()` +- **EDIT** `NEWS.md` — new `# link 0.41.0` section + +## Phase 1 — fresh dep bump + install + +- [x] Edit `DESCRIPTION`: `Remotes: NewGraphEnvironment/fresh@v0.31.0 → @v0.32.0` and `Suggests: fresh (>= 0.31.0) → (>= 0.32.0)` +- [x] Reinstall fresh into link's library via `pak::pkg_install("NewGraphEnvironment/fresh@v0.32.0")` — confirmed `packageVersion("fresh") == "0.32.0"`, `frs_wsg_drainage` present in fresh namespace +- [x] Smoke test from link's session: `fresh::frs_wsg_drainage(conn, c("PARS","BULK"))` returns the 15-WSG closure — exact match to expected `KISP, KLUM, LKEL, LSKE, MSKE, USKE, BULK, FINA, LBTN, LPCE, MORR, PARA, PCEA, UPCE, PARS` + +## Phase 2 — Write `R/lnk_wsg_resolve.R` + +- [x] Function with `cfg` + `loaded` validation mirroring `lnk_pipeline_species` (`R/lnk_pipeline_species.R:42-53`); 3-branch dispatch on `(wsgs, expand)`; `expand = TRUE` branch opens DB via `lnk_db_conn()` + `on.exit(dbDisconnect)`; species filter inline using `study_area_wsgs.R:60-64` idiom; preserve DS-first order via `closure[closure %in% modelable]`. Province mode sorts alphabetically (Round 1 fix); closure + strict modes `message()` listing dropped species-less WSGs (Round 1 fix, parity with CLI script). 
+- [x] Roxygen: `@family wsg`, `@param` blocks, `\dontrun{}` example showing all 3 call patterns; `@return` documents per-mode ordering + drop-message behaviour; `@export` +- [x] `devtools::document()` → regenerated `NAMESPACE` + `man/lnk_wsg_resolve.Rd` +- [x] Smoke-validated live: closure 15/15 exact match; strict returns input verbatim; province 217 WSGs sorted; species-less message fires +- [x] `/code-check` Round 1 (3 findings: undocumented province order, silent strict drops, silent closure drops) all fixed; Round 2 Clean → atomic commit + +## Phase 3 — Tests + +- [x] `tests/testthat/test-lnk_wsg_resolve.R` — 13 test_that blocks / 22 expectations: 6 arg-validation + 1 missing-wp + 1 missing-species-columns (no DB), 4 stub-based province/strict (stub deliberately not pre-sorted so `sort()` is load-bearing per code-check Round 1), 2 live-DB (gated on `skip_if_no_db()`): PARS+BULK exact 15-WSG closure + province mode ≥ 200 WSGs sorted +- [x] `devtools::test(filter = "lnk_wsg_resolve")` green — 22/22 PASS against live fwapg +- [x] `/code-check` Round 1: 1 finding (stub pre-sorted → province sort not actually exercised); fixed by reordering stub to `c("CCCC","AAAA","BBBB")`. Round 2: 1 naming finding (misleading test name); renamed. → atomic commit + +## Phase 4 — Convert `data-raw/study_area_wsgs.R` to CLI shim + +- [x] Rewrite: 76 → 33 lines. 
Args parsing + `LNK_LOAD=loadall` idiom preserved; closure + filter + ordering block replaced with one `lnk_wsg_resolve(cfg, loaded, wsgs = focal)` call; loud-fail on empty result preserved +- [x] **Byte-identical stdout** validated: `diff /tmp/pre207_stdout.txt /tmp/post207_stdout.txt` returns 0 for `PARS,BULK` input (76 bytes both) +- [x] Stderr unchanged (only pre-existing `cabd_blkey_xref.csv` warning from `lnk_load_overrides`); no new noise from `lnk_wsg_resolve` in the happy path +- [x] `data-raw/study_area_run.sh` interface unchanged +- [x] `/code-check` Round 1 Clean → atomic commit + +## Phase 5 — Release + +- [x] `NEWS.md`: new `# link 0.41.0` section — two paragraphs matching v0.40.x style (lnk_wsg_resolve framing + fresh#211 composition; study_area_wsgs.R shim numbers + test count) +- [x] `DESCRIPTION`: `Version: 0.40.5 → 0.41.0`, `Date: 2026-05-27` +- [x] `lintr::lint(...)` clean on all 3 changed files (R/lnk_wsg_resolve.R, tests/testthat/test-lnk_wsg_resolve.R, data-raw/study_area_wsgs.R) — fixed 2 indent lints (extracted `bad <-` predicate in the function; lifted `empty_wp` out of nested `expect_error` in tests). Tests still pass. 
+- [x] `/code-check` skipped on the Release commit — substantive R code already cleared 2 rounds in Phases 2 + 3; this commit is NEWS + version bump + lint-fix refactors (equivalent transformations) +- [ ] Atomic commit `"Release v0.41.0"` +- [ ] `/planning-archive` → `/gh-pr-push` (PR body: `Closes #207` + `Relates to NewGraphEnvironment/sred#24`) + +## Validation + +- [ ] `devtools::test()` green +- [ ] `lintr::lint_package()` clean +- [ ] Live `lnk_wsg_resolve(cfg, loaded, c("PARS","BULK"))` returns: `KISP, KLUM, LKEL, LSKE, MSKE, USKE, BULK, FINA, LBTN, LPCE, MORR, PARA, PCEA, UPCE, PARS` (15 WSGs) +- [ ] `Rscript data-raw/study_area_wsgs.R "PARS,BULK"` stdout byte-identical to pre-#207 output +- [ ] pkgdown reference page renders new function +- [ ] `/code-check` clean on each commit +- [ ] PWF checkboxes match landed work +- [ ] `/planning-archive` on completion + +## Out of scope + +- Refactoring species-filter idiom into shared `.lnk_wsg_species_filter` helper that also services `wsgs_run_host.R:91-96` (defer until that script is touched) +- Retagging `lnk_compare_wsg` to `@family wsg` (it's a compare lens, not a topology resolver) +- Adding `lnk_wsg_buckets` / `lnk_wsg_list` (separate issues) diff --git a/tests/testthat/test-lnk_wsg_resolve.R b/tests/testthat/test-lnk_wsg_resolve.R new file mode 100644 index 0000000..cf73d25 --- /dev/null +++ b/tests/testthat/test-lnk_wsg_resolve.R @@ -0,0 +1,153 @@ +# -- arg validation (no DB needed) ------------------------------------------- + +test_that("lnk_wsg_resolve rejects non-lnk_config cfg", { + loaded_stub <- list(wsg_species_presence = data.frame( + watershed_group_code = "ELKR", + bt = "t", ch = "f", cm = "f", co = "f", ct = "f", dv = "f", + pk = "f", rb = "f", sk = "f", st = "f", wct = "f", + stringsAsFactors = FALSE + )) + expect_error(lnk_wsg_resolve(list(species = "BT"), loaded_stub), + "cfg must be an lnk_config object") +}) + +test_that("lnk_wsg_resolve rejects non-list loaded", { + cfg_stub <- 
structure(list(species = "BT"), + class = c("lnk_config", "list")) + expect_error(lnk_wsg_resolve(cfg_stub, "not-a-list"), + "loaded must be a named list") +}) + +test_that("lnk_wsg_resolve rejects malformed wsgs", { + cfg_stub <- structure(list(species = "BT"), + class = c("lnk_config", "list")) + loaded_stub <- list(wsg_species_presence = data.frame( + watershed_group_code = "ELKR", bt = "t", stringsAsFactors = FALSE + )) + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, wsgs = 1:3), + "wsgs must be NULL or a non-empty character vector") + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, wsgs = character(0)), + "wsgs must be NULL or a non-empty character vector") + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, wsgs = c("BULK", NA)), + "wsgs must be NULL or a non-empty character vector") + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, wsgs = c("BULK", "")), + "wsgs must be NULL or a non-empty character vector") +}) + +test_that("lnk_wsg_resolve rejects malformed expand", { + cfg_stub <- structure(list(species = "BT"), + class = c("lnk_config", "list")) + loaded_stub <- list(wsg_species_presence = data.frame( + watershed_group_code = "ELKR", bt = "t", stringsAsFactors = FALSE + )) + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, "BULK", expand = "yes"), + "expand must be a single logical") + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, "BULK", + expand = c(TRUE, FALSE)), + "expand must be a single logical") + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub, "BULK", expand = NA), + "expand must be a single logical") +}) + +test_that("lnk_wsg_resolve rejects missing/empty wsg_species_presence", { + cfg_stub <- structure(list(species = "BT"), + class = c("lnk_config", "list")) + expect_error(lnk_wsg_resolve(cfg_stub, list()), + "wsg_species_presence is missing or empty") + empty_wp <- list(wsg_species_presence = data.frame( + watershed_group_code = character(0), + bt = character(0), stringsAsFactors = FALSE + )) + 
expect_error(lnk_wsg_resolve(cfg_stub, empty_wp), + "wsg_species_presence is missing or empty") +}) + +test_that("lnk_wsg_resolve rejects missing species columns", { + cfg_stub <- structure(list(species = c("BT", "GR")), + class = c("lnk_config", "list")) + loaded_stub <- list(wsg_species_presence = data.frame( + watershed_group_code = "ELKR", bt = "t", stringsAsFactors = FALSE + # no `gr` column + )) + expect_error(lnk_wsg_resolve(cfg_stub, loaded_stub), + "missing species columns: gr") +}) + +# -- stub-based province + strict modes (no DB) ------------------------------ + +# Stub row order DELIBERATELY NOT alphabetical so the province-mode sort +# is actually exercised (otherwise removing `sort()` from the function +# wouldn't break the test). +.wsg_stub <- function() { + cfg <- structure(list(species = c("BT", "WCT")), + class = c("lnk_config", "list")) + loaded <- list(wsg_species_presence = data.frame( + watershed_group_code = c("CCCC", "AAAA", "BBBB"), + bt = c("t", "f", "f"), + wct = c("f", "t", "f"), + stringsAsFactors = FALSE + )) + list(cfg = cfg, loaded = loaded) +} + +test_that("lnk_wsg_resolve province mode returns species-positive WSGs sorted", { + s <- .wsg_stub() + # Unsorted-result without sort() would be c("CCCC", "AAAA") (row order) + expect_identical(lnk_wsg_resolve(s$cfg, s$loaded), c("AAAA", "CCCC")) +}) + +test_that("lnk_wsg_resolve strict mode preserves caller order when all WSGs are species-positive", { + s <- .wsg_stub() + # Caller-supplied order preserved (no sort in strict mode); no drops. 
+ expect_identical( + lnk_wsg_resolve(s$cfg, s$loaded, wsgs = c("CCCC", "AAAA"), expand = FALSE), + c("CCCC", "AAAA") + ) +}) + +test_that("lnk_wsg_resolve strict mode messages on dropped species-less WSGs", { + s <- .wsg_stub() + expect_message( + res <- lnk_wsg_resolve(s$cfg, s$loaded, + wsgs = c("CCCC", "BBBB"), expand = FALSE), + "dropped 1 species-less WSG" + ) + expect_identical(res, "CCCC") +}) + +test_that("lnk_wsg_resolve strict mode upper-cases focal codes", { + s <- .wsg_stub() + expect_identical( + lnk_wsg_resolve(s$cfg, s$loaded, + wsgs = c("cccc", "aaaa"), expand = FALSE), + c("CCCC", "AAAA") + ) +}) + +# -- live DB (closure mode) -------------------------------------------------- + +test_that("lnk_wsg_resolve closure mode returns PARS+BULK 15-WSG closure DS-first", { + skip_if_no_db() + cfg <- lnk_config("bcfishpass") + loaded <- lnk_load_overrides(cfg) + expected <- c( + "KISP", "KLUM", "LKEL", "LSKE", "MSKE", "USKE", + "BULK", "FINA", "LBTN", "LPCE", "MORR", "PARA", "PCEA", "UPCE", + "PARS" + ) + expect_identical( + lnk_wsg_resolve(cfg, loaded, wsgs = c("PARS", "BULK")), + expected + ) +}) + +test_that("lnk_wsg_resolve province mode returns the full bundle-species list", { + skip_if_no_db() + cfg <- lnk_config("bcfishpass") + loaded <- lnk_load_overrides(cfg) + res <- lnk_wsg_resolve(cfg, loaded) + expect_true(length(res) >= 200L) + expect_identical(res, sort(res)) + # Spot-check a few known bundle-species WSGs are present + expect_true(all(c("BULK", "PARS", "ADMS") %in% res)) +})