From 40ca95d56fbb5e5b4a3d1563be2f257aabbaa2cf Mon Sep 17 00:00:00 2001 From: Connorrmcd6 Date: Sat, 20 Jun 2026 09:22:35 +0200 Subject: [PATCH] feat(hash): member-access names verbatim + versioned stamps (#140) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The canonical hash alpha-renamed every identifier, so re-pointing an anchored span at a different single-occurrence external symbol (PointsTier.TIER_1 -> TIER_2, b.Del -> b.Keep, ProbeColor.RED -> GREEN) produced a byte-identical hash: the gate stayed green while the claim's prose became false. This is the member-access slice of #77 — the shape that accounts for every reproduced miss. The v2 recipe keeps the property/field component of a member-access expression verbatim instead of alpha-renaming it, in TS/Go/Rust/Python. These positions name an external member, never a local binding, so emitting them verbatim distinguishes "re-pointed at a different symbol" (loud) from "renamed my own local" (still quiet) — rename tolerance is preserved. v1 == v2 minus that single rule, one mode flag, no frozen copy of the old algorithm. Stamps now carry their recipe (`2:` prefix; bare hex = implicit v1). check verifies each stamp under its own recipe, so existing v1 stamps keep passing (with a one-line nudge) until verify re-stamps them as v2; an unrecognized prefix fails closed. Forced re-verify is avoided deliberately — it would launder real v1-missed drift green. One `surf verify` per repo ends the transition. Golden fixtures pin both recipes per language; docs/reference/hash-recipes.md documents the rules and migration. The repo's own hubs are migrated to v2 and affected claim prose updated. Out of scope (stays in #77): the full bound/free split for bare free identifiers, the external-corpus validation harness, dogfood claims in hubs/hash.md, and the full version-table governance. 
Co-Authored-By: Claude Opus 4.8 --- AGENTS.md | 2 +- CHANGELOG.md | 14 ++ docs/index.md | 2 +- docs/reference/hash-recipes.md | 94 ++++++++++ docs/reference/how-it-works.md | 11 +- hubs/anchor.md | 2 +- hubs/cli-check.md | 19 +- hubs/cli-for.md | 4 +- hubs/cli-git.md | 10 +- hubs/cli-lint.md | 6 +- hubs/cli-reference.md | 2 +- hubs/cli-scaffold.md | 4 +- hubs/cli-stats.md | 4 +- hubs/cli-suggest.md | 2 +- hubs/cli-verify.md | 11 +- hubs/cli-workspace.md | 4 +- hubs/config.md | 2 +- hubs/hash.md | 16 +- hubs/hub-format.md | 4 +- hubs/lang.md | 2 +- hubs/rename.md | 2 +- hubs/resolve.md | 6 +- surf-cli/src/check.rs | 235 +++++++++++++++++++---- surf-cli/src/verify.rs | 75 ++++++-- surf-core/src/hash.rs | 331 ++++++++++++++++++++++++++++++++- surf-core/src/hub.rs | 15 ++ surf-core/src/lib.rs | 3 +- surf-core/src/rename.rs | 11 +- surf-core/tests/golden_hash.rs | 104 ++++++++++- 29 files changed, 873 insertions(+), 124 deletions(-) create mode 100644 docs/reference/hash-recipes.md diff --git a/AGENTS.md b/AGENTS.md index 8875a66..dac2d40 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,7 +5,7 @@ anchors: surf lint blocks when AGENTS.md carries a surf:hubs block that does not link the configured hubs directory, or when that directory does not exist; without the block it stays silent. at: surf-cli/src/lint.rs > lint_agents_pointer - hash: 938380798f7a + hash: 2:9a5f7d9fd0db refs: [] --- diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f62e33..5f70f4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] +### Changed +- **Hash recipe v2 (member-access names verbatim).** The canonical hash now keeps the + property/field component of a member-access expression verbatim instead of alpha-renaming it, + so re-pointing an anchored span at a *different* external symbol — `PointsTier.TIER_1` → + `TIER_2`, `b.Del` → `b.Keep`, `ProbeColor.RED` → `GREEN` — changes the hash even when the name + occurs once. Previously these passed the gate silently while the claim's prose became false + (#140, the member-access slice of #77). Consistent local/parameter renames stay quiet, as + before. Covers TypeScript, Go, Rust, and Python. +- **Versioned stamps.** Stored hashes now carry their recipe: a v2 stamp is prefixed `2:`, a bare + 12-hex stamp is an implicit v1. `surf check` verifies each stamp under its own recipe, so + existing v1 stamps keep passing (with a one-line nudge) until `surf verify` re-stamps them as + v2 — one pass per repo. An unrecognized stamp prefix fails closed. See + [Hash recipes](docs/reference/hash-recipes.md). **Action on upgrade: run `surf verify` once.** + ## [0.6.3] - 2026-06-18 ### Added diff --git a/docs/index.md b/docs/index.md index 3d8b80f..4fe4b10 100644 --- a/docs/index.md +++ b/docs/index.md @@ -117,4 +117,4 @@ Agents are a multiplier, not the foundation. 
- [Install](./getting-started/install.md) · [Quickstart](./getting-started/quickstart.md) - [Authoring hubs](./guides/authoring-hubs.md) · [CI integration](./guides/ci-integration.md) · [Examples](./examples.md) -- Reference: [Commands](./reference/commands.md) · [Configuration](./reference/configuration.md) · [How the gate works](./reference/how-it-works.md) · [FAQ](./reference/faq.md) +- Reference: [Commands](./reference/commands.md) · [Configuration](./reference/configuration.md) · [How the gate works](./reference/how-it-works.md) · [Hash recipes](./reference/hash-recipes.md) · [FAQ](./reference/faq.md) diff --git a/docs/reference/hash-recipes.md b/docs/reference/hash-recipes.md new file mode 100644 index 0000000..5eaafed --- /dev/null +++ b/docs/reference/hash-recipes.md @@ -0,0 +1,94 @@ +--- +title: Hash recipes +description: The versioned canonicalization recipes behind every stored stamp — what each one does, how stamps are labelled, and how to migrate. +--- + +A **stamp** is the value `surf verify` writes into a claim's `hash:` field and `surf check` +compares against. It is produced by a **recipe**: the exact rules for turning a resolved span into +a canonical token stream (see [How the gate works](./how-it-works.md), step 2). Changing those +rules changes the output for unchanged code, which would silently invalidate every stamp in the +wild — so each recipe has a number, and every stamp records the recipe that produced it. + +## Stamp format + +``` +hash: 2:f1075e760a17 # v2 stamp — explicit prefix +hash: f1075e760a17 # bare 12-hex — implicitly v1 (written before recipes were numbered) +``` + +`surf check` reads the prefix, verifies the span under that recipe, and: + +- **matches** → passes. If the stamp is v1, it adds a one-line nudge inviting `surf verify` to + upgrade (so the span gains newer protections). +- **differs** → blocks, exactly as before. +- **unrecognized prefix** (e.g. 
a `3:` stamp written by a newer surf) → fails closed: an + unverifiable stamp is never treated as clean. + +New stamps are always written under the current recipe (**v2**). + +## Migration + +Upgrading surf does **not** mass-flag your repo. v1 stamps keep verifying in v1 mode until you run: + +``` +surf verify +``` + +once, which re-stamps every anchor under the current recipe — including v1 anchors whose hash +still matches (the one narrow case where `verify` rewrites an otherwise-unchanged stamp). After +that single pass the whole repo is on v2. + +> Forced re-verify is deliberately *not* automatic on upgrade. `verify` stamps whatever the code +> is *now*; if a repo already contains drift that v1 missed, a blind re-stamp would launder it +> green. v1-compat keeps the gate honest *through* the migration — `check` can still tell +> "unchanged under the old recipe" (pass) from "actually changed" (block). + +## Recipes + +### v1 — original (surf ≤ 0.6.x; bare-hex stamps) + +Walk the resolved span's syntax tree into tokens: + +- whitespace and comments are absent from the tree → ignored; +- every **identifier** is alpha-renamed to a positional placeholder (`#0`, `#1`, …) in order of + first occurrence — a *consistent* rename hashes identically, swapping two names does not; +- operators, keywords, punctuation, and literal **values** are kept verbatim; +- a Python **decorator name** is kept verbatim (`@cache` → `@lru_cache` is loud). + +SHA-256 of the token stream, truncated to 12 hex. + +**Known blind spot (#77):** because *every* identifier is alpha-renamed, re-pointing a span at a +different single-occurrence external symbol (`PointsTier.TIER_1` → `TIER_2`, `b.Del` → `b.Keep`) +yields a byte-identical stream — the claim's prose silently becomes false while the gate stays +green. 
+ +### v2 — member-access names verbatim (surf ≥ 0.7.0; `2:` prefix) + +v1, plus one rule: the **property/field component of a member-access expression** is kept verbatim +(`kind:text`) instead of alpha-renamed. These positions name an *external* member, never a local +binding, so emitting them verbatim distinguishes "re-pointed at a different symbol" (loud) from +"renamed my own local" (still quiet — rename tolerance is preserved). Per family: + +| Family | Member-access position | +|---|---| +| TypeScript | `property_identifier` / `private_property_identifier` as the `property` of a `member_expression` | +| Go | `field_identifier` as the `field` of a `selector_expression` | +| Rust | `field_identifier` as the `field` of a `field_expression` | +| Python | the `attribute` identifier of an `attribute` node | + +Everything else is identical to v1, so v1 ≡ v2 minus this single rule — a member-access-free span +hashes the same under both. This closes the #77 blind spot for member accesses (every reported +reproduction). Re-pointing at a non-member free identifier — a bare `Enum::VARIANT` path, a renamed +imported function called by bare name — is **not** yet covered; that is the full bound/free split +tracked in [#77](https://github.com/Connorrmcd6/surface/issues/77). + +## Policy (for maintainers) + +- **Any** change to canonical output is a new recipe number — no exceptions. An innocent-looking + refactor of the tokenizer that changes one byte of output is silently a new recipe wearing an old + number, which corrupts every stamp in the wild. The golden fixtures in + `surf-core/tests/golden_hash.rs` pin each recipe's output (v1 and v2 digests for representative + symbols per language) precisely to make that break loud. +- A recipe is kept as a verification mode only while it is expressible as a flag over the current + code (v1 ≡ v2 with the member-access rule off — one branch, no frozen copy). 
The N-1 support + policy and the broader version-table governance are tracked in #77. diff --git a/docs/reference/how-it-works.md b/docs/reference/how-it-works.md index 6a05dab..a63070a 100644 --- a/docs/reference/how-it-works.md +++ b/docs/reference/how-it-works.md @@ -15,11 +15,16 @@ The gate runs in four steps. placeholders (a *consistent* rename yields the same tokens, swapping two names does not); operators, keywords, and literal *values* are kept verbatim. Python decorators are part of the span, and a decorator's *name* is kept verbatim — so swapping `@cache` for `@lru_cache`, or - `@staticmethod` for `@classmethod`, changes the hash. + `@staticmethod` for `@classmethod`, changes the hash. **Member-access names are kept verbatim + too** (`obj.foo`, `pkg.Bar`, `Enum.VARIANT`), so re-pointing a span at a *different* external + symbol — `PointsTier.TIER_1` → `TIER_2`, `b.Del` → `b.Keep` — changes the hash even when the + name occurs once. (This last rule is the **v2** recipe; see [Hash recipes](./hash-recipes.md).) 3. **Hash.** SHA-256 of that stream, truncated to 12 hex. A list `at:` combines its sites into one hash, so the claim is stale if *any* listed span changes. -4. **Compare** against the hash stored in the frontmatter (written by `surf verify`). Equal → pass; - different → block. +4. **Compare** against the stamp stored in the frontmatter (written by `surf verify`). The stamp + carries its recipe — a v2 stamp is prefixed `2:`, a bare hex stamp is an older v1 — and is + verified under *its own* recipe, so existing v1 stamps keep passing until `surf verify` upgrades + them. Equal → pass; different → block. Quiet on cosmetics, loud on logic — and **reproducible**, because the parser ships *inside* the binary and is version-pinned. 
There is no separate formatter or language server in CI to skew the diff --git a/hubs/anchor.md b/hubs/anchor.md index d6920cb..6255817 100644 --- a/hubs/anchor.md +++ b/hubs/anchor.md @@ -6,7 +6,7 @@ anchors: a 1-based `@N` positional suffix for genuine name collisions. Empty/zero/missing parts are typed parse errors. at: surf-core/src/anchor.rs > parse_anchor - hash: 8818a44052c1 + hash: 2:0f9a4f9d406d refs: [] --- diff --git a/hubs/cli-check.md b/hubs/cli-check.md index 183a610..66370cc 100644 --- a/hubs/cli-check.md +++ b/hubs/cli-check.md @@ -2,11 +2,13 @@ summary: surf check — the gate. Hash each anchored span, compare to the stored hash, block on divergence. Optionally scope to changed files. anchors: - claim: > - Per claim: resolve and hash every site, combine into one hash, compare to the stored - hash. No stored hash → Unverified; an anchor that no longer resolves → Unresolvable; - a mismatch → Changed. The verdict is deterministic and needs no git. + Per claim: resolve and hash every site under the stored stamp's own recipe (v1/v2), + combine into one hash, compare to the stored hash. No stored hash → Unverified; an anchor + that no longer resolves, or a stamp with an unrecognized version prefix → Unresolvable; + a mismatch → Changed; a clean match is tagged with whether the stamp was still v1. The + verdict is deterministic and needs no git. at: surf-cli/src/check.rs > check_claim - hash: e04e680e6d8b + hash: 2:36cbbc039ab1 - claim: > Scoping is opt-in and intersective: with neither --base nor --files every claim is checked. A claim is in scope when any of its anchored files matches each active filter — the --base @@ -15,15 +17,16 @@ anchors: records whether it ever matched an anchored file (tallied before the --base filter), so a pattern that scopes the gate to nothing is detectable after the walk. 
at: surf-cli/src/check.rs > Scope > includes - hash: f18aefc5097e + hash: 2:d459cc00d69b - claim: > The gate fails closed: a hub whose frontmatter won't parse yields an Unresolvable divergence (blocking the run) rather than being silently skipped, so a frontmatter typo can't pass as clean. Alongside the divergences it returns the --files patterns that - matched no anchored file; run warns on stderr for each and exits non-zero when every - pattern matched nothing, so a typo'd --files can't read as a clean run. + matched no anchored file (run warns on stderr for each and exits non-zero when every + pattern matched nothing, so a typo'd --files can't read as a clean run) and a count of + clean anchors still stamped under v1, so run can nudge the one-time `surf verify` upgrade. at: surf-cli/src/check.rs > check_workspace - hash: 567ba4ebe18e + hash: 2:d8957ecb971d refs: [] --- diff --git a/hubs/cli-for.md b/hubs/cli-for.md index 029eded..dc92cf2 100644 --- a/hubs/cli-for.md +++ b/hubs/cli-for.md @@ -9,13 +9,13 @@ anchors: versioned {version, path, matches} envelope (JSON), always exiting 0 whether or not anything matched. at: surf-cli/src/for_path.rs > run - hash: 3143f824dcfb + hash: 2:4ef15aadc147 - claim: > find collects every claim whose anchored file equals the queried path (matched on path only — no source parse), optionally narrowed to anchors whose first segment is the given symbol. Malformed hubs are skipped rather than erroring, and results are sorted by hub then anchor. at: surf-cli/src/for_path.rs > find - hash: 047c1480c650 + hash: 2:6eb52572ab68 refs: [] --- diff --git a/hubs/cli-git.md b/hubs/cli-git.md index 86839f0..43c4fb3 100644 --- a/hubs/cli-git.md +++ b/hubs/cli-git.md @@ -8,20 +8,20 @@ anchors: subdirectory. A missing merge base (shallow clone) falls back to diffing the ref directly; if git can't answer at all it returns None. 
at: surf-cli/src/git.rs > changed_files - hash: 454e65cc8aa3 + hash: 2:e395bff5410d - claim: > show returns the contents of a file at a git ref (git show <ref>:<path>), used to recover the previous source for advisory old_code/magnitude. None when unavailable — the verdict is unchanged either way. at: surf-cli/src/git.rs > show - hash: 6398bf958ad1 + hash: 2:ea9143b47615 - claim: > renamed_to asks git's rename detection (diff --name-status --find-renames HEAD) for the new path a file moved to, letting lint warn and verify --follow re-point instead of hard-blocking. Best-effort: a pure mv with no content match may show as delete+add and not be detected, and None means git couldn't pair the rename — the deterministic verdict never depends on it. at: surf-cli/src/git.rs > renamed_to - hash: 9622170a3b9a + hash: 2:a51ff4adba72 - claim: > log_stream returns the whole history window in one git spawn: every reachable commit (newest first, children before parents) with its parents and its first-parent name-status diff. @@ -29,12 +29,12 @@ anchors: through them, and --no-renames keeps a rename reading as delete+add. None when git can't answer. at: surf-cli/src/git.rs > log_stream - hash: 8827a8266fc9 + hash: 2:c5d2fccc872e - claim: > list_files_at lists every tracked file at a commit (ls-tree -r --name-only), used to find the hub set as it existed at a past commit. None when git can't answer. at: surf-cli/src/git.rs > list_files_at - hash: cbe066de9432 + hash: 2:23c36e64fc4d refs: [] --- diff --git a/hubs/cli-lint.md b/hubs/cli-lint.md index 2d9536d..bddc8c8 100644 --- a/hubs/cli-lint.md +++ b/hubs/cli-lint.md @@ -7,7 +7,7 @@ anchors: as does a file that git reports has moved. Block-level findings set a non-zero exit; warnings alone keep exit 0. 
at: surf-cli/src/lint.rs > lint_site - hash: 1ec63fccf77f + hash: 2:69018813a373 - claim: > Advisory granularity guidance (§8), never blocking: lint_under_coverage flags public symbols — top-level functions and methods — in an already-anchored file that no claim @@ -16,14 +16,14 @@ anchors: uncovered symbol is reported once against the file's first anchoring hub. It runs only on files whose anchors all resolved cleanly, so coverage nags never pile onto broken anchors. at: surf-cli/src/lint.rs > lint_under_coverage - hash: 569a7e6fe417 + hash: 2:3ca608c27462 - claim: > AGENTS.md enforcement is opt-in (§11.6): only when the file carries a surf:hubs marker block does lint require it to link the configured hubs directory (which must exist), blocking otherwise. It points agents at the directory to search — never enumerating individual hubs, which would push an agent to read everything. at: surf-cli/src/lint.rs > lint_agents_pointer - hash: 938380798f7a + hash: 2:9a5f7d9fd0db refs: [] --- diff --git a/hubs/cli-reference.md b/hubs/cli-reference.md index e4d416c..1dc9483 100644 --- a/hubs/cli-reference.md +++ b/hubs/cli-reference.md @@ -9,7 +9,7 @@ anchors: flag, or changing a default, diverges this anchor — re-read docs/reference/commands.md before sealing. at: surf-cli/src/main.rs > Command - hash: 0d910ff4886d + hash: 2:0d910ff4886d refs: ["../docs/reference/commands.md"] --- diff --git a/hubs/cli-scaffold.md b/hubs/cli-scaffold.md index a773178..9db6100 100644 --- a/hubs/cli-scaffold.md +++ b/hubs/cli-scaffold.md @@ -5,12 +5,12 @@ anchors: init writes surf.toml + creates hubs/ in the cwd, and is idempotent — an existing surf.toml is left untouched. at: surf-cli/src/init.rs > run - hash: cfd3bdbdd15d + hash: 2:dd57e4e7c5d9 - claim: > new derives the target directory from the literal prefix of the first hub glob, then writes a hub with no anchors so it is lint-clean immediately; it refuses to overwrite. 
at: surf-cli/src/new.rs > hub_dir - hash: 598296b19fb6 + hash: 2:d921913bf7bf refs: [] --- diff --git a/hubs/cli-stats.md b/hubs/cli-stats.md index 3bc8704..79dd1e5 100644 --- a/hubs/cli-stats.md +++ b/hubs/cli-stats.md @@ -6,7 +6,7 @@ anchors: always exits 0 on success and surfaces an error (non-zero) only when git history is unavailable. The metrics are advisory and never gate. at: surf-cli/src/stats.rs > run - hash: 7f4ab96fac92 + hash: 2:7f4ab96fac92 - claim: > compute reads the whole since/until window from one streamed git log and scores each non-merge commit, propagating hub claim state incrementally — a commit inherits its first @@ -19,7 +19,7 @@ anchors: and missing git history or an invalid hub glob in surf.toml is a hard error rather than a silent zero or a quietly-narrowed hub set. at: surf-cli/src/stats.rs > compute - hash: c4d39cabab48 + hash: 2:73bc9fa9daac refs: ["../docs/guides/stats.md"] --- diff --git a/hubs/cli-suggest.md b/hubs/cli-suggest.md index 1372160..d5ea8b3 100644 --- a/hubs/cli-suggest.md +++ b/hubs/cli-suggest.md @@ -13,7 +13,7 @@ anchors: never writes a file and never computes or stamps a hash — the author edits the claims and verifies. at: surf-cli/src/suggest.rs > run - hash: 5b5ebe5de616 + hash: 2:6d5ea2dc7760 refs: [] --- diff --git a/hubs/cli-verify.md b/hubs/cli-verify.md index 2b0a216..4b4f87e 100644 --- a/hubs/cli-verify.md +++ b/hubs/cli-verify.md @@ -2,14 +2,15 @@ summary: surf verify — re-seal a claim after a human confirms the prose, with optional --follow. anchors: - claim: > - For each claim, plan_claim re-hashes every site (combined) when all resolve, returning - Unchanged when that hash already matches the stored one or Hash to re-stamp otherwise. + For each claim, plan_claim re-hashes every site (combined) under the current recipe when + all resolve, returning Unchanged only when the stored stamp already matches that recipe's + stamp, else Hash to re-stamp — so one pass also upgrades a still-matching v1 stamp to v2. 
Under --follow, a site that no longer resolves re-points a renamed single-segment anchor via find_renamed; a site whose file is unreadable asks git where it moved and re-points the - path (only when the code is otherwise unchanged). Otherwise it skips with a reason. It - never edits prose, only the hash/at line. + path (only when the code is otherwise unchanged under the stored recipe). Otherwise it skips + with a reason. It never edits prose, only the hash/at line. at: surf-cli/src/verify.rs > plan_claim - hash: 6de72f5412b9 + hash: 2:cc47fe88418b refs: [] --- diff --git a/hubs/cli-workspace.md b/hubs/cli-workspace.md index 0025cef..bf8f2ed 100644 --- a/hubs/cli-workspace.md +++ b/hubs/cli-workspace.md @@ -5,12 +5,12 @@ anchors: discover walks up from a starting directory to the nearest surf.toml (like git/ruff), parses it, and returns the root + config; it errors if no marker is found in any parent. at: surf-cli/src/workspace.rs > Workspace > discover - hash: 3ab1ddc44a2e + hash: 2:f9a5e81dc046 - claim: > hub_paths globs the config's hub patterns relative to the discovered root, sorted and deduped. at: surf-cli/src/workspace.rs > Workspace > hub_paths - hash: d51a6b74add6 + hash: 2:275e1726b702 refs: [] --- diff --git a/hubs/config.md b/hubs/config.md index 8a7cb4d..c4f2f90 100644 --- a/hubs/config.md +++ b/hubs/config.md @@ -5,7 +5,7 @@ anchors: surf.toml parses into a Config whose hubs default to ["hubs/*.md"]; unknown keys are rejected. Filesystem discovery (walking up for the marker) lives in the CLI, not here. 
at: surf-core/src/config.rs > parse_config - hash: 57cd4f316e4a + hash: 2:7b98f22a91b6 refs: [] --- diff --git a/hubs/hash.md b/hubs/hash.md index 635b3fc..8466785 100644 --- a/hubs/hash.md +++ b/hubs/hash.md @@ -4,22 +4,24 @@ anchors: - claim: > The canonical token stream drops comments, alpha-renames identifiers to positional placeholders (consistent rename → same tokens; swapping two names → different), and - keeps operators, keywords, and literal values verbatim. Two deliberate exceptions: a - Python decorator's name is kept verbatim, not alpha-renamed (so `@cache` → `@lru_cache` - is caught), and the per-claim ignore_literals option drops string-literal content so a - copy edit doesn't re-open the gate. + keeps operators, keywords, and literal values verbatim. Exceptions kept verbatim: a + Python decorator's name (so `@cache` → `@lru_cache` is caught), and — under the v2 + recipe — a member-access name (the property/field of `obj.foo`/`pkg.Bar`), so + re-pointing at a different external symbol is caught even when it occurs once. The + per-claim ignore_literals option drops string-literal content so a copy edit doesn't + re-open the gate. at: surf-core/src/hash.rs > emit - hash: 1bdb8c599f6d + hash: 2:1a93c8f4b8d9 - claim: > Identifier node kinds are enumerated per language family; only these are alpha-renamed, everything else (operators, keywords, literals) is kept. at: surf-core/src/hash.rs > is_identifier - hash: ac8c69676a07 + hash: 2:ac8c69676a07 - claim: > A claim's hash is the combination of its per-site hashes — a single site is the identity, multiple sites combine order-sensitively, so the claim is stale if any listed span changes. 
at: surf-core/src/hash.rs > combine_site_hashes - hash: 83a72772c92d + hash: 2:a81ab78387c2 refs: [] --- diff --git a/hubs/hub-format.md b/hubs/hub-format.md index 1e4307d..53a2f84 100644 --- a/hubs/hub-format.md +++ b/hubs/hub-format.md @@ -6,12 +6,12 @@ anchors: scalar or a list, hash is optional until verified, and unknown fields are rejected — though forward-declared fields (`refs`, `covers`) are accepted and stored but inert in the verdict. at: surf-core/src/hub.rs > parse_hub - hash: e97cc54f48d3 + hash: 2:55be573a0ca2 - claim: > verify writes hashes back surgically: set_anchor_hash locates the Nth anchor item and replaces/inserts only its hash line, so an unchanged hash is byte-identical. at: surf-core/src/hub.rs > set_anchor_hash - hash: a65d5c324dc5 + hash: 2:a65d5c324dc5 refs: [] covers: - surf-core/src/hub.rs diff --git a/hubs/lang.md b/hubs/lang.md index ddfa137..929308a 100644 --- a/hubs/lang.md +++ b/hubs/lang.md @@ -5,7 +5,7 @@ anchors: Language is detected purely by file extension (ts/tsx/mts/cts, js/jsx/mjs/cjs, rs, py/pyi, go); an unknown extension yields None and the anchor is treated as unsupported. at: surf-core/src/lang.rs > Lang > from_path - hash: c98dfc657543 + hash: 2:fabba17dc0f9 refs: [] --- diff --git a/hubs/rename.md b/hubs/rename.md index 94d5cde..8569be5 100644 --- a/hubs/rename.md +++ b/hubs/rename.md @@ -7,7 +7,7 @@ anchors: alpha-renames identifiers, a renamed-but-unchanged symbol still matches. No git, no similarity threshold. at: surf-core/src/rename.rs > find_renamed - hash: e64045b383fb + hash: 2:8d4b88480875 refs: [] --- diff --git a/hubs/resolve.md b/hubs/resolve.md index 84685bd..8cb448b 100644 --- a/hubs/resolve.md +++ b/hubs/resolve.md @@ -9,18 +9,18 @@ anchors: same-name stubs plus their implementation, in the same scope) counts as one match, so the bare name resolves without @N and the gated span covers every overload signature. 
at: surf-core/src/resolve.rs > resolve_nodes - hash: a704a3bbec34 + hash: 2:26a42e2bfa92 - claim: > Go is resolved by a dedicated path: its symbols are flat (no nested declarations) and methods attach to a type by receiver, so `Type > Method` matches a method_declaration whose receiver type equals the type. at: surf-core/src/resolve.rs > resolve_go - hash: 82b3f6e863cf + hash: 2:07d730bc2bf8 - claim: > Rename detection enumerates every definition at any depth so a renamed-but-unchanged symbol can be found by hash. at: surf-core/src/resolve.rs > collect_all_defs - hash: a75886451d68 + hash: 2:22d81a580041 refs: [] --- diff --git a/surf-cli/src/check.rs b/surf-cli/src/check.rs index 99cb46d..d6106eb 100644 --- a/surf-cli/src/check.rs +++ b/surf-cli/src/check.rs @@ -10,8 +10,8 @@ use crate::workspace::{read_site, Workspace}; use anyhow::Result; use std::process::ExitCode; use surf_core::{ - diff_magnitude, hash_anchor_with, parse_anchor, resolve, CheckReport, Divergence, - DivergenceKind, HashOpts, HubError, + combine_site_hashes, diff_magnitude, format_stamp, hash_anchor_raw, parse_anchor, parse_stamp, + resolve, CheckReport, Divergence, DivergenceKind, HashOpts, HubError, Recipe, }; pub fn run( @@ -20,7 +20,7 @@ pub fn run( base: Option<&str>, files: &[String], ) -> Result { - let (divergences, unmatched_globs) = check_workspace(ws, base, files)?; + let (divergences, unmatched_globs, v1_clean) = check_workspace(ws, base, files)?; match format { Format::Json => { @@ -34,6 +34,11 @@ pub fn run( for pattern in &unmatched_globs { eprintln!("surf check: --files glob \"{pattern}\" matched no anchored files."); } + // One-line migration nudge: a clean v1 stamp passes, but invites the one-time upgrade so + // the span gains the v2 member-access protection (#140). 
+ if v1_clean > 0 { + eprintln!("surf check: {v1_clean} anchor(s) use v1 hashes; run `surf verify` to upgrade."); + } // A typo'd --files scopes the gate to nothing and must not go green (#78); but only // when *every* glob matched nothing, so a partially-correct invocation still succeeds. let all_empty = !files.is_empty() && unmatched_globs.len() == files.len(); @@ -45,18 +50,20 @@ pub fn run( }) } -/// Returns the divergences in scope plus the `--files` patterns that matched no anchored -/// file, so the caller can refuse to call a run that checked nothing "clean" (#78). +/// Returns the divergences in scope, the `--files` patterns that matched no anchored file (so +/// the caller can refuse to call a run that checked nothing "clean", #78), and the count of +/// clean anchors still stamped under v1 (so the caller can nudge the one-time upgrade, #140). fn check_workspace( ws: &Workspace, base: Option<&str>, files: &[String], -) -> Result<(Vec<Divergence>, Vec<String>)> { +) -> Result<(Vec<Divergence>, Vec<String>, usize)> { let mut scope = Scope::build(ws, base, files)?; // Enrichment always needs a ref; an explicit --base doubles as the diff base, else HEAD. let enrich_base = base.unwrap_or("HEAD"); let mut out = Vec::new(); + let mut v1_clean = 0usize; for loaded in ws.iter_hubs()? 
{ let hub = match loaded.hub { Ok(hub) => hub, @@ -71,12 +78,14 @@ fn check_workspace( if !scope.includes(claim) { continue; } - if let Some(d) = check_claim(ws, &loaded.rel, claim, enrich_base) { - out.push(d); + match check_claim(ws, &loaded.rel, claim, enrich_base) { + ClaimCheck::Diverged(d) => out.push(*d), + ClaimCheck::Clean { v1: true } => v1_clean += 1, + ClaimCheck::Clean { v1: false } => {} } } } - Ok((out, scope.unmatched_globs())) + Ok((out, scope.unmatched_globs(), v1_clean)) } fn malformed_hub_divergence(hub: &str, err: &HubError) -> Divergence { @@ -169,12 +178,14 @@ impl Scope { } } -fn check_claim( - ws: &Workspace, - hub: &str, - claim: &surf_core::Claim, - base: &str, -) -> Option<Divergence> { +/// What checking one claim produced: a flag to report, or a clean pass tagged with whether the +/// matching stamp was still v1 (so the caller can nudge the one-time upgrade, #140). +enum ClaimCheck { + Diverged(Box<Divergence>), + Clean { v1: bool }, +} + +fn check_claim(ws: &Workspace, hub: &str, claim: &surf_core::Claim, base: &str) -> ClaimCheck { let prose = claim.claim.trim().to_string(); let opts = HashOpts { ignore_literals: claim.ignore_literals, @@ -184,7 +195,7 @@ fn check_claim( let single = sites.len() == 1; let mk = |kind, old_hash, new_hash, old_code, new_code, magnitude, detail| { - Some(Divergence { + ClaimCheck::Diverged(Box::new(Divergence { hub: hub.to_string(), claim: prose.clone(), at: at_display.clone(), @@ -196,7 +207,7 @@ fn check_claim( prose: prose.clone(), magnitude, detail, - }) + })) }; let unresolvable = |detail: String| { mk( @@ -210,7 +221,22 @@ fn check_claim( ) }; - // Resolve and hash every site; the claim's hash is the combination (§6.3). + // Hash under the stored stamp's own recipe, so an as-yet-unupgraded v1 stamp still verifies + // in v1 mode; the current recipe is only assumed for an unverified claim. An unrecognized + // stamp prefix (e.g. from a newer surf) is unverifiable — fail closed (#140). 
+ let recipe = match &claim.hash { + None => Recipe::CURRENT, + Some(stored) => match parse_stamp(stored) { + Some((r, _)) => r, + None => { + return unresolvable(format!( + "unrecognized hash version in `{stored}`; re-stamp with `surf verify`" + )) + } + }, + }; + + // Resolve and hash every site under `recipe`; the claim's stamp is the combination (§6.3). let mut site_hashes = Vec::with_capacity(sites.len()); let mut first_new_code = None; for site in sites { @@ -227,41 +253,49 @@ fn check_claim( .get(span.start_byte..span.end_byte) .map(str::to_string); } - let hash = match hash_anchor_with(&current, lang, &anchor, opts) { + let hash = match hash_anchor_raw(&current, lang, &anchor, opts, recipe) { Ok(h) => h, Err(e) => return unresolvable(e.to_string()), }; site_hashes.push(hash); } - let new_hash = surf_core::combine_site_hashes(&site_hashes); + let combined = combine_site_hashes(&site_hashes); + let new_stamp = format_stamp(recipe, &combined); match &claim.hash { None => mk( DivergenceKind::Unverified, None, - Some(new_hash), + Some(new_stamp), None, first_new_code, None, None, ), - Some(stored) if *stored == new_hash => None, // clean Some(stored) => { - // Best-effort old_code + magnitude from git, for single-site anchors only. - let (old_code, magnitude) = if single { - enrich_from_git(ws, base, &sites[0]) + // `recipe` was parsed from `stored`, so the hex is present. + let stored_hex = parse_stamp(stored).map(|(_, hex)| hex).unwrap_or_default(); + if combined == stored_hex { + ClaimCheck::Clean { + v1: recipe == Recipe::V1, + } } else { - (None, None) - }; - mk( - DivergenceKind::Changed, - Some(stored.clone()), - Some(new_hash), - old_code, - first_new_code, - magnitude, - None, - ) + // Best-effort old_code + magnitude from git, for single-site anchors only. 
+ let (old_code, magnitude) = if single { + enrich_from_git(ws, base, &sites[0]) + } else { + (None, None) + }; + mk( + DivergenceKind::Changed, + Some(stored.clone()), + Some(new_stamp), + old_code, + first_new_code, + magnitude, + None, + ) + } } } } @@ -326,7 +360,7 @@ mod tests { use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; - use surf_core::{hash_anchor, parse_anchor, Lang}; + use surf_core::{hash_anchor, hash_anchor_raw, parse_anchor, Lang, Recipe}; fn write(root: &Path, rel: &str, content: &str) { let p = root.join(rel); @@ -344,6 +378,18 @@ mod tests { hash_anchor(src, Lang::Rust, &parse_anchor(anchor).unwrap()).unwrap() } + /// A bare (v1) Rust stamp, simulating an anchor stamped before versioned recipes. + fn v1_stamp(src: &str, anchor: &str) -> String { + hash_anchor_raw( + src, + Lang::Rust, + &parse_anchor(anchor).unwrap(), + HashOpts::default(), + Recipe::V1, + ) + .unwrap() + } + fn git(root: &Path, args: &[&str]) { let status = Command::new("git") .current_dir(root) @@ -660,7 +706,7 @@ mod tests { let ws = ws_at(root.to_path_buf()); let typo = "src/lables/*.rs".to_string(); - let (d, unmatched) = check_workspace(&ws, None, std::slice::from_ref(&typo)).unwrap(); + let (d, unmatched, _) = check_workspace(&ws, None, std::slice::from_ref(&typo)).unwrap(); assert!(d.is_empty()); assert_eq!(unmatched, vec![typo.clone()]); @@ -686,7 +732,7 @@ mod tests { let ws = ws_at(root.to_path_buf()); let globs = vec!["src/*.rs".to_string(), "zzz/nope/*.go".to_string()]; - let (d, unmatched) = check_workspace(&ws, None, &globs).unwrap(); + let (d, unmatched, _) = check_workspace(&ws, None, &globs).unwrap(); assert!(d.is_empty()); assert_eq!(unmatched, vec!["zzz/nope/*.go".to_string()]); @@ -727,7 +773,7 @@ mod tests { ); let ws = ws_at(root.to_path_buf()); - let (d, unmatched) = + let (d, unmatched, _) = check_workspace(&ws, Some("HEAD"), &["src/b*.rs".to_string()]).unwrap(); assert!(d.is_empty()); // b is unchanged and a is excluded by the 
glob assert!(unmatched.is_empty(), "glob matched an anchored file"); @@ -795,4 +841,113 @@ mod tests { let ws = ws_at(root.to_path_buf()); assert_eq!(check_workspace(&ws, None, &[]).unwrap().0.len(), 2); } + + #[test] + fn clean_v1_stamp_passes_and_is_counted_for_nudge() { + // An anchor stamped under the old (bare-hex) recipe verifies in v1 mode: still clean, so + // it passes — but it's counted so the caller can nudge `surf verify` to upgrade (#140). + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let src = "pub fn add(a: i64, b: i64) -> i64 { a + b }\n"; + let h = v1_stamp(src, "src/m.rs > add"); + assert!(!h.contains(':'), "v1 stamp is bare hex"); + write(root, "surf.toml", ""); + write(root, "src/m.rs", src); + write( + root, + "hubs/a.md", + &format!("---\nsummary: x\nanchors:\n - claim: add sums\n at: src/m.rs > add\n hash: {h}\n---\n"), + ); + + let (d, _unmatched, v1_clean) = + check_workspace(&ws_at(root.to_path_buf()), None, &[]).unwrap(); + assert!(d.is_empty(), "clean v1 stamp must pass"); + assert_eq!( + v1_clean, 1, + "the clean v1 anchor is counted for the upgrade nudge" + ); + } + + #[test] + fn changed_v1_anchor_still_diverges() { + // v1-compat does not weaken the gate: a v1 stamp whose code actually changed fails. 
+ let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let v1 = "pub fn add(a: i64, b: i64) -> i64 { a + b }\n"; + let h = v1_stamp(v1, "src/m.rs > add"); + write(root, "surf.toml", ""); + write( + root, + "src/m.rs", + "pub fn add(a: i64, b: i64) -> i64 { a - b }\n", + ); + write( + root, + "hubs/a.md", + &format!("---\nsummary: x\nanchors:\n - claim: add sums\n at: src/m.rs > add\n hash: {h}\n---\n"), + ); + + let (d, _, v1_clean) = check_workspace(&ws_at(root.to_path_buf()), None, &[]).unwrap(); + assert_eq!(d.len(), 1); + assert_eq!(d[0].kind, DivergenceKind::Changed); + assert_eq!(v1_clean, 0, "a diverged anchor is not a clean-v1 nudge"); + } + + #[test] + fn member_reference_swap_diverges_under_v2() { + // The headline #140 fix, end to end: a v2-stamped TS anchor whose only change is the + // member name (`PointsTier.TIER_1` → `TIER_2`) now fails the gate. + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let before = "export class S {\n tier(): T {\n return PointsTier.TIER_1;\n }\n}\n"; + let stamp = hash_anchor( + before, + Lang::TypeScript, + &parse_anchor("src/s.ts > S > tier").unwrap(), + ) + .unwrap(); + assert!(stamp.starts_with("2:"), "new stamps are v2"); + write(root, "surf.toml", ""); + write( + root, + "src/s.ts", + "export class S {\n tier(): T {\n return PointsTier.TIER_2;\n }\n}\n", + ); + write( + root, + "hubs/a.md", + &format!("---\nsummary: x\nanchors:\n - claim: default tier is TIER_1\n at: src/s.ts > S > tier\n hash: {stamp}\n---\n"), + ); + + let d = check_workspace(&ws_at(root.to_path_buf()), None, &[]) + .unwrap() + .0; + assert_eq!(d.len(), 1, "member swap must diverge under v2"); + assert_eq!(d[0].kind, DivergenceKind::Changed); + } + + #[test] + fn unrecognized_hash_version_fails_closed() { + // A stamp from a newer surf (or a corrupted prefix) is unverifiable — it must not pass. 
+ let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + write(root, "surf.toml", ""); + write(root, "src/m.rs", "pub fn add() -> i64 { 1 }\n"); + write( + root, + "hubs/a.md", + "---\nsummary: x\nanchors:\n - claim: c\n at: src/m.rs > add\n hash: 9:abcdef123456\n---\n", + ); + + let d = check_workspace(&ws_at(root.to_path_buf()), None, &[]) + .unwrap() + .0; + assert_eq!(d.len(), 1); + assert_eq!(d[0].kind, DivergenceKind::Unresolvable); + assert!(d[0] + .detail + .as_deref() + .unwrap() + .contains("unrecognized hash version")); + } } diff --git a/surf-cli/src/verify.rs b/surf-cli/src/verify.rs index dc9b583..cf22dc0 100644 --- a/surf-cli/src/verify.rs +++ b/surf-cli/src/verify.rs @@ -11,8 +11,8 @@ use anyhow::{Context, Result}; use serde::Serialize; use std::process::ExitCode; use surf_core::{ - combine_site_hashes, find_renamed, hash_anchor_with, parse_anchor, parse_hub, set_anchor_at, - set_anchor_hash, HashOpts, + combine_site_hashes, find_renamed, format_stamp, hash_anchor_raw, hash_anchor_with, + parse_anchor, parse_hub, parse_stamp, set_anchor_at, set_anchor_hash, HashOpts, Recipe, }; enum Plan { @@ -196,11 +196,15 @@ fn plan_claim(ws: &Workspace, claim: &surf_core::Claim, follow: bool) -> Plan { match failure { None => { + // verify always (re-)stamps under the current recipe, so one pass also upgrades a + // still-matching v1 stamp to v2 — a narrow, intentional exception to #22's + // skip-unchanged (#140). 
let combined = combine_site_hashes(&site_hashes); - if claim.hash.as_deref() == Some(combined.as_str()) { + let new_stamp = format_stamp(Recipe::CURRENT, &combined); + if claim.hash.as_deref() == Some(new_stamp.as_str()) { Plan::Unchanged } else { - Plan::Hash(combined) + Plan::Hash(new_stamp) } } Some(reason) if !follow => Plan::Skip(reason), @@ -270,13 +274,13 @@ fn follow_file(ws: &Workspace, site: &str, stored: &str, opts: HashOpts) -> Plan Ok(parts) => parts, Err(e) => return Plan::Skip(e.to_string()), }; - // Same symbol path, code unchanged → re-point with the identical hash. - if let Ok(h) = hash_anchor_with(&source, lang, &new_anchor, opts) { - if h == stored { - return Plan::Follow { - new_at, - new_hash: h, - }; + // Same symbol path, code unchanged under the stored recipe → re-point, upgrading the stamp + // to the current recipe (the move is a chance to migrate v1 → v2). + if let Some((recipe, stored_hex)) = parse_stamp(stored) { + if hash_anchor_raw(&source, lang, &new_anchor, opts, recipe).as_deref() == Ok(stored_hex) { + if let Ok(new_hash) = hash_anchor_with(&source, lang, &new_anchor, opts) { + return Plan::Follow { new_at, new_hash }; + } } } // The symbol may also have been renamed in the move (single-segment only). @@ -286,9 +290,11 @@ fn follow_file(ws: &Workspace, site: &str, stored: &str, opts: HashOpts) -> Plan Plan::Skip("file moved but its anchored code changed; run `surf lint`".into()) } +/// One site's bare per-site digest under the current recipe — combined and prefixed into the +/// stored stamp by the caller. Bare (not a full stamp) so multi-site combination stays correct. 
fn site_hash(ws: &Workspace, site: &str, opts: HashOpts) -> std::result::Result<String, String> { let (source, lang, anchor) = read_site(ws, site).map_err(|e| e.to_string())?; - hash_anchor_with(&source, lang, &anchor, opts).map_err(|e| e.to_string()) + hash_anchor_raw(&source, lang, &anchor, opts, Recipe::CURRENT).map_err(|e| e.to_string()) } #[cfg(test)] @@ -296,7 +302,7 @@ mod tests { use super::*; use std::fs; use std::path::Path; - use surf_core::{hash_anchor, Lang}; + use surf_core::{hash_anchor, hash_anchor_raw, HashOpts, Lang, Recipe}; fn write(root: &Path, rel: &str, content: &str) { let p = root.join(rel); @@ -348,6 +354,49 @@ mod tests { assert_eq!(fs::read_to_string(root.join("hubs/a.md")).unwrap(), after); } + #[test] + fn verify_upgrades_v1_stamp_to_v2() { + // A repo stamped under the old recipe: one `surf verify` re-stamps the still-matching v1 + // anchor as v2 (the narrow exception to skip-unchanged), and is then idempotent (#140). + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let src = "pub fn add(a: i64, b: i64) -> i64 { a + b }\n"; + let v1 = hash_anchor_raw( + src, + Lang::Rust, + &parse_anchor("src/m.rs > add").unwrap(), + HashOpts::default(), + Recipe::V1, + ) + .unwrap(); + assert!(!v1.contains(':'), "starts as a bare v1 stamp"); + write(root, "surf.toml", ""); + write(root, "src/m.rs", src); + write( + root, + "hubs/a.md", + &format!("---\nsummary: s\nanchors:\n - claim: add sums\n at: src/m.rs > add\n hash: {v1}\n---\n"), + ); + + let ws = Workspace::discover(root).unwrap(); + let report = verify_all(&ws, None, false).unwrap(); + assert_eq!(report.stamped, 1, "the v1 anchor is re-stamped to v2"); + + let expected = + hash_anchor(src, Lang::Rust, &parse_anchor("src/m.rs > add").unwrap()).unwrap(); + assert!(expected.starts_with("2:")); + let hub = parse_hub(&fs::read_to_string(root.join("hubs/a.md")).unwrap()).unwrap(); + assert_eq!( + hub.frontmatter.anchors[0].hash.as_deref(), + Some(expected.as_str()) + ); + + // Idempotent on
the upgraded stamp. + let again = verify_all(&ws, None, false).unwrap(); + assert_eq!(again.stamped, 0); + assert_eq!(again.unchanged, 1); + } + #[test] fn follow_repoints_renamed_anchor() { let tmp = tempfile::tempdir().unwrap(); diff --git a/surf-core/src/hash.rs b/surf-core/src/hash.rs index 0a33c07..42ec97b 100644 --- a/surf-core/src/hash.rs +++ b/surf-core/src/hash.rs @@ -12,6 +12,21 @@ //! //! The result is quiet on the changes you want ignored and loud on the ones you must catch. //! +//! ## Recipes (versioned canonicalization) +//! +//! The canonicalization above is the **v1** recipe. **v2** (#140) adds one rule: the +//! property/field component of a member-access expression (`obj.foo`, `pkg.Bar`) is kept +//! *verbatim* rather than alpha-renamed, so re-pointing an anchored span at a different +//! external symbol (`PointsTier.TIER_1` → `TIER_2`, `b.Del` → `b.Keep`) changes the hash even +//! when the name occurs exactly once. These positions are never bindings, so emitting them +//! verbatim cannot resurface a benign local rename. v1 ≡ v2 minus that single rule — one mode +//! flag, no frozen copy of the old algorithm. +//! +//! Stored stamps carry their recipe: a v2 stamp is prefixed `2:`, a bare 12-hex stamp is +//! implicitly v1. New stamps are written under [`Recipe::CURRENT`]; `check` verifies a stamp +//! under *its own* recipe, so existing v1 stamps keep working until `surf verify` upgrades +//! them. See `docs/reference/hash-recipes.md`. +//! //! `Magnitude` is advisory triage metadata only. It is never compared, thresholded, or used //! to decide pass/fail — that would defeat the whole point (§6.2). @@ -34,17 +49,79 @@ pub struct HashOpts { pub ignore_literals: bool, } +/// A canonicalization recipe. Bumped whenever a change to canonical output would otherwise +/// silently invalidate every stored stamp (see the module docs and `docs/reference/hash-recipes.md`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Recipe { + /// Original recipe: every identifier alpha-renamed. Implicit for bare (unprefixed) stamps. + V1, + /// v1 plus the member-access-name verbatim rule (#140). Stamps are prefixed `2:`. + V2, +} + +impl Recipe { + /// The recipe new stamps are written under (`verify`, and `check`'s suggestion for an + /// unverified claim). + pub const CURRENT: Recipe = Recipe::V2; +} + +/// Split a stored stamp into its recipe and bare hex digest. A bare hex stamp is implicitly +/// v1 (every stamp written before versioned recipes). Returns `None` for an unrecognized +/// prefix — e.g. a `3:` stamp from a newer surf — so the caller fails closed rather than +/// guessing a recipe. +pub fn parse_stamp(stamp: &str) -> Option<(Recipe, &str)> { + match stamp.split_once(':') { + Some(("2", hex)) if is_hex(hex) => Some((Recipe::V2, hex)), + Some(_) => None, + None if is_hex(stamp) => Some((Recipe::V1, stamp)), + None => None, + } +} + +/// Format a bare hex digest as a stored stamp under `recipe`: v1 is bare (back-compat), later +/// recipes carry an `N:` prefix. +pub fn format_stamp(recipe: Recipe, hex: &str) -> String { + match recipe { + Recipe::V1 => hex.to_string(), + Recipe::V2 => format!("2:{hex}"), + } +} + +fn is_hex(s: &str) -> bool { + !s.is_empty() && s.bytes().all(|b| b.is_ascii_hexdigit()) +} + +/// The full stored stamp for a single-site anchor under [`Recipe::CURRENT`] (v2), prefix and +/// all. Multi-site claims combine per-site [`hash_anchor_raw`] digests via +/// [`combine_site_hashes`] and prefix the result with [`format_stamp`]. pub fn hash_anchor(source: &str, lang: Lang, anchor: &Anchor) -> Result { hash_anchor_with(source, lang, anchor, HashOpts::default()) } +/// Like [`hash_anchor`], with per-claim [`HashOpts`]. Returns the current-recipe stamp. 
pub fn hash_anchor_with( source: &str, lang: Lang, anchor: &Anchor, opts: HashOpts, ) -> Result { - Ok(hash_tokens(&anchor_tokens(source, lang, anchor, opts)?)) + let hex = hash_anchor_raw(source, lang, anchor, opts, Recipe::CURRENT)?; + Ok(format_stamp(Recipe::CURRENT, &hex)) +} + +/// The bare hex digest of one anchor under `recipe` — the per-site hash that +/// [`combine_site_hashes`] folds into a claim stamp. Carries no version prefix; use +/// [`format_stamp`] to turn the combined digest into a stored stamp. +pub fn hash_anchor_raw( + source: &str, + lang: Lang, + anchor: &Anchor, + opts: HashOpts, + recipe: Recipe, +) -> Result { + Ok(hash_tokens(&anchor_tokens( + source, lang, anchor, opts, recipe, + )?)) } /// One hash per claim from its per-site hashes (§6.3). A single site is the identity (so the @@ -72,8 +149,20 @@ pub fn diff_magnitude( lang: Lang, anchor: &Anchor, ) -> Result { - let old = anchor_tokens(old_source, lang, anchor, HashOpts::default())?; - let new = anchor_tokens(new_source, lang, anchor, HashOpts::default())?; + let old = anchor_tokens( + old_source, + lang, + anchor, + HashOpts::default(), + Recipe::CURRENT, + )?; + let new = anchor_tokens( + new_source, + lang, + anchor, + HashOpts::default(), + Recipe::CURRENT, + )?; Ok(categorize(token_distance(&old, &new))) } @@ -82,6 +171,7 @@ fn anchor_tokens( lang: Lang, anchor: &Anchor, opts: HashOpts, + recipe: Recipe, ) -> Result, ResolveError> { let tree = parse_tree(source, lang).ok_or(ResolveError::Parse)?; let src = source.as_bytes(); @@ -98,6 +188,7 @@ fn anchor_tokens( src, family, opts, + recipe, false, &mut idents, &mut out, @@ -106,11 +197,23 @@ fn anchor_tokens( Ok(out) } -pub(crate) fn hash_node(node: Node, src: &[u8], family: Family, opts: HashOpts) -> String { - hash_tokens(&canonical_tokens(node, src, family, opts)) +pub(crate) fn hash_node( + node: Node, + src: &[u8], + family: Family, + opts: HashOpts, + recipe: Recipe, +) -> String { + hash_tokens(&canonical_tokens(node, src, 
family, opts, recipe)) } -fn canonical_tokens(node: Node, src: &[u8], family: Family, opts: HashOpts) -> Vec { +fn canonical_tokens( + node: Node, + src: &[u8], + family: Family, + opts: HashOpts, + recipe: Recipe, +) -> Vec { let mut out = Vec::new(); let mut idents: HashMap = HashMap::new(); emit( @@ -118,6 +221,7 @@ fn canonical_tokens(node: Node, src: &[u8], family: Family, opts: HashOpts) -> V src, family, opts, + recipe, false, &mut idents, &mut out, @@ -125,11 +229,13 @@ fn canonical_tokens(node: Node, src: &[u8], family: Family, opts: HashOpts) -> V out } +#[allow(clippy::too_many_arguments)] fn emit( node: Node, src: &[u8], family: Family, opts: HashOpts, + recipe: Recipe, // True while inside a decorator's *name* (the symbol being applied), where identifiers are // kept verbatim rather than alpha-renamed — so `@cache` → `@lru_cache` or // `@staticmethod` → `@classmethod` is caught (§6.1, #8). Arguments to a decorator follow the @@ -146,7 +252,11 @@ fn emit( if node.is_named() { if is_identifier(kind, family) { let text = node.utf8_text(src).unwrap_or_default(); - if decorator_name { + // v2 keeps member-access names verbatim too, so `obj.foo` → `obj.bar` is loud even + // when `bar` occurs once (#140). v1 keeps only decorator names verbatim. + let verbatim = + decorator_name || (recipe == Recipe::V2 && is_member_access_name(node, family)); + if verbatim { out.push(format!("{kind}:{text}")); } else { let next = idents.len(); @@ -191,7 +301,58 @@ fn emit( }; let mut cursor = node.walk(); for child in node.children(&mut cursor) { - emit(child, src, family, opts, child_decorator_name, idents, out); + emit( + child, + src, + family, + opts, + recipe, + child_decorator_name, + idents, + out, + ); + } +} + +/// True for the property/field component of a member-access expression — the part the v2 +/// recipe (#140) keeps verbatim. 
These positions name an *external* member, never a local +/// binding, so emitting them verbatim distinguishes "re-pointed at a different symbol" from +/// "renamed my own local" without breaking rename tolerance. Each family is matched +/// structurally (kind + parent kind + the parent's named field) so an identifier that merely +/// *shares* the kind in another position (e.g. an object-literal key, a method *name*) is left +/// to the normal alpha-rename. +fn is_member_access_name(node: Node, family: Family) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + let is_field = + |field: &str| parent.child_by_field_name(field).map(|n| n.id()) == Some(node.id()); + match family { + // `obj.prop` / `obj?.prop` — the property of a member_expression. + Family::TypeScript => { + matches!( + node.kind(), + "property_identifier" | "private_property_identifier" + ) && parent.kind() == "member_expression" + && is_field("property") + } + // `pkg.Bar` / `recv.Method` — the field of a selector_expression. + Family::Go => { + node.kind() == "field_identifier" + && parent.kind() == "selector_expression" + && is_field("field") + } + // `value.field` / `value.method()` — the field of a field_expression. (Path access + // `Enum::Variant` is a scoped_identifier, not a field — left to the full split, #77.) + Family::Rust => { + node.kind() == "field_identifier" + && parent.kind() == "field_expression" + && is_field("field") + } + // `obj.attr` — the attribute of an attribute node. 
+ Family::Python => { + node.kind() == "identifier" && parent.kind() == "attribute" && is_field("attribute") + } } } @@ -271,3 +432,157 @@ fn categorize(distance: usize) -> Magnitude { _ => Magnitude::Large, } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::parse_anchor; + + fn raw(src: &str, lang: Lang, anchor: &str, recipe: Recipe) -> String { + hash_anchor_raw( + src, + lang, + &parse_anchor(anchor).unwrap(), + HashOpts::default(), + recipe, + ) + .unwrap() + } + + // --- stamp parse/format ------------------------------------------------------------- + + #[test] + fn bare_hex_is_v1_prefixed_is_v2() { + assert_eq!(parse_stamp("abc123"), Some((Recipe::V1, "abc123"))); + assert_eq!(parse_stamp("2:abc123"), Some((Recipe::V2, "abc123"))); + // Unknown version or junk → None, so the gate fails closed rather than guessing. + assert_eq!(parse_stamp("3:abc123"), None); + assert_eq!(parse_stamp("2:nothex"), None); + assert_eq!(parse_stamp(""), None); + } + + #[test] + fn format_stamp_round_trips() { + for (recipe, stamp) in [(Recipe::V1, "deadbeef"), (Recipe::V2, "2:deadbeef")] { + assert_eq!(format_stamp(recipe, "deadbeef"), stamp); + assert_eq!( + parse_stamp(&format_stamp(recipe, "deadbeef")), + Some((recipe, "deadbeef")) + ); + } + } + + #[test] + fn current_stamp_carries_v2_prefix() { + let src = "pub fn f(p: T) -> i64 { p.x }\n"; + let stamp = hash_anchor(src, Lang::Rust, &parse_anchor("x.rs > f").unwrap()).unwrap(); + assert!( + stamp.starts_with("2:"), + "current stamp is v2-prefixed: {stamp}" + ); + } + + // --- the #140 member-access rule ---------------------------------------------------- + + /// Re-pointing a member access at a *different* single-occurrence member is invisible to v1 + /// (the bug) and caught by v2 (the fix), in every family. The receiver/operands are + /// identical, so only the member name moved. 
+ #[test] + fn member_name_swap_is_v1_blind_and_v2_loud() { + let cases = [ + ( + Lang::TypeScript, + "x.ts > S > f", + "export class S {\n f(): T { return PointsTier.TIER_1; }\n}\n", + "export class S {\n f(): T { return PointsTier.TIER_2; }\n}\n", + ), + ( + Lang::TypeScript, + "x.ts > S > f", + "export class S {\n f(u: U): T { return Tiers.getHighest(u); }\n}\n", + "export class S {\n f(u: U): T { return Tiers.getLowest(u); }\n}\n", + ), + ( + Lang::Go, + "x.go > Builder > Set", + "func (b *Builder) Set(n string) *Builder { return b.Del(n) }\n", + "func (b *Builder) Set(n string) *Builder { return b.Keep(n) }\n", + ), + ( + Lang::Python, + "x.py > color", + "def color(self):\n return ProbeColor.RED\n", + "def color(self):\n return ProbeColor.GREEN\n", + ), + ( + Lang::Rust, + "x.rs > f", + "pub fn f(p: P) -> i64 { p.first }\n", + "pub fn f(p: P) -> i64 { p.second }\n", + ), + ]; + for (lang, anchor, before, after) in cases { + assert_eq!( + raw(before, lang, anchor, Recipe::V1), + raw(after, lang, anchor, Recipe::V1), + "v1 should be blind to the member swap ({lang:?})" + ); + assert_ne!( + raw(before, lang, anchor, Recipe::V2), + raw(after, lang, anchor, Recipe::V2), + "v2 must catch the member swap ({lang:?})" + ); + } + } + + /// A consistent rename of a *bound* name (param + locals) stays quiet under v2 — the + /// rename-tolerance promise §6.1 makes is preserved, even though v2 stopped renaming member + /// names. The renamed name never appears in a member-access position here. 
+ #[test] + fn consistent_local_rename_is_quiet_under_v2() { + let a = "pub fn f(nxpTier: i64) -> i64 { let t = nxpTier; t + nxpTier }\n"; + let b = "pub fn f(pointsTier: i64) -> i64 { let t = pointsTier; t + pointsTier }\n"; + assert_eq!( + raw(a, Lang::Rust, "x.rs > f", Recipe::V2), + raw(b, Lang::Rust, "x.rs > f", Recipe::V2), + ); + } + + /// Renaming the *receiver* of a member access while keeping the member name is a consistent + /// rename of a bound local — still quiet under v2 (only the receiver placeholder moves, the + /// verbatim member name is unchanged). + #[test] + fn receiver_rename_keeping_member_is_quiet_under_v2() { + let a = "pub fn f(obj: T) -> i64 { obj.compute() }\n"; + let b = "pub fn f(thing: T) -> i64 { thing.compute() }\n"; + assert_eq!( + raw(a, Lang::Rust, "x.rs > f", Recipe::V2), + raw(b, Lang::Rust, "x.rs > f", Recipe::V2), + ); + } + + /// v2 still catches everything v1 did: a structural edit (operator flip) moves the hash. + #[test] + fn structural_edits_still_move_v2() { + let a = "pub fn f(x: i64, y: i64) -> i64 { x + y }\n"; + let b = "pub fn f(x: i64, y: i64) -> i64 { x - y }\n"; + assert_ne!( + raw(a, Lang::Rust, "x.rs > f", Recipe::V2), + raw(b, Lang::Rust, "x.rs > f", Recipe::V2), + ); + } + + /// An object-literal *key* is a `property_identifier` too, but not a member access — it stays + /// alpha-renamed, so renaming both a key and its sole reference consistently is quiet (the + /// structural check guards against over-firing on non-access `property_identifier`s). + #[test] + fn object_literal_key_is_not_treated_as_member_access() { + let a = "export function f() { const o = { alpha: 1 }; return o; }\n"; + let b = "export function f() { const o = { beta: 1 }; return o; }\n"; + // Both v1 and v2 see a single identifier in that position → alpha-renamed → equal. 
+ assert_eq!( + raw(a, Lang::TypeScript, "x.ts > f", Recipe::V2), + raw(b, Lang::TypeScript, "x.ts > f", Recipe::V2), + ); + } +} diff --git a/surf-core/src/hub.rs b/surf-core/src/hub.rs index a70e393..956e42b 100644 --- a/surf-core/src/hub.rs +++ b/surf-core/src/hub.rs @@ -363,6 +363,21 @@ mod tests { assert_eq!(set_anchor_hash(HUB, 0, "oldhash").unwrap(), HUB); } + #[test] + fn versioned_stamp_round_trips_as_a_string() { + // A `2:`-prefixed v2 stamp is a plain YAML scalar (colon not followed by a space), so it + // parses back as the exact string and survives a serialize round-trip (#140). + let out = set_anchor_hash(HUB, 0, "2:abc123def456").unwrap(); + let hub = parse_hub(&out).unwrap(); + assert_eq!( + hub.frontmatter.anchors[0].hash.as_deref(), + Some("2:abc123def456") + ); + let yaml = serde_yaml::to_string(&hub.frontmatter).unwrap(); + let reparsed: Frontmatter = serde_yaml::from_str(&yaml).unwrap(); + assert_eq!(reparsed.anchors[0].hash.as_deref(), Some("2:abc123def456")); + } + #[test] fn follow_rewrites_scalar_at() { let out = set_anchor_at(HUB, 0, "a.rs > foo_renamed").unwrap(); diff --git a/surf-core/src/lib.rs b/surf-core/src/lib.rs index 5507db9..217fb58 100644 --- a/surf-core/src/lib.rs +++ b/surf-core/src/lib.rs @@ -12,7 +12,8 @@ pub mod resolve; pub use anchor::{parse_anchor, Anchor, AnchorParseError, Segment}; pub use config::{parse_config, Config, ConfigError, CONFIG_FILE}; pub use hash::{ - combine_site_hashes, diff_magnitude, hash_anchor, hash_anchor_with, HashOpts, Magnitude, + combine_site_hashes, diff_magnitude, format_stamp, hash_anchor, hash_anchor_raw, + hash_anchor_with, parse_stamp, HashOpts, Magnitude, Recipe, }; pub use hub::{parse_hub, set_anchor_at, set_anchor_hash, At, Claim, Frontmatter, Hub, HubError}; pub use lang::Lang; diff --git a/surf-core/src/rename.rs b/surf-core/src/rename.rs index 5d72c76..1a7258f 100644 --- a/surf-core/src/rename.rs +++ b/surf-core/src/rename.rs @@ -7,19 +7,24 @@ //! 
This covers symbol renames within a file (the common case). A *file* rename makes the //! anchor's path unreadable; that surfaces as a broken reference at the `lint` layer. -use crate::hash::{hash_node, HashOpts}; +use crate::hash::{hash_node, parse_stamp, HashOpts}; use crate::lang::Lang; use crate::resolve::{collect_all_defs, parse_tree, ResolveError}; /// If some current symbol's canonical hash equals `stored_hash`, return its name — the /// symbol the anchor was probably renamed to. `opts` must match the mode the stored hash was -/// computed in (e.g. a claim with `ignore_literals`), or a renamed symbol won't match. +/// computed in (e.g. a claim with `ignore_literals`), or a renamed symbol won't match. The +/// stored stamp's recipe (v1/v2) is honoured, so a renamed-but-unchanged symbol relocates +/// whether or not its stamp has been upgraded yet. pub fn find_renamed( source: &str, lang: Lang, stored_hash: &str, opts: HashOpts, ) -> Result, ResolveError> { + let Some((recipe, stored_hex)) = parse_stamp(stored_hash) else { + return Ok(None); + }; let tree = parse_tree(source, lang).ok_or(ResolveError::Parse)?; let src = source.as_bytes(); let family = lang.family(); @@ -27,7 +32,7 @@ pub fn find_renamed( let mut defs = Vec::new(); collect_all_defs(tree.root_node(), src, family, &mut defs); for (name, node) in defs { - if hash_node(node, src, family, opts) == stored_hash { + if hash_node(node, src, family, opts, recipe) == stored_hex { return Ok(Some(name)); } } diff --git a/surf-core/tests/golden_hash.rs b/surf-core/tests/golden_hash.rs index 5f2ae71..0ef75d4 100644 --- a/surf-core/tests/golden_hash.rs +++ b/surf-core/tests/golden_hash.rs @@ -20,34 +20,124 @@ //! If a *deliberate* change updates these values, update CHANGELOG and treat it as a //! hash-format break for downstreams. 
-use surf_core::{hash_anchor, parse_anchor, Lang}; +use surf_core::{format_stamp, hash_anchor, hash_anchor_raw, parse_anchor, HashOpts, Lang, Recipe}; fn h(src: &str, lang: Lang, anchor: &str) -> String { hash_anchor(src, lang, &parse_anchor(anchor).unwrap()).unwrap() } +fn raw(src: &str, lang: Lang, anchor: &str, recipe: Recipe) -> String { + hash_anchor_raw( + src, + lang, + &parse_anchor(anchor).unwrap(), + HashOpts::default(), + recipe, + ) + .unwrap() +} + #[test] fn golden_hashes_are_stable_per_language() { // Each snippet carries a comment and non-canonical whitespace on purpose, so the golden - // already encodes the "comments + formatting are ignored" guarantee. + // already encodes the "comments + formatting are ignored" guarantee. These snippets are + // member-access-free, so v1 and v2 agree byte-for-byte — itself the guarantee that v2 only + // diverges on member-access names (#140). The frozen v1 digests are unchanged from before + // versioning, so existing v1 stamps in downstream repos still verify. let rust = "pub fn add(a: i64, b: i64) -> i64 {\n // sum them\n a + b\n}\n"; - assert_eq!(h(rust, Lang::Rust, "x.rs > add"), "f1075e760a17"); + assert_eq!( + raw(rust, Lang::Rust, "x.rs > add", Recipe::V1), + "f1075e760a17" + ); + assert_eq!( + raw(rust, Lang::Rust, "x.rs > add", Recipe::V2), + "f1075e760a17" + ); let ts = "export class Svc {\n rotate(tok: string): string {\n return tok + tok; // c\n }\n}\n"; assert_eq!( - h(ts, Lang::TypeScript, "x.ts > Svc > rotate"), + raw(ts, Lang::TypeScript, "x.ts > Svc > rotate", Recipe::V1), + "afa4514b5c89" + ); + assert_eq!( + raw(ts, Lang::TypeScript, "x.ts > Svc > rotate", Recipe::V2), "afa4514b5c89" ); let tsx = "export function App(): JSX.Element {\n return
{1 + 2}
;\n}\n"; - assert_eq!(h(tsx, Lang::Tsx, "x.tsx > App"), "97e0de58725d"); + assert_eq!( + raw(tsx, Lang::Tsx, "x.tsx > App", Recipe::V1), + "97e0de58725d" + ); + assert_eq!( + raw(tsx, Lang::Tsx, "x.tsx > App", Recipe::V2), + "97e0de58725d" + ); let py = "def add(a, b):\n # comment\n return a + b\n"; - assert_eq!(h(py, Lang::Python, "x.py > add"), "879b76118966"); + assert_eq!( + raw(py, Lang::Python, "x.py > add", Recipe::V1), + "879b76118966" + ); + assert_eq!( + raw(py, Lang::Python, "x.py > add", Recipe::V2), + "879b76118966" + ); let go = "func Add(a int, b int) int {\n\t// sum\n\treturn a + b\n}\n"; - assert_eq!(h(go, Lang::Go, "x.go > Add"), "942af2641116"); + assert_eq!(raw(go, Lang::Go, "x.go > Add", Recipe::V1), "942af2641116"); + assert_eq!(raw(go, Lang::Go, "x.go > Add", Recipe::V2), "942af2641116"); + + // The stored stamp for a single-site anchor carries the current-recipe (v2) prefix. + assert_eq!(h(rust, Lang::Rust, "x.rs > add"), "2:f1075e760a17"); + assert_eq!(format_stamp(Recipe::V1, "f1075e760a17"), "f1075e760a17"); +} + +#[test] +fn golden_member_access_hashes_differ_by_recipe() { + // Symbols whose only interesting content is a member access: v1 and v2 diverge, and both + // digests are pinned so a grammar bump or canonicalization refactor that perturbs either + // recipe is a loud, intentional signal (the #140 probes, one per family). 
+ let ts = "export class S {\n tier(u: User): Tier {\n return Tiers.getHighest(u.nxp, PointsTier.TIER_1);\n }\n}\n"; + assert_eq!( + raw(ts, Lang::TypeScript, "x.ts > S > tier", Recipe::V1), + "9aea05e557ad" + ); + assert_eq!( + raw(ts, Lang::TypeScript, "x.ts > S > tier", Recipe::V2), + "2c5e43fc3a1f" + ); + + let go = "func (b *Builder) Set(n string) *Builder {\n\treturn b.Del(n)\n}\n"; + assert_eq!( + raw(go, Lang::Go, "x.go > Builder > Set", Recipe::V1), + "34bc2bf73d75" + ); + assert_eq!( + raw(go, Lang::Go, "x.go > Builder > Set", Recipe::V2), + "e5bc7182a348" + ); + + let py = "def color(self):\n return ProbeColor.RED\n"; + assert_eq!( + raw(py, Lang::Python, "x.py > color", Recipe::V1), + "6061e364641b" + ); + assert_eq!( + raw(py, Lang::Python, "x.py > color", Recipe::V2), + "ccf224e8a6fc" + ); + + let rs = "pub fn name(p: Person) -> String {\n p.first.clone()\n}\n"; + assert_eq!( + raw(rs, Lang::Rust, "x.rs > name", Recipe::V1), + "0e1353a2aee5" + ); + assert_eq!( + raw(rs, Lang::Rust, "x.rs > name", Recipe::V2), + "a0a671650fb6" + ); } #[test]