From 79bc33f35ff2da5f8ffc81cadfa819fc1c97879c Mon Sep 17 00:00:00 2001
From: Jason Stirnaman <jstirnaman@influxdata.com>
Date: Wed, 3 Jun 2026 16:30:27 -0500
Subject: [PATCH 1/4] feat(platform): expand FAQ with category-level Q&As and
 FAQPage JSON-LD

Move /platform/faq/ to the data-driven faq pattern: questions live in
data/faqs/platform.yml, rendered by the faq shortcode and emitted as
schema.org FAQPage JSON-LD via faq_data/faq_canonical frontmatter.

Add four category-level Q&As (what InfluxDB is used for, which industries,
when to use a time series database, is it open source), expand the
relational-database comparison, and keep the decision-page cross-link.

Add a Cypress spec asserting the visible Q&As, stable anchors, and the
FAQPage JSON-LD shape.

Closes #7202
---
 content/platform/faq.md                | 31 ++--------
 cypress/e2e/content/platform-faq.cy.js | 84 ++++++++++++++++++++++++++
 data/faqs/platform.yml                 | 71 ++++++++++++++++++++++
 3 files changed, 161 insertions(+), 25 deletions(-)
 create mode 100644 cypress/e2e/content/platform-faq.cy.js
 create mode 100644 data/faqs/platform.yml
diff --git a/content/platform/faq.md b/content/platform/faq.md
index bbcc798bb8..a2d37777ff 100644
--- a/content/platform/faq.md
+++ b/content/platform/faq.md
@@ -1,33 +1,14 @@
 ---
 title: Frequently asked questions
-description: Frequently asked questions about time series data and the InfluxData platform.
+description: >
+  Frequently asked questions about time series data, what InfluxDB is used for,
+  which industries use it, and which version of InfluxDB to choose.
 menu:
   platform:
     name: Frequently asked questions
     weight: 70
+faq_data: platform
+faq_canonical: true
 ---
 
-[What is time series data?](#what-is-time-series-data)  
-[Why shouldn't I just use a relational database?](#why-shouldnt-i-just-use-a-relational-database)  
-[Which version of InfluxDB should I use?](#which-version-of-influxdb-should-i-use)  
-
-## What is time series data?
-Time series data is a series of data points each associated with a specific time.
-Examples include:
-
-- Server performance metrics
-- Financial averages over time
-- Sensor data, such as temperature, barometric pressure, wind speeds, etc.
-
-## Why shouldn't I just use a relational database?
-Relational databases can be used to store and analyze time series data, but depending
-on the precision of your data, a query can involve potentially millions of rows.
-InfluxDB is purpose-built to store and query data by time, providing out-of-the-box
-functionality that optionally downsamples data after a specific age and a query
-engine optimized for time-based data.
-
-## Which version of InfluxDB should I use?
-For new projects, use InfluxDB 3.
-See [Which InfluxDB 3 should I use?](/influxdb3/which-influxdb-3/)
-for a decision guide across InfluxDB 3 products and migration
-from InfluxDB 1 or InfluxDB 2.
+{{< faq >}}
diff --git a/cypress/e2e/content/platform-faq.cy.js b/cypress/e2e/content/platform-faq.cy.js
new file mode 100644
index 0000000000..cc3804bfeb
--- /dev/null
+++ b/cypress/e2e/content/platform-faq.cy.js
@@ -0,0 +1,84 @@
+/// <reference types="cypress" />
+
+// Issue #7202: expand /platform/faq/ with category-level Q&As and emit
+// FAQPage JSON-LD. The page is data-driven (data/faqs/platform.yml) via the
+// `faq` shortcode and the header/faq-jsonld.html partial (faq_data: platform,
+// faq_canonical: true).
+
+describe('Platform FAQ page', function () {
+  const url = '/platform/faq/';
+
+  // Question text + anchorized id. Anchors are stable URLs that LLMs and
+  // search engines deep-link to, so changing them is a breaking change.
+  const questions = [
+    { text: 'What is time series data?', anchor: 'what-is-time-series-data' },
+    {
+      text: 'What is InfluxDB used for?',
+      anchor: 'what-is-influxdb-used-for',
+    },
+    {
+      text: 'What industries use InfluxDB?',
+      anchor: 'what-industries-use-influxdb',
+    },
+    {
+      text: 'When should I use a time series database?',
+      anchor: 'when-should-i-use-a-time-series-database',
+    },
+    {
+      text: "What's the difference between a time series database and a relational database?",
+      anchor:
+        'whats-the-difference-between-a-time-series-database-and-a-relational-database',
+    },
+    { text: 'Is InfluxDB open source?', anchor: 'is-influxdb-open-source' },
+    {
+      text: 'Which version of InfluxDB should I use?',
+      anchor: 'which-version-of-influxdb-should-i-use',
+    },
+  ];
+
+  beforeEach(() => cy.visit(url)); // every test starts from a fresh page load
+
+  it('renders each FAQ question as an H2 with a stable anchor', function () {
+    questions.forEach(({ text, anchor }) => {
+      cy.get(`h2#${anchor}`).should('contain.text', text);
+    });
+  });
+
+  it('wraps each FAQ answer in <div class="faq-answer"><p>...</p></div>', function () {
+    cy.get('div.faq-answer').should('have.length', questions.length);
+    cy.get('div.faq-answer').each(($div) => {
+      cy.wrap($div).find('p').should('have.length.gte', 1); // >= 1 <p> descendant
+    });
+  });
+
+  it('cross-links to the decision page from the version Q&A', function () {
+    cy.get('a[href="/influxdb3/which-influxdb-3/"]').should('exist'); // searched page-wide, not only inside the version answer
+  });
+
+  it('does NOT leak raw markdown headings or list markers into the HTML', function () {
+    cy.get('article.article--content').then(($article) => {
+      const html = $article[0].innerHTML; // markup of the first matched article
+      expect(html).not.to.match(/(^|\n)## /); // "## " at a line start
+      expect(html).not.to.match(/(^|\n)- \[/); // "- [" at a line start
+    });
+  });
+
+  it('emits FAQPage JSON-LD with one Question entity per visible Q&A', function () {
+    cy.get('script[type="application/ld+json"]').then(($scripts) => {
+      const faq = [...$scripts]
+        .map((s) => JSON.parse(s.textContent)) // throws on any invalid JSON-LD
+        .find((j) => j['@type'] === 'FAQPage'); // first FAQPage object, if any
+      expect(faq, 'FAQPage JSON-LD present').to.exist;
+      expect(faq['@context']).to.equal('https://schema.org');
+      expect(faq.mainEntity).to.have.length(questions.length); // count only
+      faq.mainEntity.forEach((q) => {
+        expect(q['@type']).to.equal('Question');
+        expect(q.name).to.be.a('string').and.not.empty;
+        expect(q.acceptedAnswer['@type']).to.equal('Answer');
+        expect(q.acceptedAnswer.text).to.be.a('string').and.not.empty;
+        // Plain text only — no leftover HTML tags from markdownify | plainify.
+        expect(q.acceptedAnswer.text).to.not.match(/<[a-z][^>]*>/i); // opening tags only
+      });
+    });
+  });
+});
diff --git a/data/faqs/platform.yml b/data/faqs/platform.yml
new file mode 100644
index 0000000000..fd74c8fbcc
--- /dev/null
+++ b/data/faqs/platform.yml
@@ -0,0 +1,71 @@
+# FAQ data for /platform/faq/. Rendered as visible Q&As by the `faq`
+# shortcode and emitted as schema.org FAQPage JSON-LD by
+# layouts/partials/header/faq-jsonld.html (page sets faq_data: platform and
+# faq_canonical: true). Answers are front-loaded: lead with the direct answer,
+# then add detail.
+
+- question: "What is time series data?"
+  answer: |
+    Time series data is a sequence of data points, each associated with a
+    timestamp, that measure how something changes over time. Common examples
+    include server and application metrics, network telemetry, financial
+    prices, and sensor readings such as temperature, pressure, and voltage.
+    Time series workloads are write-heavy, append-mostly, and queried by time
+    range.
+
+- question: "What is InfluxDB used for?"
+  answer: |
+    InfluxDB is a purpose-built time series database for storing and querying
+    large volumes of timestamped data in real time. Common use cases include
+    infrastructure and application monitoring, network monitoring, IoT and
+    industrial sensor data, energy and battery energy storage systems (BESS),
+    and financial market analytics. It is optimized for high-ingest workloads
+    and fast queries that power dashboards, alerting, and automation.
+
+- question: "What industries use InfluxDB?"
+  answer: |
+    InfluxDB is used across industrial IoT (IIoT) and manufacturing, energy
+    and battery energy storage systems (BESS), software observability and
+    DevOps monitoring, telecommunications and network operations, financial
+    services, and aerospace. These domains share a common need: ingest
+    high-frequency measurements from many sources and query them by time for
+    monitoring, analytics, and control.
+
+- question: "When should I use a time series database?"
+  answer: |
+    Use a time series database when your primary access pattern is "what
+    happened over this time range" and you ingest a continuous stream of
+    timestamped measurements. It is the right choice for metrics, events,
+    sensor data, and telemetry, where write throughput is high and queries
+    aggregate or downsample data by time. A general-purpose relational
+    database is a better fit for transactional, relationship-heavy data that
+    isn't primarily organized by time.
+
+- question: "What's the difference between a time series database and a relational database?"
+  answer: |
+    A time series database is optimized for timestamped data: it ingests
+    millions of points per second, indexes by time, and runs time-windowed
+    aggregations efficiently. A relational database is optimized for
+    transactional integrity and relationships across normalized tables. You
+    can store time series in a relational database, but a single time-range
+    query can scan millions of rows. InfluxDB stores and queries data by time
+    out of the box, optionally downsamples data after a set age, and uses a
+    query engine tuned for time-based access.
+
+- question: "Is InfluxDB open source?"
+  answer: |
+    Yes. InfluxDB 3 Core is open source under the permissive MIT or Apache 2.0
+    license and is free to download and run with no license key. InfluxDB 3
+    Enterprise is a commercial product built on the same engine; it offers a
+    30-day free trial and a free at-home license for non-commercial use.
+    The earlier InfluxDB 1 and InfluxDB 2 open source releases remain
+    available under open source licenses. For new projects, use InfluxDB 3.
+
+- question: "Which version of InfluxDB should I use?"
+  answer: |
+    For new projects, use InfluxDB 3. For new production workloads, use
+    InfluxDB 3 Enterprise; use InfluxDB 3 Core for free, open source,
+    single-node deployments. See
+    [Which InfluxDB 3 should I use?](/influxdb3/which-influxdb-3/) for a full
+    decision guide across InfluxDB 3 products and for migrating from
+    InfluxDB 1 or InfluxDB 2.

From b4877956abdddd582cac0184409787d5114f7b8f Mon Sep 17 00:00:00 2001
From: Jason Stirnaman <jstirnaman@influxdata.com>
Date: Thu, 4 Jun 2026 12:21:53 -0500
Subject: [PATCH 2/4] docs(platform): add IA sharing canonical-validation
 design (#7233)

Validate-first design for the Phase 2 IA content-sharing mechanism:
a canonical-honoring test protocol (dual target: byte-identical
release-notes with canonical vs. identical engine pages without) and
an outcome-keyed decision rubric. Defers the route choice until data
lands. Splits downstream work into #7297 (mechanism spike) and #7298
(pilot conversion).

Refs #7230, #7232
---
 ...-ia-sharing-canonical-validation-design.md | 176 ++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md

diff --git a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
new file mode 100644
index 0000000000..65905815ef
--- /dev/null
+++ b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
@@ -0,0 +1,176 @@
+# IA sharing mechanism — canonical-honoring validation and decision rubric
+
+**Status:** Design — validation pending. No route chosen until the test in
+section 3 produces data.
+**Closes:** [#7233](https://github.com/influxdata/docs-v2/issues/7233) (Phase 2 design review)
+**Parent:** [#7230](https://github.com/influxdata/docs-v2/issues/7230) (AI visibility)
+**Blocks:** [#7232](https://github.com/influxdata/docs-v2/issues/7232) (job-led IA migration kickoff)
+**Related:** [#7245](https://github.com/influxdata/docs-v2/issues/7245) (canonical audit of engine-concept pages)
+
+## Goal
+
+Decide how the job-led IA shares content across Core, Enterprise, and
+deployment variants — but decide it on evidence, not assumption. The IA's
+"engine docs live once, thin overlays elsewhere" model needs a sharing
+mechanism. The choice between candidate mechanisms hinges on one empirical
+question that no one has measured: **do LLM retrievers honor `rel=canonical`?**
+
+This document defines the test that answers that question and a rubric that
+maps each possible outcome to a route. It does not pick the route. The route is
+chosen when the validation test has data and recorded under "Test results and
+decision."
+
+## Intent (the two pillars)
+
+The work this design serves has two pillars, both from the parent epic and the
+original (since-lost) AI-visibility plan:
+
+1. **Placement — job-led, anti-dumping.** Content lives where the *task* is, not
+   dumped into `/reference/` or a catch-all section. An agent asking "how do I
+   downsample with InfluxDB 3" should retrieve a `/process/` page, not a
+   reference appendix. This is the #7232 IA concern and overlaps #7245.
+
+2. **Mechanism — inverted transclusion.** Today's pattern is *N thin stubs pull
+   from one hidden `content/shared/*.md` source*, and that source is laced with
+   `show-in` / `hide-in` conditionals. Three costs fall on readers and
+   retrievers: the authoritative body lives at a non-published `/shared/` path
+   that is not itself a URL; whoever parses the source wades through
+   conditionals; and no single *real* page is the authority — every stub is an
+   equal pull from a hidden file. **Inverted** flips this: the full content
+   lives at one real, published, canonical page written as clean prose with no
+   conditionals, and other products reference or include *from that real page*.
+   `N stubs → 1 hidden source` becomes `1 canonical real page ← N consumers`.
+
+## Key reframe
+
+The **readability** half of pillar 2 — authoritative copy is clean prose with
+no `show-in`/`hide-in` to parse — wins on reader-UX and agent-parse grounds
+**regardless of the test outcome**. The test does not decide *whether* to invert.
+It decides **how much to invest in URL consolidation** (canonical tags, `noindex`
+on secondary copies, fragment tooling). That bounds the decision: the downside
+of a wrong read is bounded effort, not a wrong direction.
+
+## What the repo already has
+
+The #7233 issue frames "Route 1 (transclusion)" as net-new engineering. It is
+not. docs-v2 already ships the building blocks:
+
+- **Whole-page transclusion** — `source:` frontmatter + `content/shared/`
+  (\~1,485 files). A per-product stub holds frontmatter; the body comes from one
+  shared file.
+- **Conditional blocks** — `show-in` / `hide-in` shortcodes (\~146 files) vary a
+  shared file by consuming product.
+- **Canonical signaling** — `canonical:` frontmatter (\~296 files) and
+  `alt_links:` (\~206 files).
+
+So the genuinely new piece a transclusion route would need is **sub-page
+fragment** includes (reuse a 60-word snippet inline); `source:` is whole-page
+only. And the inversion is a *discipline and placement* change to the existing
+mechanism, not a new engine. This is scoped in chunk 5, after the decision.
+
+## Validation test (spec — run manually)
+
+Per the epic, measurement tooling lives in `influxdata/docs-tooling` and is out
+of docs-v2 scope. This document is the **protocol**; execution is manual against
+production (already-indexed) content. Results land under "Test results and
+decision."
+
+### Targets — a contrast pair
+
+The test isolates the marginal effect of `rel=canonical` by comparing two
+"identical content at multiple live URLs" situations that differ only in whether
+canonical is declared.
+
+|                   | Content                                                               | Live URLs                                                                                                                                | Canonical state                            |
+| ----------------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ |
+| **Control (C)**   | v3 Python client release notes (byte-identical via one shared source) | 5: `core`, `enterprise`, `clustered`, `cloud-dedicated`, `cloud-serverless` under `/reference/client-libraries/v3/python/release-notes/` | Non-Core copies declare **Core** canonical |
+| **Realistic (R)** | `admin/performance-tuning`; replicate with `admin/backup-restore`     | `core` + `enterprise` (backup-restore also `clustered`)                                                                                  | **No canonical declared**                  |
+
+Control verified byte-identical (single `source:`); Core is canonical. Realistic
+pages are identical across products but carry no `canonical:` — that absence is
+itself the #7245 gap. C and R differ systematically only in the canonical tag,
+so the citation delta between them measures what the tag does.
+
+### Retrievers and modes
+
+ChatGPT (browsing on), Claude (web search on), Perplexity, Gemini (search on),
+Google AI Overviews. **Browsing/RAG mode is primary** — canonical only matters
+when a retriever indexes URLs. Record a plain/no-browsing pass separately as
+"which URL does the model *recall* from training," noting it is not a canonical
+test.
+
+### Prompts
+
+3–5 natural prompts per target, phrased as a user or agent asks. Examples:
+
+- **C:** "What changed in the latest influxdb3-python client release?";
+  "Show the release notes for the InfluxDB 3 Python client."
+- **R:** "How do I tune InfluxDB 3 query performance?";
+  "What are the steps to back up and restore InfluxDB 3?"
+
+### Coding scheme
+
+Record one row per `query × retriever × repeat`:
+
+- date, retriever, mode, prompt
+- verbatim cited InfluxData URL(s)
+- classification:
+  - **(a)** canonical URL cited
+  - **(b)** one non-canonical duplicate cited
+  - **(c)** multiple duplicates cited
+  - **(d)** neither — third-party or marketing URL
+  - **(e)** no citation
+
+### Variance control
+
+N=3 fresh-session repeats per cell. Stamp the date — retriever index freshness
+drifts. Rough total: 2 targets × \~4 prompts × \~5 retrievers × 3 ≈ 120
+observations. Doable by hand in one sitting.
+
+## Decision rubric (outcome → route)
+
+| Control (canonical present)           | Realistic (no canonical)            | Reading                                             | Route lean                                                                                                                                 |
+| ------------------------------------- | ----------------------------------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| Canonical URL cited consistently      | Duplicates scatter across products  | Canonical does real work                            | **Promote #7245 to do-now** (add canonical everywhere) + Route 2 conventional split. Cheap, high-leverage                                  |
+| Canonical ignored; duplicates scatter | Duplicates scatter                  | Canonical is cosmetic to retrievers                 | **Inverted transclusion** — one clean readable canonical page; reduce duplicate URL surface (consolidation, `noindex` on secondary copies) |
+| Canonical honored                     | Realistic also consolidates somehow | Something other than canonical drives consolidation | Investigate the real signal (sitemap, internal link graph) before investing either way                                                     |
+| Mixed across retrievers               | Mixed                               | Partial honoring                                    | **Hybrid** — canonical for cheap consolidation + inverted transclusion for the highest-value pages                                         |
+
+In every row, the readability inversion still proceeds (see Key reframe). The
+rubric only sets the consolidation investment.
+
+## Test results and decision
+
+> Pending. Fill the results table from the validation test's coding scheme, then state the
+> chosen route and a one-paragraph rationale keyed to the matching rubric row.
+> Filling this section closes #7233 and unblocks #7232.
+
+## Work chunks
+
+Small, sequenced. Only chunk 1 is this session.
+
+1. **This design doc** — protocol + rubric. The #7233 artifact. *(done)*
+2. **Execute and record** — run the section 3 queries manually; paste results
+   into section 5. *(manual; you)*
+3. **Decision record** — pick the route from the rubric, write the rationale
+   under "Test results and decision," close #7233, unblock #7232.
+
+Opened only **after** the decision:
+
+4. **#7245 canonical audit** — promoted to do-now if the rubric says canonical
+   helps; deferred or reshaped otherwise.
+5. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) —
+   the Hugo question: make a real published page the authoritative source
+   instead of a `/shared/` stub; define how consumers include it; decide whether
+   sub-page fragment includes are needed.
+6. **Pilot conversion** ([#7298](https://github.com/influxdata/docs-v2/issues/7298)) —
+   top `show-in`/`hide-in` pages, using the chosen route.
+
+## Explicitly out of scope
+
+- The route decision itself (deferred to "Test results and decision", post-data).
+- Migration guides per competitor (separate content workstream).
+- The Phase 1 IA skeleton (#7232 predecessor) and Phase 3 editorial discipline
+  (#7234 successor).
+- The prompt-audit data pipeline and measurement tooling
+  (`influxdata/docs-tooling`).

From e2d550cd27ba5f7c2b8baa2b9c2a47932d4b20da Mon Sep 17 00:00:00 2001
From: Jason Stirnaman <jstirnaman@influxdata.com>
Date: Mon, 8 Jun 2026 12:02:08 -0500
Subject: [PATCH 3/4] docs(platform): record IA sharing route decision (#7233)

Decide Route 2 (conventional split) + promote #7245 canonical cleanup to
do-now, on field evidence and rubric structure rather than the gated
hand-run audit. Repurpose the section-3 test as non-blocking before/after
validation. Add field-evidence and reference-vs-usage sections.
---
 ...-ia-sharing-canonical-validation-design.md | 196 +++++++++++++++---
 1 file changed, 163 insertions(+), 33 deletions(-)

diff --git a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
index 65905815ef..f200874615 100644
--- a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
+++ b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
@@ -1,7 +1,10 @@
 # IA sharing mechanism — canonical-honoring validation and decision rubric
 
-**Status:** Design — validation pending. No route chosen until the test in
-section 3 produces data.
+**Status:** Decided. Route chosen on field evidence (see "Test results and
+decision"). The original gate — no route until the validation test produces
+data — is lifted: the test, as specified, could not produce an outcome that
+changed the action, so it is repurposed as non-blocking before/after validation
+on the pilot pages.
 **Closes:** [#7233](https://github.com/influxdata/docs-v2/issues/7233) (Phase 2 design review)
 **Parent:** [#7230](https://github.com/influxdata/docs-v2/issues/7230) (AI visibility)
 **Blocks:** [#7232](https://github.com/influxdata/docs-v2/issues/7232) (job-led IA migration kickoff)
@@ -10,15 +13,18 @@ section 3 produces data.
 ## Goal
 
 Decide how the job-led IA shares content across Core, Enterprise, and
-deployment variants — but decide it on evidence, not assumption. The IA's
-"engine docs live once, thin overlays elsewhere" model needs a sharing
-mechanism. The choice between candidate mechanisms hinges on one empirical
-question that no one has measured: **do LLM retrievers honor `rel=canonical`?**
-
-This document defines the test that answers that question and a rubric that
-maps each possible outcome to a route. It does not pick the route. The route is
-chosen when the validation test has data and recorded under "Test results and
-decision."
+deployment variants — and decide it on evidence. The IA's "engine docs live
+once, thin overlays elsewhere" model needs a sharing mechanism. The original
+plan framed the choice as hinging on one unmeasured empirical question: **do LLM
+retrievers honor `rel=canonical`?**
+
+That question turns out to be largely answered by published field evidence (see
+"Field evidence"), and — more decisively — the decision rubric below contains no
+outcome that argues against canonical consolidation. When no test result can
+change the action, the test is not a gate. This document therefore picks the
+route now, on field evidence and the rubric's own logic, and records it under
+"Test results and decision." The "Validation test" protocol is retained as
+non-blocking before/after validation, not as a precondition.
 
 ## Intent (the two pillars)
 
@@ -68,18 +74,32 @@ fragment** includes (reuse a 60-word snippet inline); `source:` is whole-page
 only. And the inversion is a *discipline and placement* change to the existing
 mechanism, not a new engine. This is scoped in chunk 5, after the decision.
 
-## Validation test (spec — run manually)
+## Validation test (spec — non-blocking before/after)
+
+This protocol is **no longer a gate** on the route decision (see "Test results
+and decision"). It is retained, reframed, as before/after validation on the pilot
+pages: measure citation behavior before the canonical cleanup, apply the cleanup,
+then measure the same pages again. That design removes the content-type confound
+of the original contrast pair — the same page is compared against itself, with
+only its canonical/`noindex` state changed over time. Run it to confirm the
+cleanup did something, not to decide whether to do it.
 
 Per the epic, measurement tooling lives in `influxdata/docs-tooling` and is out
 of docs-v2 scope. This document is the **protocol**; execution is manual against
-production (already-indexed) content. Results land under "Test results and
-decision."
+production (already-indexed) content. Reliable per-retriever behavioral intel is
+a separate research track that needs the `docs-tooling` pipeline, not this hand-run.
 
-### Targets — a contrast pair
+### Targets — before/after on the pilot pages
 
-The test isolates the marginal effect of `rel=canonical` by comparing two
-"identical content at multiple live URLs" situations that differ only in whether
-canonical is declared.
+The original spec compared a contrast pair (Control with canonical, Realistic
+without). That pair is confounded: Control is release-notes across 5 products,
+Realistic is admin prose across 2–3, so content type and duplicate count vary
+alongside the tag. The reframed test instead measures the **same pilot pages
+before and after** the cleanup. The contrast-pair targets below are kept only as
+an optional secondary observation, not the primary measurement.
+
+The original contrast pair compared two "identical content at multiple live URLs"
+situations that were intended to differ only in whether canonical is declared.
 
 |                   | Content                                                               | Live URLs                                                                                                                                | Canonical state                            |
 | ----------------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ |
@@ -139,38 +159,148 @@ observations. Doable by hand in one sitting.
 In every row, the readability inversion still proceeds (see Key reframe). The
 rubric only sets the consolidation investment.
 
+**Read the rubric column-by-column.** Every route lean — every row — still adds
+canonical signaling and reduces duplicate URL surface. No outcome says "don't add
+canonical." A test whose every branch leads to the same next action has near-zero
+decision value for that action. That is the structural reason the canonical
+cleanup is not gated on the test.
+
+## Field evidence (2026)
+
+The "do LLM retrievers honor `rel=canonical`" question is less unmeasured than the
+original framing assumed. The major AI retrievers do not form independent opinions
+about canonical — they inherit the canonical handling of the search indexes they
+ride on, so the effect happens at the **index/dedup layer, upstream of the LLM**:
+
+- **ChatGPT search** runs largely on Bing's index (reported \~92% of grounded
+  queries). Bing treats canonical as a consolidation signal, so ChatGPT tends to
+  cite whatever URL Bing already canonicalized — often before the model layer
+  chooses.
+- **Google AI Overviews and Gemini grounding** run on Google's index. Google
+  honors canonical as a *hint* and can override the declared canonical with its
+  own choice; that override cascades into what the AI surface can cite.
+- **Perplexity and Claude** (Brave / independent crawl) are the weak-dedup cases —
+  more likely to index and cite multiple variants. Between-retriever divergence is
+  large: published audits report only \~11% domain overlap between ChatGPT and
+  Perplexity citations on identical queries.
+
+Two practical consequences:
+
+1. **`noindex` on true secondary duplicates is a more reliable lever than
+   canonical alone.** Google can ignore a canonical hint; it cannot ignore
+   `noindex`. The cleanup should be "canonical + selectively `noindex` secondary
+   copies," not canonical only. This is adopted from known practice — it does not
+   require our own study. **Caveat:** `noindex` is safe only on *pure* duplicates.
+   A page that mixes shared reference with per-product usage is not a duplicate;
+   `noindex`ing it would suppress distinct content. See "Reference vs usage."
+2. **The high between-retriever variance means a hand-run N=3 audit cannot
+   reliably separate "canonical consolidates" from noise.** Citation selection is
+   stochastic; at \~100 observations the variance swamps the signal. Reliable
+   per-retriever intel needs the `docs-tooling` measurement pipeline (out of
+   docs-v2 scope per the epic), not a one-sitting hand-run.
+
+Sources: Passionfruit (canonical tags and AI citations); Glenn Gabe / GSQI
+(canonical-as-hint cascade to ChatGPT; AI search and syndicated content);
+ai-visibility.org.uk (how AI search works); Topic Intelligence (per-engine source
+selection).
+
 ## Test results and decision
 
-> Pending. Fill the results table from the validation test's coding scheme, then state the
-> chosen route and a one-paragraph rationale keyed to the matching rubric row.
-> Filling this section closes #7233 and unblocks #7232.
+**Decision (2026-06-08): Route 1 — promote #7245 canonical cleanup to do-now,
+paired with Route 2 conventional split and the readability inversion. The
+section-3 measurement is repurposed as non-blocking before/after validation.**
+
+This is the rubric's first row ("canonical does real work → promote #7245 +
+conventional split. Cheap, high-leverage"), reached on field evidence rather than
+a hand-run audit, for three reasons:
+
+1. **No rubric outcome stops the canonical work.** Every route lean in the table
+   still adds canonical signaling and reduces duplicate URL surface. A gating test
+   whose every branch leads to the same action has near-zero decision value. The
+   action is decided by the structure of the rubric itself.
+
+2. **Field evidence already points to consolidation working at the index layer.**
+   The two highest-traffic AI surfaces — ChatGPT (via Bing) and AI Overviews /
+   Gemini (via Google) — inherit canonical/dedup decisions from indexes that honor
+   the signal (Google as a hint it may override; Bing more directly). Canonical
+   cleanup helps these surfaces and classic SEO at once, with no downside, and the
+   repo already has the `canonical:` machinery (296 files). It is a no-regret move;
+   gating it was the real cost.
+
+3. **The hand-run audit could not have settled it anyway.** N=3 across five
+   retrievers with \~11% cross-retriever citation overlap is underpowered for a
+   stochastic outcome; variance swamps the signal. And the contrast pair was
+   confounded (release-notes-×5 vs admin-prose-×2–3). The reliable version of that
+   measurement belongs in the `docs-tooling` pipeline, out of docs-v2 scope.
+
+**Implementation note carried into #7245:** classify each page first (see
+"Reference vs usage") — pure shared reference gets consolidated; per-product
+usage stays distinct and indexed. For pure duplicates, consolidate with
+**canonical + selective `noindex`**, not canonical alone, since Google can ignore
+a canonical hint but not `noindex`. Pair the canonical reference page with
+bidirectional links to each product's usage guides. The readability inversion
+proceeds regardless, per the Key reframe.
+
+This decision closes #7233 and unblocks #7232. The before/after validation
+(section 3, reframed) runs during the pilot conversion (#7298) to confirm the
+cleanup consolidated citations; it does not block the migration.
+
+## Reference vs usage — what gets consolidated
+
+Canonical consolidation applies to **shared reference**, not to **per-product
+usage**. These are different content types and the cleanup must not collapse them.
+
+| Content type          | Example                                                               | Across products   | Canonical / index treatment                                                                    |
+| --------------------- | --------------------------------------------------------------------- | ----------------- | ---------------------------------------------------------------------------------------------- |
+| **Shared reference**  | v3 Python client release notes; client API surface                    | byte-identical    | One product owns canonical. Secondary copies `canonical:` → owner; `noindex` if pure duplicate |
+| **Per-product usage** | how the client is used in Serverless vs Core; setup, examples, guides | genuinely differs | Each page self-canonical, stays indexed, discoverable on its own. Never `noindex`              |
+
+The v3 Python client control case shows the split cleanly: the **release notes**
+are the same across all v3 products and versions, so one product (Core) owns that
+canonical. But *how the client is used* differs by product, deployment, and
+version — that usage is distinct content that each product keeps.
+
+**Linking pattern (bidirectional hub-and-spoke):**
+
+- The canonical reference page links out to each product's usage guides
+  ("Using the client in Core / Serverless / Dedicated ...").
+- Each per-product usage guide links back to the canonical reference for the
+  shared parts (release notes, full API).
+
+This is the natural output of Route 2: the split *is* the reference/usage
+boundary. The classification pass — deciding, per page, whether a body is pure
+reference (dedup) or carries per-product usage (keep distinct) — is the first
+step of the #7245 cleanup, ahead of any `noindex`.
 
 ## Work chunks
 
-Small, sequenced. Only chunk 1 is this session.
+Small, sequenced.
 
 1. **This design doc** — protocol + rubric. The #7233 artifact. *(done)*
-2. **Execute and record** — run the section 3 queries manually; paste results
-   into section 5. *(manual; you)*
-3. **Decision record** — pick the route from the rubric, write the rationale
-   under "Test results and decision," close #7233, unblock #7232.
+2. **Decision record** — route picked on field evidence and rubric structure;
+   rationale under "Test results and decision"; closes #7233, unblocks #7232.
+   *(done)*
 
-Opened only **after** the decision:
+Now unblocked (the canonical cleanup is decoupled from any test gate):
 
-4. **#7245 canonical audit** — promoted to do-now if the rubric says canonical
-   helps; deferred or reshaped otherwise.
-5. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) —
+3. **#7245 canonical cleanup** — promoted to do-now. Add `canonical:` everywhere
+   it is missing on duplicate engine-concept pages, and add `noindex` to true
+   secondary duplicates. No-regret; machinery exists.
+4. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) —
    the Hugo question: make a real published page the authoritative source
    instead of a `/shared/` stub; define how consumers include it; decide whether
    sub-page fragment includes are needed.
-6. **Pilot conversion** ([#7298](https://github.com/influxdata/docs-v2/issues/7298)) —
-   top `show-in`/`hide-in` pages, using the chosen route.
+5. **Pilot conversion** ([#7298](https://github.com/influxdata/docs-v2/issues/7298)) —
+   top `show-in`/`hide-in` pages, using the chosen route. Run the reframed
+   before/after validation (section 3) on these pilot pages — non-blocking.
 
 ## Explicitly out of scope
 
-- The route decision itself (deferred to "Test results and decision", post-data).
 - Migration guides per competitor (separate content workstream).
 - The Phase 1 IA skeleton (#7232 predecessor) and Phase 3 editorial discipline
   (#7234 successor).
 - The prompt-audit data pipeline and measurement tooling
   (`influxdata/docs-tooling`).
+- Reliable per-retriever behavioral intel (how ChatGPT vs Perplexity vs Gemini
+  pick sources). Strategically valuable for the parent epic (#7230) but needs the
+  `docs-tooling` pipeline with adequate sample size — not a one-sitting hand-run.

From e349b317a676ee5cfec66453e4c0f511f117ea1b Mon Sep 17 00:00:00 2001
From: Jason Stirnaman <jstirnaman@influxdata.com>
Date: Mon, 8 Jun 2026 12:57:51 -0500
Subject: [PATCH 4/4] docs(platform): scope urgent canonical fixes, defer
 cross-edition reference

Scope the #7245 urgent pass to Core<->Enterprise shared content (canonical
to Enterprise as the most complete edition). Defer canonical ownership for
reference shared across all v3 editions (client libraries, etc.) to broader
cross-edition IA work; keep current canonical in the meantime.
---
 ...-ia-sharing-canonical-validation-design.md | 40 ++++++++++++++++---
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
index f200874615..c7116f6fe6 100644
--- a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
+++ b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md
@@ -235,7 +235,10 @@ a hand-run audit, for three reasons:
 
 **Implementation note carried into #7245:** classify each page first (see
 "Reference vs usage") — pure shared reference gets consolidated; per-product
-usage stays distinct and indexed. For pure duplicates, consolidate with
+usage stays distinct and indexed. **The urgent pass covers only Core↔Enterprise
+shared content (canonical → Enterprise);** reference shared across all v3 editions
+(client libraries, etc.) keeps its current canonical and is deferred to the
+broader cross-edition IA work. For pure duplicates in scope, consolidate with
 **canonical + selective `noindex`**, not canonical alone, since Google can ignore
 a canonical hint but not `noindex`. Pair the canonical reference page with
 bidirectional links to each product's usage guides. The readability inversion
@@ -256,10 +259,34 @@ usage**. These are different content types and the cleanup must not collapse the
 | **Per-product usage** | how the client is used in Serverless vs Core; setup, examples, guides | genuinely differs | Each page self-canonical, stays indexed, discoverable on its own. Never `noindex`              |
 
 The v3 Python client control case shows the split cleanly: the **release notes**
-are the same across all v3 products and versions, so one product (Core) owns that
+are the same across all v3 products and versions, so one product owns that
 canonical. But *how the client is used* differs by product, deployment, and
 version — that usage is distinct content that each product keeps.
 
+### Canonical owner by sharing scope (urgent vs deferred)
+
+*Which* product owns the canonical depends on the sharing scope, and the two
+scopes have different urgency.
+
+| Sharing scope          | Example                                                 | Canonical owner                                                 | When                                                                 |
+| ---------------------- | ------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------- |
+| Core ↔ Enterprise only | engine internals (storage, compaction, indexing)        | **Enterprise** (most complete edition; strict superset of Core) | **Urgent — this pass (#7245).** Fill missing canonicals now          |
+| All v3 editions        | client libraries, line protocol, SQL/InfluxQL reference | **Deferred** — keep current canonical                           | **Broader cross-edition IA work, not this pass.** Do not re-home now |
+| Single edition         | Core install/quickstart; Enterprise HA/clustering       | **self**                                                        | already correct by default                                           |
+
+**Urgent now:** add the missing `canonical:` (→ Enterprise) on Core↔Enterprise
+shared content — the original #7245 gap. This is unambiguous on present facts:
+Enterprise is the most complete edition and a strict superset of Core, so it owns
+the shared engine reference.
+
+**Deferred:** canonical ownership for reference shared across *all* v3 editions
+(client libraries, etc.) is **not settled in this pass.** Those pages keep their
+current canonical. Resolving them — including whether to unify all shared
+reference under one owner — is folded into the broader cross-edition IA effort,
+where the relationship between editions is being reworked. Canonical re-pointing
+is a cheap, reversible frontmatter change, so deferring costs little. Leaving the
+current state in place is the conservative choice until that effort lands.
+
 **Linking pattern (bidirectional hub-and-spoke):**
 
 - The canonical reference page links out to each product's usage guides
@@ -283,9 +310,12 @@ Small, sequenced.
 
 Now unblocked (the canonical cleanup is decoupled from any test gate):
 
-3. **#7245 canonical cleanup** — promoted to do-now. Add `canonical:` everywhere
-   it is missing on duplicate engine-concept pages, and add `noindex` to true
-   secondary duplicates. No-regret; machinery exists.
+3. **#7245 canonical cleanup (urgent scope)** — promoted to do-now, scoped to
+   **Core↔Enterprise shared content**. Add `canonical:` (→ Enterprise) wherever it
+   is missing on shared engine-concept pages, and add `noindex` to true secondary
+   duplicates. No-regret; machinery exists. Reference shared across all v3 editions
+   (client libraries, etc.) is **out of this pass** — keep current canonical.
+   Deferred to the broader cross-edition IA work.
 4. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) —
    the Hugo question: make a real published page the authoritative source
    instead of a `/shared/` stub; define how consumers include it; decide whether