From 79bc33f35ff2da5f8ffc81cadfa819fc1c97879c Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Wed, 3 Jun 2026 16:30:27 -0500 Subject: [PATCH 1/4] feat(platform): expand FAQ with category-level Q&As and FAQPage JSON-LD Move /platform/faq/ to the data-driven faq pattern: questions live in data/faqs/platform.yml, rendered by the faq shortcode and emitted as schema.org FAQPage JSON-LD via faq_data/faq_canonical frontmatter. Add four category-level Q&As (what InfluxDB is used for, which industries, when to use a time series database, is it open source), expand the relational-database comparison, and keep the decision-page cross-link. Add a Cypress spec asserting the visible Q&As, stable anchors, and the FAQPage JSON-LD shape. Closes #7202 --- content/platform/faq.md | 31 ++-------- cypress/e2e/content/platform-faq.cy.js | 84 ++++++++++++++++++++++++++ data/faqs/platform.yml | 71 ++++++++++++++++++++++ 3 files changed, 161 insertions(+), 25 deletions(-) create mode 100644 cypress/e2e/content/platform-faq.cy.js create mode 100644 data/faqs/platform.yml diff --git a/content/platform/faq.md b/content/platform/faq.md index bbcc798bb8..a2d37777ff 100644 --- a/content/platform/faq.md +++ b/content/platform/faq.md @@ -1,33 +1,14 @@ --- title: Frequently asked questions -description: Frequently asked questions about time series data and the InfluxData platform. +description: > + Frequently asked questions about time series data, what InfluxDB is used for, + which industries use it, and which version of InfluxDB to choose. menu: platform: name: Frequently asked questions weight: 70 +faq_data: platform +faq_canonical: true --- -[What is time series data?](#what-is-time-series-data) -[Why shouldn't I just use a relational database?](#why-shouldnt-i-just-use-a-relational-database) -[Which version of InfluxDB should I use?](#which-version-of-influxdb-should-i-use) - -## What is time series data? -Time series data is a series of data points each associated with a specific time. 
-Examples include: - -- Server performance metrics -- Financial averages over time -- Sensor data, such as temperature, barometric pressure, wind speeds, etc. - -## Why shouldn't I just use a relational database? -Relational databases can be used to store and analyze time series data, but depending -on the precision of your data, a query can involve potentially millions of rows. -InfluxDB is purpose-built to store and query data by time, providing out-of-the-box -functionality that optionally downsamples data after a specific age and a query -engine optimized for time-based data. - -## Which version of InfluxDB should I use? -For new projects, use InfluxDB 3. -See [Which InfluxDB 3 should I use?](/influxdb3/which-influxdb-3/) -for a decision guide across InfluxDB 3 products and migration -from InfluxDB 1 or InfluxDB 2. +{{< faq >}} diff --git a/cypress/e2e/content/platform-faq.cy.js b/cypress/e2e/content/platform-faq.cy.js new file mode 100644 index 0000000000..cc3804bfeb --- /dev/null +++ b/cypress/e2e/content/platform-faq.cy.js @@ -0,0 +1,84 @@ +/// + +// Issue #7202: expand /platform/faq/ with category-level Q&As and emit +// FAQPage JSON-LD. The page is data-driven (data/faqs/platform.yml) via the +// `faq` shortcode and the header/faq-jsonld.html partial (faq_data: platform, +// faq_canonical: true). + +describe('Platform FAQ page', function () { + const url = '/platform/faq/'; + + // Question text + anchorized id. Anchors are stable URLs that LLMs and + // search engines deep-link to, so changing them is a breaking change. 
+ const questions = [ + { text: 'What is time series data?', anchor: 'what-is-time-series-data' }, + { + text: 'What is InfluxDB used for?', + anchor: 'what-is-influxdb-used-for', + }, + { + text: 'What industries use InfluxDB?', + anchor: 'what-industries-use-influxdb', + }, + { + text: 'When should I use a time series database?', + anchor: 'when-should-i-use-a-time-series-database', + }, + { + text: "What's the difference between a time series database and a relational database?", + anchor: + 'whats-the-difference-between-a-time-series-database-and-a-relational-database', + }, + { text: 'Is InfluxDB open source?', anchor: 'is-influxdb-open-source' }, + { + text: 'Which version of InfluxDB should I use?', + anchor: 'which-version-of-influxdb-should-i-use', + }, + ]; + + beforeEach(() => cy.visit(url)); + + it('renders each FAQ question as an H2 with a stable anchor', function () { + questions.forEach(({ text, anchor }) => { + cy.get(`h2#${anchor}`).should('contain.text', text); + }); + }); + + it('wraps each FAQ answer in

<div class="faq-answer">...</div>

', function () { + cy.get('div.faq-answer').should('have.length', questions.length); + cy.get('div.faq-answer').each(($div) => { + cy.wrap($div).find('p').should('have.length.gte', 1); + }); + }); + + it('cross-links to the decision page from the version Q&A', function () { + cy.get('a[href="/influxdb3/which-influxdb-3/"]').should('exist'); + }); + + it('does NOT leak raw markdown headings or list markers into the HTML', function () { + cy.get('article.article--content').then(($article) => { + const html = $article[0].innerHTML; + expect(html).not.to.match(/(^|\n)## /); + expect(html).not.to.match(/(^|\n)- \[/); + }); + }); + + it('emits FAQPage JSON-LD with one Question entity per visible Q&A', function () { + cy.get('script[type="application/ld+json"]').then(($scripts) => { + const faq = [...$scripts] + .map((s) => JSON.parse(s.textContent)) + .find((j) => j['@type'] === 'FAQPage'); + expect(faq, 'FAQPage JSON-LD present').to.exist; + expect(faq['@context']).to.equal('https://schema.org'); + expect(faq.mainEntity).to.have.length(questions.length); + faq.mainEntity.forEach((q) => { + expect(q['@type']).to.equal('Question'); + expect(q.name).to.be.a('string').and.not.empty; + expect(q.acceptedAnswer['@type']).to.equal('Answer'); + expect(q.acceptedAnswer.text).to.be.a('string').and.not.empty; + // Plain text only — no leftover HTML tags from markdownify | plainify. + expect(q.acceptedAnswer.text).to.not.match(/<[a-z][^>]*>/i); + }); + }); + }); +}); diff --git a/data/faqs/platform.yml b/data/faqs/platform.yml new file mode 100644 index 0000000000..fd74c8fbcc --- /dev/null +++ b/data/faqs/platform.yml @@ -0,0 +1,71 @@ +# FAQ data for /platform/faq/. Rendered as visible Q&As by the `faq` +# shortcode and emitted as schema.org FAQPage JSON-LD by +# layouts/partials/header/faq-jsonld.html (page sets faq_data: platform and +# faq_canonical: true). Answers are front-loaded: lead with the direct answer, +# then add detail. + +- question: "What is time series data?" 
+ answer: | + Time series data is a sequence of data points, each associated with a + timestamp, that measure how something changes over time. Common examples + include server and application metrics, network telemetry, financial + prices, and sensor readings such as temperature, pressure, and voltage. + Time series workloads are write-heavy, append-mostly, and queried by time + range. + +- question: "What is InfluxDB used for?" + answer: | + InfluxDB is a purpose-built time series database for storing and querying + large volumes of timestamped data in real time. Common use cases include + infrastructure and application monitoring, network monitoring, IoT and + industrial sensor data, energy and battery (BESS) systems, and financial + market analytics. It is optimized for high-ingest workloads and fast + queries that power dashboards, alerting, and automation. + +- question: "What industries use InfluxDB?" + answer: | + InfluxDB is used across industrial IoT (IIoT) and manufacturing, energy + and battery energy storage systems (BESS), software observability and + DevOps monitoring, telecommunications and network operations, financial + services, and aerospace. These domains share a common need: ingest + high-frequency measurements from many sources and query them by time for + monitoring, analytics, and control. + +- question: "When should I use a time series database?" + answer: | + Use a time series database when your primary access pattern is "what + happened over this time range" and you ingest a continuous stream of + timestamped measurements. It is the right choice for metrics, events, + sensor data, and telemetry, where write throughput is high and queries + aggregate or downsample data by time. A general-purpose relational + database is a better fit for transactional, relationship-heavy data that + isn't primarily organized by time. + +- question: "What's the difference between a time series database and a relational database?" 
+ answer: | + A time series database is optimized for timestamped data: it ingests + millions of points per second, indexes by time, and runs time-windowed + aggregations efficiently. A relational database is optimized for + transactional integrity and relationships across normalized tables. You + can store time series in a relational database, but a single time-range + query can scan millions of rows. InfluxDB stores and queries data by time + out of the box, optionally downsamples data after a set age, and uses a + query engine tuned for time-based access. + +- question: "Is InfluxDB open source?" + answer: | + Yes. InfluxDB 3 Core is open source under the permissive MIT or Apache 2.0 + license and is free to download and run with no license key. InfluxDB 3 + Enterprise is a commercial product built on the same engine; it offers a + 30-day free trial and a free at-home license for non-commercial use. + The earlier InfluxDB 1 and InfluxDB 2 open source releases remain + available under open source licenses. For new projects, use InfluxDB 3. + +- question: "Which version of InfluxDB should I use?" + answer: | + For new projects, use InfluxDB 3. For new production workloads, use + InfluxDB 3 Enterprise; use InfluxDB 3 Core for free, open source, + single-node deployments. See + [Which InfluxDB 3 should I use?](/influxdb3/which-influxdb-3/) for a full + decision guide across InfluxDB 3 products and for migrating from + InfluxDB 1 or InfluxDB 2. From b4877956abdddd582cac0184409787d5114f7b8f Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Thu, 4 Jun 2026 12:21:53 -0500 Subject: [PATCH 2/4] docs(platform): add IA sharing canonical-validation design (#7233) Validate-first design for the Phase 2 IA content-sharing mechanism: a canonical-honoring test protocol (dual target: byte-identical release-notes with canonical vs. identical engine pages without) and an outcome-keyed decision rubric. Defers the route choice until data lands. 
Splits downstream work into #7297 (mechanism spike) and #7298 (pilot conversion). Refs #7230, #7232 --- ...-ia-sharing-canonical-validation-design.md | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md diff --git a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md new file mode 100644 index 0000000000..65905815ef --- /dev/null +++ b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md @@ -0,0 +1,176 @@ +# IA sharing mechanism — canonical-honoring validation and decision rubric + +**Status:** Design — validation pending. No route chosen until the test in +section 3 produces data. +**Closes:** [#7233](https://github.com/influxdata/docs-v2/issues/7233) (Phase 2 design review) +**Parent:** [#7230](https://github.com/influxdata/docs-v2/issues/7230) (AI visibility) +**Blocks:** [#7232](https://github.com/influxdata/docs-v2/issues/7232) (job-led IA migration kickoff) +**Related:** [#7245](https://github.com/influxdata/docs-v2/issues/7245) (canonical audit of engine-concept pages) + +## Goal + +Decide how the job-led IA shares content across Core, Enterprise, and +deployment variants — but decide it on evidence, not assumption. The IA's +"engine docs live once, thin overlays elsewhere" model needs a sharing +mechanism. The choice between candidate mechanisms hinges on one empirical +question that no one has measured: **do LLM retrievers honor `rel=canonical`?** + +This document defines the test that answers that question and a rubric that +maps each possible outcome to a route. It does not pick the route. The route is +chosen when the validation test has data and recorded under "Test results and +decision." 
+ +## Intent (the two pillars) + +The work this design serves has two pillars, both from the parent epic and the +original (since-lost) AI-visibility plan: + +1. **Placement — job-led, anti-dumping.** Content lives where the *task* is, not + dumped into `/reference/` or a catch-all section. An agent asking "how do I + downsample with InfluxDB 3" should retrieve a `/process/` page, not a + reference appendix. This is the #7232 IA concern and overlaps #7245. + +2. **Mechanism — inverted transclusion.** Today's pattern is *N thin stubs pull + from one hidden `content/shared/*.md` source*, and that source is laced with + `show-in` / `hide-in` conditionals. Three costs fall on readers and + retrievers: the authoritative body lives at a non-published `/shared/` path + that is not itself a URL; whoever parses the source wades through + conditionals; and no single *real* page is the authority — every stub is an + equal pull from a hidden file. **Inverted** flips this: the full content + lives at one real, published, canonical page written as clean prose with no + conditionals, and other products reference or include *from that real page*. + `N stubs → 1 hidden source` becomes `1 canonical real page ← N consumers`. + +## Key reframe + +The **readability** half of pillar 2 — authoritative copy is clean prose with +no `show-in`/`hide-in` to parse — wins on reader-UX and agent-parse grounds +**regardless of the test outcome**. The test does not decide *whether* to invert. +It decides **how much to invest in URL consolidation** (canonical tags, `noindex` +on secondary copies, fragment tooling). That bounds the decision: the downside +of a wrong read is bounded effort, not a wrong direction. + +## What the repo already has + +The #7233 issue frames "Route 1 (transclusion)" as net-new engineering. It is +not. docs-v2 already ships the building blocks: + +- **Whole-page transclusion** — `source:` frontmatter + `content/shared/` + (\~1,485 files). 
A per-product stub holds frontmatter; the body comes from one + shared file. +- **Conditional blocks** — `show-in` / `hide-in` shortcodes (\~146 files) vary a + shared file by consuming product. +- **Canonical signaling** — `canonical:` frontmatter (\~296 files) and + `alt_links:` (\~206 files). + +So the genuinely new piece a transclusion route would need is **sub-page +fragment** includes (reuse a 60-word snippet inline); `source:` is whole-page +only. And the inversion is a *discipline and placement* change to the existing +mechanism, not a new engine. This is scoped in chunk 5, after the decision. + +## Validation test (spec — run manually) + +Per the epic, measurement tooling lives in `influxdata/docs-tooling` and is out +of docs-v2 scope. This document is the **protocol**; execution is manual against +production (already-indexed) content. Results land under "Test results and +decision." + +### Targets — a contrast pair + +The test isolates the marginal effect of `rel=canonical` by comparing two +"identical content at multiple live URLs" situations that differ only in whether +canonical is declared. + +| | Content | Live URLs | Canonical state | +| ----------------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | +| **Control (C)** | v3 Python client release notes (byte-identical via one shared source) | 5: `core`, `enterprise`, `clustered`, `cloud-dedicated`, `cloud-serverless` under `/reference/client-libraries/v3/python/release-notes/` | Non-Core copies declare **Core** canonical | +| **Realistic (R)** | `admin/performance-tuning`; replicate with `admin/backup-restore` | `core` + `enterprise` (backup-restore also `clustered`) | **No canonical declared** | + +Control verified byte-identical (single `source:`); Core is canonical. 
Realistic +pages are identical across products but carry no `canonical:` — that absence is +itself the #7245 gap. C and R differ systematically only in the canonical tag, +so the citation delta between them measures what the tag does. + +### Retrievers and modes + +ChatGPT (browsing on), Claude (web search on), Perplexity, Gemini (search on), +Google AI Overviews. **Browsing/RAG mode is primary** — canonical only matters +when a retriever indexes URLs. Record a plain/no-browsing pass separately as +"which URL does the model *recall* from training," noting it is not a canonical +test. + +### Prompts + +3–5 natural prompts per target, phrased as a user or agent asks. Examples: + +- **C:** "What changed in the latest influxdb3-python client release?"; + "Show the release notes for the InfluxDB 3 Python client." +- **R:** "How do I tune InfluxDB 3 query performance?"; + "What are the steps to back up and restore InfluxDB 3?" + +### Coding scheme + +Record one row per `query × retriever × repeat`: + +- date, retriever, mode, prompt +- verbatim cited InfluxData URL(s) +- classification: + - **(a)** canonical URL cited + - **(b)** one non-canonical duplicate cited + - **(c)** multiple duplicates cited + - **(d)** neither — third-party or marketing URL + - **(e)** no citation + +### Variance control + +N=3 fresh-session repeats per cell. Stamp the date — retriever index freshness +drifts. Rough total: 2 targets × \~4 prompts × \~5 retrievers × 3 ≈ 100 +observations. Doable by hand in one sitting. 
+ +## Decision rubric (outcome → route) + +| Control (canonical present) | Realistic (no canonical) | Reading | Route lean | +| ------------------------------------- | ----------------------------------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| Canonical URL cited consistently | Duplicates scatter across products | Canonical does real work | **Promote #7245 to do-now** (add canonical everywhere) + Route 2 conventional split. Cheap, high-leverage | +| Canonical ignored; duplicates scatter | Duplicates scatter | Canonical is cosmetic to retrievers | **Inverted transclusion** — one clean readable canonical page; reduce duplicate URL surface (consolidation, `noindex` on secondary copies) | +| Canonical honored | Realistic also consolidates somehow | Something other than canonical drives consolidation | Investigate the real signal (sitemap, internal link graph) before investing either way | +| Mixed across retrievers | Mixed | Partial honoring | **Hybrid** — canonical for cheap consolidation + inverted transclusion for the highest-value pages | + +In every row, the readability inversion still proceeds (see Key reframe). The +rubric only sets the consolidation investment. + +## Test results and decision + +> Pending. Fill the results table from the validation test's coding scheme, then state the +> chosen route and a one-paragraph rationale keyed to the matching rubric row. +> Filling this section closes #7233 and unblocks #7232. + +## Work chunks + +Small, sequenced. Only chunk 1 is this session. + +1. **This design doc** — protocol + rubric. The #7233 artifact. *(done)* +2. **Execute and record** — run the section 3 queries manually; paste results + into section 5. *(manual; you)* +3. 
**Decision record** — pick the route from the rubric, write the rationale + under "Test results and decision," close #7233, unblock #7232. + +Opened only **after** the decision: + +4. **#7245 canonical audit** — promoted to do-now if the rubric says canonical + helps; deferred or reshaped otherwise. +5. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) — + the Hugo question: make a real published page the authoritative source + instead of a `/shared/` stub; define how consumers include it; decide whether + sub-page fragment includes are needed. +6. **Pilot conversion** ([#7298](https://github.com/influxdata/docs-v2/issues/7298)) — + top `show-in`/`hide-in` pages, using the chosen route. + +## Explicitly out of scope + +- The route decision itself (deferred to "Test results and decision", post-data). +- Migration guides per competitor (separate content workstream). +- The Phase 1 IA skeleton (#7232 predecessor) and Phase 3 editorial discipline + (#7234 successor). +- The prompt-audit data pipeline and measurement tooling + (`influxdata/docs-tooling`). From e2d550cd27ba5f7c2b8baa2b9c2a47932d4b20da Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Mon, 8 Jun 2026 12:02:08 -0500 Subject: [PATCH 3/4] docs(platform): record IA sharing route decision (#7233) Decide Route 2 (conventional split) + promote #7245 canonical cleanup to do-now, on field evidence and rubric structure rather than the gated hand-run audit. Repurpose the section-3 test as non-blocking before/after validation. Add field-evidence and reference-vs-usage sections. 
--- ...-ia-sharing-canonical-validation-design.md | 196 +++++++++++++++--- 1 file changed, 163 insertions(+), 33 deletions(-) diff --git a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md index 65905815ef..f200874615 100644 --- a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md +++ b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md @@ -1,7 +1,10 @@ # IA sharing mechanism — canonical-honoring validation and decision rubric -**Status:** Design — validation pending. No route chosen until the test in -section 3 produces data. +**Status:** Decided. Route chosen on field evidence (see "Test results and +decision"). The original "no route until the section-3 test produces data" gate +is lifted: the test, as specified, could not produce an outcome that changed the +action, so it is repurposed as non-blocking before/after validation on the pilot +pages. **Closes:** [#7233](https://github.com/influxdata/docs-v2/issues/7233) (Phase 2 design review) **Parent:** [#7230](https://github.com/influxdata/docs-v2/issues/7230) (AI visibility) **Blocks:** [#7232](https://github.com/influxdata/docs-v2/issues/7232) (job-led IA migration kickoff) @@ -10,15 +13,18 @@ section 3 produces data. ## Goal Decide how the job-led IA shares content across Core, Enterprise, and -deployment variants — but decide it on evidence, not assumption. The IA's -"engine docs live once, thin overlays elsewhere" model needs a sharing -mechanism. The choice between candidate mechanisms hinges on one empirical -question that no one has measured: **do LLM retrievers honor `rel=canonical`?** - -This document defines the test that answers that question and a rubric that -maps each possible outcome to a route. It does not pick the route. The route is -chosen when the validation test has data and recorded under "Test results and -decision." 
+deployment variants — and decide it on evidence. The IA's "engine docs live +once, thin overlays elsewhere" model needs a sharing mechanism. The original +plan framed the choice as hinging on one unmeasured empirical question: **do LLM +retrievers honor `rel=canonical`?** + +That question turns out to be largely answered by published field evidence (see +"Field evidence"), and — more decisively — the decision rubric below contains no +outcome that argues against canonical consolidation. When no test result can +change the action, the test is not a gate. This document therefore picks the +route now, on field evidence and the rubric's own logic, and records it under +"Test results and decision." The section-3 protocol is retained as non-blocking +before/after validation, not as a precondition. ## Intent (the two pillars) @@ -68,18 +74,32 @@ fragment** includes (reuse a 60-word snippet inline); `source:` is whole-page only. And the inversion is a *discipline and placement* change to the existing mechanism, not a new engine. This is scoped in chunk 5, after the decision. -## Validation test (spec — run manually) +## Validation test (spec — non-blocking before/after) + +This protocol is **no longer a gate** on the route decision (see "Test results +and decision"). It is retained, reframed, as before/after validation on the pilot +pages: measure citation behavior before the canonical cleanup, apply the cleanup, +then measure the same pages again. That design removes the content-type confound +of the original contrast pair — the same page is compared against itself, with +only its canonical/`noindex` state changed over time. Run it to confirm the +cleanup did something, not to decide whether to do it. Per the epic, measurement tooling lives in `influxdata/docs-tooling` and is out of docs-v2 scope. This document is the **protocol**; execution is manual against -production (already-indexed) content. Results land under "Test results and -decision." 
+production (already-indexed) content. Reliable per-retriever behavioral intel is +a separate research track that needs the `docs-tooling` pipeline, not this hand-run. -### Targets — a contrast pair +### Targets — before/after on the pilot pages -The test isolates the marginal effect of `rel=canonical` by comparing two -"identical content at multiple live URLs" situations that differ only in whether -canonical is declared. +The original spec compared a contrast pair (Control with canonical, Realistic +without). That pair is confounded: Control is release-notes across 5 products, +Realistic is admin prose across 2–3, so content type and duplicate count vary +alongside the tag. The reframed test instead measures the **same pilot pages +before and after** the cleanup. The contrast-pair targets below are kept only as +an optional secondary observation, not the primary measurement. + +The original contrast pair compared two "identical content at multiple live URLs" +situations that differ primarily in whether canonical is declared. | | Content | Live URLs | Canonical state | | ----------------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | @@ -139,38 +159,148 @@ observations. Doable by hand in one sitting. In every row, the readability inversion still proceeds (see Key reframe). The rubric only sets the consolidation investment. +**Read the rubric column-by-column.** Every route lean — every row — still does +canonical signaling and reduces duplicate URL surface. No outcome says "don't add +canonical." A test whose every branch leads to the same next action has near-zero +decision value for that action. That is the structural reason the canonical +cleanup is not gated on the test. 
+ +## Field evidence (2026) + +The "do LLM retrievers honor `rel=canonical`" question is less unmeasured than the +original framing assumed. The major AI retrievers do not form independent opinions +about canonical — they inherit the canonical handling of the search indexes they +ride on, so the effect happens at the **index/dedup layer, upstream of the LLM**: + +- **ChatGPT search** runs largely on Bing's index (reported \~92% of grounded + queries). Bing treats canonical as a consolidation signal, so ChatGPT tends to + cite whatever URL Bing already canonicalized — often before the model layer + chooses. +- **Google AI Overviews and Gemini grounding** run on Google's index. Google + honors canonical as a *hint* and can override the declared canonical with its + own choice; that override cascades into what the AI surface can cite. +- **Perplexity and Claude** (Brave / independent crawl) are the weak-dedup cases — + more likely to index and cite multiple variants. Between-retriever divergence is + large: published audits report only \~11% domain overlap between ChatGPT and + Perplexity citations on identical queries. + +Two practical consequences: + +1. **`noindex` on true secondary duplicates is the more reliable lever than + canonical alone.** Google can ignore a canonical hint; it cannot ignore + `noindex`. The cleanup should be "canonical + selectively `noindex` secondary + copies," not canonical only. This is adopted from known practice — it does not + require our own study. **Caveat:** `noindex` is safe only on *pure* duplicates. + A page that mixes shared reference with per-product usage is not a duplicate; + `noindex`ing it would suppress distinct content. See "Reference vs usage." +2. **The high between-retriever variance means a hand-run N=3 audit cannot + reliably separate "canonical consolidates" from noise.** Citation selection is + stochastic; the variance swamps \~100 observations. 
Reliable per-retriever + intel needs the `docs-tooling` measurement pipeline (out of docs-v2 scope per + the epic), not a one-sitting hand-run. + +Sources: Passionfruit (canonical tags and AI citations); Glenn Gabe / GSQI +(canonical-as-hint cascade to ChatGPT; AI search and syndicated content); +ai-visibility.org.uk (how AI search works); Topic Intelligence (per-engine source +selection). + ## Test results and decision -> Pending. Fill the results table from the validation test's coding scheme, then state the -> chosen route and a one-paragraph rationale keyed to the matching rubric row. -> Filling this section closes #7233 and unblocks #7232. +**Decision (2026-06-08): Route 2 (conventional split), with the #7245 canonical +cleanup promoted to do-now and the readability inversion. The +section-3 measurement is repurposed as non-blocking before/after validation.** + +This is the rubric's first row ("canonical does real work → promote #7245 + +conventional split. Cheap, high-leverage"), reached on field evidence rather than +a hand-run audit, for three reasons: + +1. **No rubric outcome stops the canonical work.** Every route lean in the table + still adds canonical signaling and reduces duplicate URL surface. A gating test + whose every branch leads to the same action has near-zero decision value. The + action is decided by the structure of the rubric itself. + +2. **Field evidence already points to consolidation working at the index layer.** + The two highest-traffic AI surfaces — ChatGPT (via Bing) and AI Overviews / + Gemini (via Google) — inherit canonical/dedup decisions from indexes that honor + the signal (Google as a hint it may override; Bing more directly). Canonical + cleanup helps these surfaces and classic SEO at once, with no downside, and the + repo already has the `canonical:` machinery (296 files). It is a no-regret move; + gating it was the real cost. + +3. 
**The hand-run audit could not have settled it anyway.** N=3 across five + retrievers with \~11% cross-retriever citation overlap is underpowered for a + stochastic outcome; variance swamps the signal. And the contrast pair was + confounded (release-notes-×5 vs admin-prose-×3). The reliable version of that + measurement belongs in the `docs-tooling` pipeline, out of docs-v2 scope. + +**Implementation note carried into #7245:** classify each page first (see +"Reference vs usage") — pure shared reference gets consolidated; per-product +usage stays distinct and indexed. For pure duplicates, consolidate with +**canonical + selective `noindex`**, not canonical alone, since Google can ignore +a canonical hint but not `noindex`. Pair the canonical reference page with +bidirectional links to each product's usage guides. The readability inversion +proceeds regardless, per the Key reframe. + +This decision closes #7233 and unblocks #7232. The before/after validation +(section 3, reframed) runs during the pilot conversion (#7298) to confirm the +cleanup consolidated citations; it does not block the migration. + +## Reference vs usage — what gets consolidated + +Canonical consolidation applies to **shared reference**, not to **per-product +usage**. These are different content types and the cleanup must not collapse them. + +| Content type | Example | Across products | Canonical / index treatment | +| --------------------- | --------------------------------------------------------------------- | ----------------- | ---------------------------------------------------------------------------------------------- | +| **Shared reference** | v3 Python client release notes; client API surface | byte-identical | One product owns canonical. 
Secondary copies `canonical:` → owner; `noindex` if pure duplicate | +| **Per-product usage** | how the client is used in Serverless vs Core; setup, examples, guides | genuinely differs | Each page self-canonical, stays indexed, discoverable on its own. Never `noindex` | + +The v3 Python client control case shows the split cleanly: the **release notes** +are the same across all v3 products and versions, so one product (Core) owns that +canonical. But *how the client is used* differs by product, deployment, and +version — that usage is distinct content that each product keeps. + +**Linking pattern (bidirectional hub-and-spoke):** + +- The canonical reference page links out to each product's usage guides + ("Using the client in Core / Serverless / Dedicated ..."). +- Each per-product usage guide links back to the canonical reference for the + shared parts (release notes, full API). + +This is the natural output of Route 2: the split *is* the reference/usage +boundary. The classification pass — deciding, per page, whether a body is pure +reference (dedup) or carries per-product usage (keep distinct) — is the first +step of the #7245 cleanup, ahead of any `noindex`. ## Work chunks -Small, sequenced. Only chunk 1 is this session. +Small, sequenced. 1. **This design doc** — protocol + rubric. The #7233 artifact. *(done)* -2. **Execute and record** — run the section 3 queries manually; paste results - into section 5. *(manual; you)* -3. **Decision record** — pick the route from the rubric, write the rationale - under "Test results and decision," close #7233, unblock #7232. +2. **Decision record** — route picked on field evidence and rubric structure; + rationale under "Test results and decision"; closes #7233, unblocks #7232. + *(done)* -Opened only **after** the decision: +Now unblocked (the canonical cleanup is decoupled from any test gate): -4. **#7245 canonical audit** — promoted to do-now if the rubric says canonical - helps; deferred or reshaped otherwise. -5. 
**Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) — +3. **#7245 canonical cleanup** — promoted to do-now. Add `canonical:` everywhere + it is missing on duplicate engine-concept pages, and add `noindex` to true + secondary duplicates. No-regret; machinery exists. +4. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) — the Hugo question: make a real published page the authoritative source instead of a `/shared/` stub; define how consumers include it; decide whether sub-page fragment includes are needed. -6. **Pilot conversion** ([#7298](https://github.com/influxdata/docs-v2/issues/7298)) — - top `show-in`/`hide-in` pages, using the chosen route. +5. **Pilot conversion** ([#7298](https://github.com/influxdata/docs-v2/issues/7298)) — + top `show-in`/`hide-in` pages, using the chosen route. Run the reframed + before/after validation (section 3) on these pilot pages — non-blocking. ## Explicitly out of scope -- The route decision itself (deferred to "Test results and decision", post-data). - Migration guides per competitor (separate content workstream). - The Phase 1 IA skeleton (#7232 predecessor) and Phase 3 editorial discipline (#7234 successor). - The prompt-audit data pipeline and measurement tooling (`influxdata/docs-tooling`). +- Reliable per-retriever behavioral intel (how ChatGPT vs Perplexity vs Gemini + pick sources). Strategically valuable for the parent epic (#7230) but needs the + `docs-tooling` pipeline with adequate sample size — not a one-sitting hand-run. From e349b317a676ee5cfec66453e4c0f511f117ea1b Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Mon, 8 Jun 2026 12:57:51 -0500 Subject: [PATCH 4/4] docs(platform): scope urgent canonical fixes, defer cross-edition reference Scope the #7245 urgent pass to Core<->Enterprise shared content (canonical to Enterprise as the most complete edition). 
Defer canonical ownership for reference shared across all v3 editions (client libraries, etc.) to broader cross-edition IA work; keep current canonical in the meantime. --- ...-ia-sharing-canonical-validation-design.md | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md index f200874615..c7116f6fe6 100644 --- a/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md +++ b/docs/exec-plans/active/2026-06-03-ia-sharing-canonical-validation-design.md @@ -235,7 +235,10 @@ a hand-run audit, for three reasons: **Implementation note carried into #7245:** classify each page first (see "Reference vs usage") — pure shared reference gets consolidated; per-product -usage stays distinct and indexed. For pure duplicates, consolidate with +usage stays distinct and indexed. **The urgent pass covers only Core↔Enterprise +shared content (canonical → Enterprise);** reference shared across all v3 editions +(client libraries, etc.) keeps its current canonical and is deferred to the +broader cross-edition IA work. For pure duplicates in scope, consolidate with **canonical + selective `noindex`**, not canonical alone, since Google can ignore a canonical hint but not `noindex`. Pair the canonical reference page with bidirectional links to each product's usage guides. The readability inversion @@ -256,10 +259,34 @@ usage**. These are different content types and the cleanup must not collapse the | **Per-product usage** | how the client is used in Serverless vs Core; setup, examples, guides | genuinely differs | Each page self-canonical, stays indexed, discoverable on its own. 
Never `noindex` | The v3 Python client control case shows the split cleanly: the **release notes** -are the same across all v3 products and versions, so one product (Core) owns that +are the same across all v3 products and versions, so one product owns that canonical. But *how the client is used* differs by product, deployment, and version — that usage is distinct content that each product keeps. +### Canonical owner by sharing scope (urgent vs deferred) + +*Which* product owns the canonical depends on the sharing scope, and the two +scopes have different urgency. + +| Sharing scope | Example | Canonical owner | When | +| ---------------------- | ------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------- | +| Core ↔ Enterprise only | engine internals (storage, compaction, indexing) | **Enterprise** (most complete edition; strict superset of Core) | **Urgent — this pass (#7245).** Fill missing canonicals now | +| All v3 editions | client libraries, line protocol, SQL/InfluxQL reference | **Deferred** — keep current canonical | **Broader cross-edition IA work, not this pass.** Do not re-home now | +| Single edition | Core install/quickstart; Enterprise HA/clustering | **self** | already correct by default | + +**Urgent now:** add the missing `canonical:` (→ Enterprise) on Core↔Enterprise +shared content — the original #7245 gap. This is unambiguous on present facts: +Enterprise is the most complete edition and a strict superset of Core, so it owns +the shared engine reference. + +**Deferred:** canonical ownership for reference shared across *all* v3 editions +(client libraries, etc.) is **not settled in this pass.** Those pages keep their +current canonical. 
Resolving them — including whether to unify all shared +reference under one owner — is folded into the broader cross-edition IA effort, +where the relationship between editions is being reworked. Canonical re-pointing +is a cheap, reversible frontmatter change, so deferring costs little. Leaving the +current state in place is the conservative choice until that effort lands. + **Linking pattern (bidirectional hub-and-spoke):** - The canonical reference page links out to each product's usage guides @@ -283,9 +310,12 @@ Small, sequenced. Now unblocked (the canonical cleanup is decoupled from any test gate): -3. **#7245 canonical cleanup** — promoted to do-now. Add `canonical:` everywhere - it is missing on duplicate engine-concept pages, and add `noindex` to true - secondary duplicates. No-regret; machinery exists. +3. **#7245 canonical cleanup (urgent scope)** — promoted to do-now, scoped to + **Core↔Enterprise shared content**. Add `canonical:` (→ Enterprise) wherever it + is missing on shared engine-concept pages, and add `noindex` to true secondary + duplicates. No-regret; machinery exists. Reference shared across all v3 editions + (client libraries, etc.) is **out of this pass** — keep current canonical. + Deferred to the broader cross-edition IA work. 4. **Inverted-transclusion mechanism spike** ([#7297](https://github.com/influxdata/docs-v2/issues/7297)) — the Hugo question: make a real published page the authoritative source instead of a `/shared/` stub; define how consumers include it; decide whether