From a40dc536140661220d2befc2a39fe4c446105897 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 6 May 2026 12:49:01 -0700 Subject: [PATCH 1/6] improvement(seo): restore explicit AI/search bot allow-list and add link-preview rules --- apps/sim/app/robots.ts | 129 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 16 deletions(-) diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index ce531db019b..bf492f55648 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -1,29 +1,126 @@ import type { MetadataRoute } from 'next' import { getBaseUrl } from '@/lib/core/utils/urls' +/** + * Default disallow list applied to crawlers and the wildcard rule. Blocks + * authenticated surfaces, internal endpoints, and one-time-use links. + */ +const DISALLOWED_PATHS = [ + '/api/', + '/workspace/', + '/chat/', + '/playground/', + '/resume/', + '/invite/', + '/unsubscribe/', + '/w/', + '/form/', + '/credential-account/', + '/_next/', + '/private/', + '/blog*tag=', +] + +/** + * Tighter disallow list for link-preview bots. They fetch single URLs to + * render Open Graph cards rather than crawl, so publicly-shareable surfaces + * like /chat/, /form/, and /w/ must be reachable for previews to render. + */ +const LINK_PREVIEW_DISALLOWED_PATHS = [ + '/api/', + '/workspace/', + '/resume/', + '/invite/', + '/unsubscribe/', + '/credential-account/', + '/_next/', + '/private/', +] + +/** + * Search engines and AI/answer-engine crawlers explicitly allow-listed for + * SEO/AEO/GEO. Explicit Allow rules ensure these bots are not accidentally + * suppressed by downstream filters and signal intent to operators that + * publish allow-list audits (Profound, Scrunch, Otterly, etc.). + */ +const SEARCH_AND_AI_BOTS = [ + // Western search engines + 'Googlebot', + 'Bingbot', + 'DuckDuckBot', + 'Kagibot', + 'Bravebot', + // Regional search engines + 'YandexBot', + 'Baiduspider', + 'Sogou web spider', + 'Yeti', + 'SeznamBot', + 'PetalBot', + // OpenAI + 'GPTBot', + 'OAI-SearchBot', + 'ChatGPT-User', + // Anthropic + 'ClaudeBot', + 'Claude-SearchBot', + 'Claude-User', + // Google AI + 'Google-Extended', + // Perplexity + 'PerplexityBot', + 'Perplexity-User', + // Apple + 'Applebot', + 'Applebot-Extended', + // Meta + 'Meta-ExternalAgent', + 'Meta-ExternalFetcher', + 'FacebookBot', + // Other major AI / answer engines + 'Amazonbot', + 'CCBot', + 'cohere-ai', + 'cohere-training-data-crawler', + 'Grok-web-crawl', + 'MistralAI-User', + 'DeepSeek-AI', + 'YouBot', + 'Diffbot', + 'AI2Bot', + 'Timpibot', + 'ImageSiftBot', +] + +/** + * Social and messaging platforms that fetch URLs to render link previews + * (Open Graph / Twitter Card images). These need access to publicly-shared + * surfaces like /chat/ and /form/ that are otherwise blocked. + */ +const LINK_PREVIEW_BOTS = [ + 'Twitterbot', + 'LinkedInBot', + 'Slackbot', + 'Slack-ImgProxy', + 'Discordbot', + 'TelegramBot', + 'WhatsApp', + 'facebookexternalhit', + 'Pinterestbot', + 'redditbot', +] + export default function robots(): MetadataRoute.Robots { const baseUrl = getBaseUrl() return { rules: [ + { userAgent: '*', allow: '/', disallow: DISALLOWED_PATHS }, + { userAgent: SEARCH_AND_AI_BOTS, allow: '/', disallow: DISALLOWED_PATHS }, { - userAgent: '*', + userAgent: LINK_PREVIEW_BOTS, allow: '/', - disallow: [ - '/api/', - '/workspace/', - '/chat/', - '/playground/', - '/resume/', - '/invite/', - '/unsubscribe/', - '/w/', - '/form/', - '/credential-account/', - '/_next/', - '/private/', - '/blog*tag=', - ], + disallow: LINK_PREVIEW_DISALLOWED_PATHS, }, ], sitemap: [`${baseUrl}/sitemap.xml`, `${baseUrl}/blog/sitemap-images.xml`], From 2c78835bf5c7c359b0a948ce9197ce4321787591 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 6 May 2026 13:05:29 -0700 Subject: [PATCH 2/6] fix(seo): correct xAI UA strings, drop Bravebot, block /playground/ and /w/ from link-preview bots --- apps/sim/app/robots.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index bf492f55648..e0f39529a96 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -24,11 +24,14 @@ const DISALLOWED_PATHS = [ /** * Tighter disallow list for link-preview bots. They fetch single URLs to * render Open Graph cards rather than crawl, so publicly-shareable surfaces - * like /chat/, /form/, and /w/ must be reachable for previews to render. + * like /chat/ and /form/ must be reachable for previews to render. Other + * authenticated routes (/workspace/, /w/, /playground/) stay blocked. */ const LINK_PREVIEW_DISALLOWED_PATHS = [ '/api/', '/workspace/', + '/w/', + '/playground/', '/resume/', '/invite/', '/unsubscribe/', @@ -49,7 +52,6 @@ const SEARCH_AND_AI_BOTS = [ 'Bingbot', 'DuckDuckBot', 'Kagibot', - 'Bravebot', // Regional search engines 'YandexBot', 'Baiduspider', @@ -82,7 +84,9 @@ const SEARCH_AND_AI_BOTS = [ 'CCBot', 'cohere-ai', 'cohere-training-data-crawler', - 'Grok-web-crawl', + 'GrokBot', + 'xAI-Grok', + 'Grok-DeepSearch', 'MistralAI-User', 'DeepSeek-AI', 'YouBot', From ec7e5a7e398e9a6f051f210cf1df63ca2843675e Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 6 May 2026 13:08:47 -0700 Subject: [PATCH 3/6] fix(seo): drop unverified Grok UAs, correct DeepSeekBot and ImagesiftBot tokens --- apps/sim/app/robots.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index e0f39529a96..63ad16bff68 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -84,16 +84,13 @@ const SEARCH_AND_AI_BOTS = [ 'CCBot', 'cohere-ai', 'cohere-training-data-crawler', - 'GrokBot', - 'xAI-Grok', - 'Grok-DeepSearch', 'MistralAI-User', - 'DeepSeek-AI', + 'DeepSeekBot', 'YouBot', 'Diffbot', 'AI2Bot', 'Timpibot', - 'ImageSiftBot', + 'ImagesiftBot', ] /** From 8a615f3cd44566a135f6613f6a20f9dd95ed8559 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 6 May 2026 13:12:12 -0700 Subject: [PATCH 4/6] fix(seo): re-add Bravebot to allow-list per Brave Search docs --- apps/sim/app/robots.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index 63ad16bff68..4daee694702 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -52,6 +52,7 @@ const SEARCH_AND_AI_BOTS = [ 'Bingbot', 'DuckDuckBot', 'Kagibot', + 'Bravebot', // Regional search engines 'YandexBot', 'Baiduspider', From 55104852290d728126d3b0d73b78945f1dbd291d Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 6 May 2026 14:14:48 -0700 Subject: [PATCH 5/6] improvement(seo): drop redundant named AI/search bot allow-list --- apps/sim/app/robots.ts | 55 ------------------------------------------ 1 file changed, 55 deletions(-) diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index 4daee694702..e7cf92c87e0 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -40,60 +40,6 @@ const LINK_PREVIEW_DISALLOWED_PATHS = [ '/private/', ] -/** - * Search engines and AI/answer-engine crawlers explicitly allow-listed for - * SEO/AEO/GEO. Explicit Allow rules ensure these bots are not accidentally - * suppressed by downstream filters and signal intent to operators that - * publish allow-list audits (Profound, Scrunch, Otterly, etc.). - */ -const SEARCH_AND_AI_BOTS = [ - // Western search engines - 'Googlebot', - 'Bingbot', - 'DuckDuckBot', - 'Kagibot', - 'Bravebot', - // Regional search engines - 'YandexBot', - 'Baiduspider', - 'Sogou web spider', - 'Yeti', - 'SeznamBot', - 'PetalBot', - // OpenAI - 'GPTBot', - 'OAI-SearchBot', - 'ChatGPT-User', - // Anthropic - 'ClaudeBot', - 'Claude-SearchBot', - 'Claude-User', - // Google AI - 'Google-Extended', - // Perplexity - 'PerplexityBot', - 'Perplexity-User', - // Apple - 'Applebot', - 'Applebot-Extended', - // Meta - 'Meta-ExternalAgent', - 'Meta-ExternalFetcher', - 'FacebookBot', - // Other major AI / answer engines - 'Amazonbot', - 'CCBot', - 'cohere-ai', - 'cohere-training-data-crawler', - 'MistralAI-User', - 'DeepSeekBot', - 'YouBot', - 'Diffbot', - 'AI2Bot', - 'Timpibot', - 'ImagesiftBot', -] - /** * Social and messaging platforms that fetch URLs to render link previews * (Open Graph / Twitter Card images). These need access to publicly-shared @@ -118,7 +64,6 @@ export default function robots(): MetadataRoute.Robots { return { rules: [ { userAgent: '*', allow: '/', disallow: DISALLOWED_PATHS }, - { userAgent: SEARCH_AND_AI_BOTS, allow: '/', disallow: DISALLOWED_PATHS }, { userAgent: LINK_PREVIEW_BOTS, allow: '/', From b9a1f584f32eda137ca7dd9cdb6806f7a60be7ed Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 6 May 2026 14:16:05 -0700 Subject: [PATCH 6/6] chore(seo): trim verbose comments in robots.ts --- apps/sim/app/robots.ts | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index e7cf92c87e0..3adce4103f3 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -1,10 +1,6 @@ import type { MetadataRoute } from 'next' import { getBaseUrl } from '@/lib/core/utils/urls' -/** - * Default disallow list applied to crawlers and the wildcard rule. Blocks - * authenticated surfaces, internal endpoints, and one-time-use links. - */ const DISALLOWED_PATHS = [ '/api/', '/workspace/', @@ -21,12 +17,7 @@ const DISALLOWED_PATHS = [ '/blog*tag=', ] -/** - * Tighter disallow list for link-preview bots. They fetch single URLs to - * render Open Graph cards rather than crawl, so publicly-shareable surfaces - * like /chat/ and /form/ must be reachable for previews to render. Other - * authenticated routes (/workspace/, /w/, /playground/) stay blocked. - */ +/** Looser disallow than the wildcard so OG previews can fetch /chat/ and /form/. */ const LINK_PREVIEW_DISALLOWED_PATHS = [ '/api/', '/workspace/', @@ -40,11 +31,6 @@ const LINK_PREVIEW_DISALLOWED_PATHS = [ '/private/', ] -/** - * Social and messaging platforms that fetch URLs to render link previews - * (Open Graph / Twitter Card images). These need access to publicly-shared - * surfaces like /chat/ and /form/ that are otherwise blocked. - */ const LINK_PREVIEW_BOTS = [ 'Twitterbot', 'LinkedInBot',