From 6eb8787b19c2b82c0ba52083d248753dba27c048 Mon Sep 17 00:00:00 2001 From: idoshamun Date: Sun, 24 May 2026 13:00:21 +0000 Subject: [PATCH 1/3] feat(vordr): auto-flag low-effort top-level comments Adds a pure `isLowEffortComment` filter that runs synchronously on every new top-level comment. When it matches, the comment's vordr flag is set inline through the existing flags mechanism and a counter/log line is emitted so we can monitor the auto-flag rate. Replies and templated squad welcomes are exempt. Co-Authored-By: Claude Opus 4.7 (1M context) --- __tests__/common/lowEffortComment.test.ts | 64 +++++ src/common/lowEffortComment.ts | 309 ++++++++++++++++++++++ src/schema/comments.ts | 37 ++- 3 files changed, 402 insertions(+), 8 deletions(-) create mode 100644 __tests__/common/lowEffortComment.test.ts create mode 100644 src/common/lowEffortComment.ts diff --git a/__tests__/common/lowEffortComment.test.ts b/__tests__/common/lowEffortComment.test.ts new file mode 100644 index 0000000000..8a95ddb0c0 --- /dev/null +++ b/__tests__/common/lowEffortComment.test.ts @@ -0,0 +1,64 @@ +import { isLowEffortComment } from '../../src/common/lowEffortComment'; + +describe('isLowEffortComment', () => { + describe('positive cases (low-effort)', () => { + it.each([ + ['nice'], + ['Great!'], + ['Thanks for sharing!'], + ["Let's go!"], + ['sooo cool'], + ['👍👍👍'], + ['Thanks 👍'], + ['wow trop coool'], + ['gracias'], + ['Helpful post.'], + ['this is good'], + ['OK'], + ['thx'], + ])('flags %p as low-effort', (content) => { + expect(isLowEffortComment(content)).toBe(true); + }); + }); + + describe('negative cases (substantive)', () => { + it.each([ + ['@alice welcome to The Awesome Squad!'], + ['is this secure?'], + ['free version?'], + ['Concerning'], + ['Too verbose'], + ['Embrace Enshittification!'], + ['Option D'], + ['Answer: C (NoSQL Database)'], + ['Awesome, a PR from Taylor!'], + ])('does not flag %p', (content) => { + expect(isLowEffortComment(content)).toBe(false); + }); + 
}); + + describe('edge cases', () => { + it('treats empty string as low-effort', () => { + expect(isLowEffortComment('')).toBe(true); + }); + + it('handles mixed-case the same as lowercase', () => { + expect(isLowEffortComment('GREAT!')).toBe(true); + expect(isLowEffortComment('NiCe')).toBe(true); + }); + + it('ignores leading/trailing whitespace', () => { + expect(isLowEffortComment(' nice ')).toBe(true); + expect(isLowEffortComment('\t\nthanks\n')).toBe(true); + }); + + it('does not flag a long substantive comment', () => { + const longComment = + 'This article walks through the tradeoffs between event sourcing and ' + + 'CRUD persistence really clearly, and I appreciated the concrete ' + + 'benchmarks they ran on Postgres versus Kafka — definitely worth a ' + + 'second read before our next architecture review.'; + expect(isLowEffortComment(longComment)).toBe(false); + }); + }); +}); diff --git a/src/common/lowEffortComment.ts b/src/common/lowEffortComment.ts new file mode 100644 index 0000000000..f863d76f43 --- /dev/null +++ b/src/common/lowEffortComment.ts @@ -0,0 +1,309 @@ +const LOW_VOCAB = new Set([ + '', + 'a', + 'an', + 'the', + 'is', + 'it', + 'this', + 'that', + 'these', + 'those', + 'i', + 'you', + 'me', + 'my', + 'your', + 'our', + 'we', + 'us', + 'of', + 'to', + 'for', + 'and', + 'or', + 'but', + 'so', + 'very', + 'really', + 'super', + 'too', + 'quite', + 'pretty', + 'much', + 'all', + 'one', + 'ones', + 'some', + 'any', + 'no', + 'not', + 'its', + 'am', + 'are', + 'was', + 'were', + 'be', + 'been', + 'will', + 'just', + 'still', + 'with', + 'as', + 'at', + 'on', + 'in', + 'by', + 'from', + 'definitely', + 'absolutely', + 'totally', + 'completely', + 'darn', + 'let', + 'lot', + 'another', + 'being', + 'thanks', + 'thank', + 'thx', + 'ty', + 'tysm', + 'cheers', + 'kudos', + 'respect', + 'props', + 'salute', + 'appreciate', + 'appreciated', + 'appreciation', + 'gracias', + 'por', + 'la', + 'informacion', + 'información', + 'merci', + 'danke', + 
'obrigado', + 'grazie', + 'excelente', + 'excellente', + 'genial', + 'bravo', + 'arigato', + 'spasibo', + 'dziekuje', + 'dziękuję', + 'trop', + 'nice', + 'great', + 'good', + 'cool', + 'awesome', + 'amazing', + 'wonderful', + 'beautiful', + 'brilliant', + 'excellent', + 'fantastic', + 'impressive', + 'inspiring', + 'insightful', + 'informative', + 'helpful', + 'useful', + 'interesting', + 'lovely', + 'perfect', + 'epic', + 'solid', + 'noice', + 'dope', + 'sweet', + 'fire', + 'lit', + 'based', + 'huge', + 'massive', + 'wise', + 'well', + 'deserved', + 'hats', + 'off', + 'awsome', + 'aweome', + 'amazng', + 'greatt', + 'goood', + 'niceee', + 'cooll', + 'colll', + 'coll', + 'coool', + 'superb', + 'inshight', + 'impresive', + 'interensting', + 'damnn', + 'wowww', + 'soo', + 'noo', + 'boo', + 'goo', + 'woo', + 'yoo', + 'loo', + 'loveet', + 'love', + 'loved', + 'loves', + 'liked', + 'like', + 'post', + 'article', + 'read', + 'reading', + 'work', + 'find', + 'share', + 'sharing', + 'info', + 'tip', + 'tips', + 'update', + 'news', + 'idea', + 'ideas', + 'point', + 'catch', + 'tool', + 'feature', + 'stuff', + 'job', + 'observation', + 'insight', + 'insights', + 'heads', + 'up', + 'image', + 'content', + 'blog', + 'story', + 'recap', + 'digest', + 'thinking', + 'decision', + 'wow', + 'omg', + 'damn', + 'damnn', + 'ouch', + 'rip', + 'lol', + 'lolol', + 'haha', + 'hahaha', + 'hehe', + 'yay', + 'yes', + 'yeah', + 'yep', + 'yup', + 'yepp', + 'nope', + 'ok', + 'okay', + 'hi', + 'hello', + 'hey', + 'sad', + 'crying', + 'wtf', + 'sus', + 'meh', + 'bruh', + 'agree', + 'agreed', + 'same', + 'true', + 'truth', + 'facts', + 'fact', + 'preach', + 'real', + 'accurate', + 'correct', + 'finally', + 'first', + 'congrats', + 'congratulations', + 'welcome', + 'farewell', + 'lets', + 'go', + 'goo', + 'lessgo', + 'lessgoo', + 'lfg', + 'lesgo', + 'lesgoo', + 'said', + 'put', + 'done', + 'deserved', + 'banger', + 'game', + 'changer', + 'changing', + 'underrated', + 'overrated', + 'looks', + 
'sounds', + 'seems', + 'feels', + 'feel', + 'looking', + 'sounding', + 'building', + 'build', + 'built', + 'bro', + 'man', + 'dude', + 'guys', + 'mate', + 'sir', + 'folks', + 'team', + 'w', + 'l', + 'ws', + 'ls', + 'big', + 'small', +]); + +const APOSTROPHE_FRAGMENTS = /\b(?:s|t|d|m|ll|ve|re)\b/g; + +const normalize = (content: string): { norm: string; words: string[] } => { + let c = content.toLowerCase(); + c = c.replace(/<[^>]+>/g, ' '); + c = c.replace(/https?:\/\/\S+/g, ' '); + c = c.replace(/@[A-Za-z0-9_-]+/g, ' '); + c = c.replace(/['`’]/g, ' '); + c = c.replace(/[^a-z0-9 ]+/g, ' '); + c = c.replace(/([a-z])\1{2,}/g, '$1$1'); + c = c.replace(APOSTROPHE_FRAGMENTS, ' '); + c = c.replace(/\s+/g, ' ').trim(); + return { norm: c, words: c.length ? c.split(' ') : [] }; +}; + +export const isLowEffortComment = (content: string): boolean => { + if (/^\s*@[A-Za-z0-9_-]+\s+welcome\s+to\s+.+$/i.test(content)) return false; + + const { norm, words } = normalize(content); + if (norm.length === 0) return true; + if (norm.length <= 4) return true; + if (words.length <= 6 && words.every((w) => LOW_VOCAB.has(w))) return true; + return false; +}; diff --git a/src/schema/comments.ts b/src/schema/comments.ts index 230e77cdcd..8bf5d6fe06 100644 --- a/src/schema/comments.ts +++ b/src/schema/comments.ts @@ -49,6 +49,9 @@ import { VordrFilterType, whereVordrFilter, } from '../common/vordr'; +import { isLowEffortComment } from '../common/lowEffortComment'; +import { logger } from '../logger'; +import { counters } from '../telemetry'; import { reportComment } from '../common/reporting'; import { ReportReason } from '../entity/common'; import { toGQLEnum } from '../common/utils'; @@ -1078,16 +1081,34 @@ export const resolvers: IResolvers = { content, }); - createdComment.flags = { - ...createdComment.flags, - vordr: await checkWithVordr( + const lowEffort = isLowEffortComment(createdComment.content); + if (lowEffort) { + logger.info( { - id: createdComment.id, - type: 
VordrFilterType.Comment, - content: createdComment.content, + userId: ctx.userId, + commentId: createdComment.id, + postId, }, - ctx, - ), + 'comment_low_effort_autoflagged', + ); + counters?.api?.vordr?.add(1, { + reason: 'low_effort', + type: VordrFilterType.Comment, + }); + } + + createdComment.flags = { + ...createdComment.flags, + vordr: + lowEffort || + (await checkWithVordr( + { + id: createdComment.id, + type: VordrFilterType.Comment, + content: createdComment.content, + }, + ctx, + )), }; return saveNewComment(entityManager, createdComment, squadId); From 17ecd0250a7af234a76dad23a81742646376935d Mon Sep 17 00:00:00 2001 From: idoshamun Date: Sun, 24 May 2026 13:04:35 +0000 Subject: [PATCH 2/3] refactor(vordr): simplify low-effort comment check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop dead empty-string entry and duplicate vocab tokens (`deserved`, `damnn`, `goo`) — Set already dedups but the source list shouldn't. - Inline normalize() as a fluent chain; collapse the two length checks into one (0 < 4) and drop the unused `norm`/`words` dual return. - Tighten the resolver wire-in by reusing `commentId` / `content` instead of re-reading them from `createdComment`. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/common/lowEffortComment.ts | 37 ++++++++++++++-------------------- src/schema/comments.ts | 14 +++---------- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/src/common/lowEffortComment.ts b/src/common/lowEffortComment.ts index f863d76f43..41703b9847 100644 --- a/src/common/lowEffortComment.ts +++ b/src/common/lowEffortComment.ts @@ -1,5 +1,4 @@ const LOW_VOCAB = new Set([ - '', 'a', 'an', 'the', @@ -196,7 +195,6 @@ const LOW_VOCAB = new Set([ 'wow', 'omg', 'damn', - 'damnn', 'ouch', 'rip', 'lol', @@ -241,7 +239,6 @@ const LOW_VOCAB = new Set([ 'farewell', 'lets', 'go', - 'goo', 'lessgo', 'lessgoo', 'lfg', @@ -250,7 +247,6 @@ const LOW_VOCAB = new Set([ 'said', 'put', 'done', - 'deserved', 'banger', 'game', 'changer', @@ -285,25 +281,22 @@ const LOW_VOCAB = new Set([ const APOSTROPHE_FRAGMENTS = /\b(?:s|t|d|m|ll|ve|re)\b/g; -const normalize = (content: string): { norm: string; words: string[] } => { - let c = content.toLowerCase(); - c = c.replace(/<[^>]+>/g, ' '); - c = c.replace(/https?:\/\/\S+/g, ' '); - c = c.replace(/@[A-Za-z0-9_-]+/g, ' '); - c = c.replace(/['`’]/g, ' '); - c = c.replace(/[^a-z0-9 ]+/g, ' '); - c = c.replace(/([a-z])\1{2,}/g, '$1$1'); - c = c.replace(APOSTROPHE_FRAGMENTS, ' '); - c = c.replace(/\s+/g, ' ').trim(); - return { norm: c, words: c.length ? 
c.split(' ') : [] }; -}; - export const isLowEffortComment = (content: string): boolean => { if (/^\s*@[A-Za-z0-9_-]+\s+welcome\s+to\s+.+$/i.test(content)) return false; - const { norm, words } = normalize(content); - if (norm.length === 0) return true; - if (norm.length <= 4) return true; - if (words.length <= 6 && words.every((w) => LOW_VOCAB.has(w))) return true; - return false; + const normalized = content + .toLowerCase() + .replace(/<[^>]+>/g, ' ') + .replace(/https?:\/\/\S+/g, ' ') + .replace(/@[A-Za-z0-9_-]+/g, ' ') + .replace(/['`’]/g, ' ') + .replace(/[^a-z0-9 ]+/g, ' ') + .replace(/([a-z])\1{2,}/g, '$1$1') + .replace(APOSTROPHE_FRAGMENTS, ' ') + .replace(/\s+/g, ' ') + .trim(); + + if (normalized.length <= 4) return true; + const words = normalized.split(' '); + return words.length <= 6 && words.every((w) => LOW_VOCAB.has(w)); }; diff --git a/src/schema/comments.ts b/src/schema/comments.ts index 8bf5d6fe06..387dc7a200 100644 --- a/src/schema/comments.ts +++ b/src/schema/comments.ts @@ -1081,14 +1081,10 @@ export const resolvers: IResolvers = { content, }); - const lowEffort = isLowEffortComment(createdComment.content); + const lowEffort = isLowEffortComment(content); if (lowEffort) { logger.info( - { - userId: ctx.userId, - commentId: createdComment.id, - postId, - }, + { userId: ctx.userId, commentId, postId }, 'comment_low_effort_autoflagged', ); counters?.api?.vordr?.add(1, { @@ -1102,11 +1098,7 @@ export const resolvers: IResolvers = { vordr: lowEffort || (await checkWithVordr( - { - id: createdComment.id, - type: VordrFilterType.Comment, - content: createdComment.content, - }, + { id: commentId, type: VordrFilterType.Comment, content }, ctx, )), }; From 295b2d2ad7159f95813f9a274a7cc4e37384d8c9 Mon Sep 17 00:00:00 2001 From: idoshamun <1993245+idoshamun@users.noreply.github.com> Date: Sun, 24 May 2026 13:31:42 +0000 Subject: [PATCH 3/3] fix(vordr): broaden welcome carve-out and exempt GIF/image embeds MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Two FPs surfaced during data validation: - '@user welcome as well!!!' (and 'welcome aboard', 'welcome back', …) — the templated-welcome filter was anchored to 'welcome to <squad>'. Broaden to 'welcome\\b' so any greeting continuation is exempt. - Comments containing a markdown image/GIF embed (e.g. tenor/klipy GIFs, with or without surrounding text) were being flagged because after URL strip only 'GIF' or trailing junk remained. Add an IMAGE_EMBED guard that exempts any comment containing '![alt](url)' (also tolerates the angle-bracketed URL form '![alt](<url>)'). Adds Jest cases for both FP classes plus a few collapse-variant positives (Damnnnnnn, Greatt!!!). --- __tests__/common/lowEffortComment.test.ts | 20 ++++++++++++++++++++ src/common/lowEffortComment.ts | 9 ++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/__tests__/common/lowEffortComment.test.ts b/__tests__/common/lowEffortComment.test.ts index 8a95ddb0c0..6547601b98 100644 --- a/__tests__/common/lowEffortComment.test.ts +++ b/__tests__/common/lowEffortComment.test.ts @@ -16,6 +16,8 @@ describe('isLowEffortComment', () => { ['this is good'], ['OK'], ['thx'], + ['Damnnnnnn'], + ['Greatt!!!'], ])('flags %p as low-effort', (content) => { expect(isLowEffortComment(content)).toBe(true); }); @@ -23,12 +25,30 @@ describe('isLowEffortComment', () => { describe('negative cases (substantive)', () => { it.each([ + // Templated @user welcome … greetings (any continuation, not just "welcome to") ['@alice welcome to The Awesome Squad!'], + ['@bob welcome aboard! 
Glad to have you'], + ['@giovannicompitiliceo welcome as well!!!'], + ['@carol welcome back!'], + + // Image / GIF markdown embeds — anywhere in the comment, with or without surrounding text + ['![GIF](https://media.tenor.com/PXOXwsJKbSYAAAAC/where-you.gif)'], + ['![GIF](https://media.tenor.com/PXOXwsJKbSYAAAAC/where-you.gif) ??'], + ['![GIF](<https://media.tenor.com/PXOXwsJKbSYAAAAC/where-you.gif>)'], + ['check this out ![GIF](https://media.tenor.com/x.gif)'], + + // Real questions ['is this secure?'], ['free version?'], + ['is the github repo down?'], + ['And what is that extension?'], + + // Substantive short comments / opinions / answers ['Concerning'], ['Too verbose'], ['Embrace Enshittification!'], + ['Certified NPM classic.'], + ['Typescript supremacy'], ['Option D'], ['Answer: C (NoSQL Database)'], ['Awesome, a PR from Taylor!'], diff --git a/src/common/lowEffortComment.ts b/src/common/lowEffortComment.ts index 41703b9847..61c0c8a974 100644 --- a/src/common/lowEffortComment.ts +++ b/src/common/lowEffortComment.ts @@ -280,9 +280,16 @@ const LOW_VOCAB = new Set([ ]); const APOSTROPHE_FRAGMENTS = /\b(?:s|t|d|m|ll|ve|re)\b/g; +const TEMPLATED_WELCOME = /^\s*@[A-Za-z0-9_-]+\s+welcome\b/i; +const IMAGE_EMBED = /!\[[^\]]*\]\(<?[^)\s]+>?\)/; export const isLowEffortComment = (content: string): boolean => { - if (/^\s*@[A-Za-z0-9_-]+\s+welcome\s+to\s+.+$/i.test(content)) return false; + // Carve-outs — intentional/templated content that should never be flagged: + // * "@user welcome …" greetings ("welcome to <squad>", "welcome aboard", + // "welcome back", "welcome as well", …) + // * any comment containing a markdown image / GIF embed + if (TEMPLATED_WELCOME.test(content)) return false; + if (IMAGE_EMBED.test(content)) return false; const normalized = content .toLowerCase()