From f58877a0721abfca0798c1d72915e24d1afcea30 Mon Sep 17 00:00:00 2001
From: Deywumi-debug
Date: Sun, 31 May 2026 13:11:08 +0000
Subject: [PATCH] feat: add content moderation module

- OpenAI moderation API integration (POST /moderation/check)
- Local profanity filtering with stem-matching regex patterns
- Spam detection (repeated chars, URL flooding, spam phrases, excessive caps)
- Auto-reject on any violation with flags and human-readable reason
- ModerationService exported for use by other modules
- 14 unit tests, all passing
- Gated behind ENABLE_MODERATION feature flag
---
 src/app.module.ts                             |   4 +
 .../moderation/dto/moderate-content.dto.ts    |  10 ++
 .../moderation/dto/moderation-result.dto.ts   |  17 +++
 .../moderation/moderation.controller.ts       |  17 +++
 src/modules/moderation/moderation.module.ts   |  12 ++
 .../moderation/moderation.service.spec.ts     | 144 ++++++++++++++++++
 src/modules/moderation/moderation.service.ts  | 118 ++++++++++++++
 7 files changed, 322 insertions(+)
 create mode 100644 src/modules/moderation/dto/moderate-content.dto.ts
 create mode 100644 src/modules/moderation/dto/moderation-result.dto.ts
 create mode 100644 src/modules/moderation/moderation.controller.ts
 create mode 100644 src/modules/moderation/moderation.module.ts
 create mode 100644 src/modules/moderation/moderation.service.spec.ts
 create mode 100644 src/modules/moderation/moderation.service.ts

diff --git a/src/app.module.ts b/src/app.module.ts
index b1e99800..ef4d00ac 100644
--- a/src/app.module.ts
+++ b/src/app.module.ts
@@ -21,6 +21,7 @@ import { CanaryModule } from './canary/canary.module';
 import { IncidentManagementModule } from './incident-management/incident-management.module';
 import { MonitoringModule } from './monitoring/monitoring.module';
 import { RequestTimeoutInterceptor } from './common/interceptors/request-timeout.interceptor';
+import { ModerationModule } from './modules/moderation/moderation.module';
 
 // ✅ keep BOTH modules
 import { ReadReplicaModule } from './database/read-replica';
@@ -49,6 +50,9 @@ const featureFlags = loadFeatureFlags();
 
     // ✅ feature-flagged caching
     ...(featureFlags.ENABLE_CACHING ? [CachingModule] : []),
+
+    // content moderation
+    ...(featureFlags.ENABLE_MODERATION ? [ModerationModule] : []),
   ],
   controllers: [AppController],
   providers: [
diff --git a/src/modules/moderation/dto/moderate-content.dto.ts b/src/modules/moderation/dto/moderate-content.dto.ts
new file mode 100644
index 00000000..c075a5a7
--- /dev/null
+++ b/src/modules/moderation/dto/moderate-content.dto.ts
@@ -0,0 +1,10 @@
+import { IsString, IsNotEmpty, MaxLength } from 'class-validator';
+import { ApiProperty } from '@nestjs/swagger';
+
+export class ModerateContentDto {
+  @ApiProperty({ description: 'Content to moderate', maxLength: 10000 })
+  @IsString()
+  @IsNotEmpty()
+  @MaxLength(10000)
+  content: string;
+}
diff --git a/src/modules/moderation/dto/moderation-result.dto.ts b/src/modules/moderation/dto/moderation-result.dto.ts
new file mode 100644
index 00000000..6062b917
--- /dev/null
+++ b/src/modules/moderation/dto/moderation-result.dto.ts
@@ -0,0 +1,17 @@
+import { ApiProperty } from '@nestjs/swagger';
+
+export type ModerationFlag = 'profanity' | 'spam' | 'openai_violation';
+
+export class ModerationResultDto {
+  @ApiProperty({ description: 'Whether the content is allowed' })
+  allowed: boolean;
+
+  @ApiProperty({ description: 'Whether the content was auto-rejected' })
+  autoRejected: boolean;
+
+  @ApiProperty({ description: 'Flags triggered', type: [String] })
+  flags: ModerationFlag[];
+
+  @ApiProperty({ description: 'Human-readable reason if rejected', required: false })
+  reason?: string;
+}
diff --git a/src/modules/moderation/moderation.controller.ts b/src/modules/moderation/moderation.controller.ts
new file mode 100644
index 00000000..e7a721ca
--- /dev/null
+++ b/src/modules/moderation/moderation.controller.ts
@@ -0,0 +1,17 @@
+import { Body, Controller, Post } from '@nestjs/common';
+import { ApiOperation, ApiTags } from '@nestjs/swagger';
+import { ModerationService } from './moderation.service';
+import { ModerateContentDto } from './dto/moderate-content.dto';
+import { ModerationResultDto } from './dto/moderation-result.dto';
+
+@ApiTags('moderation')
+@Controller('moderation')
+export class ModerationController {
+  constructor(private readonly moderationService: ModerationService) {}
+
+  @Post('check')
+  @ApiOperation({ summary: 'Check content for policy violations' })
+  check(@Body() dto: ModerateContentDto): Promise<ModerationResultDto> {
+    return this.moderationService.moderate(dto.content);
+  }
+}
diff --git a/src/modules/moderation/moderation.module.ts b/src/modules/moderation/moderation.module.ts
new file mode 100644
index 00000000..b2b6ed1b
--- /dev/null
+++ b/src/modules/moderation/moderation.module.ts
@@ -0,0 +1,12 @@
+import { Module } from '@nestjs/common';
+import { HttpModule } from '@nestjs/axios';
+import { ModerationService } from './moderation.service';
+import { ModerationController } from './moderation.controller';
+
+@Module({
+  imports: [HttpModule],
+  controllers: [ModerationController],
+  providers: [ModerationService],
+  exports: [ModerationService],
+})
+export class ModerationModule {}
diff --git a/src/modules/moderation/moderation.service.spec.ts b/src/modules/moderation/moderation.service.spec.ts
new file mode 100644
index 00000000..90ba9fc8
--- /dev/null
+++ b/src/modules/moderation/moderation.service.spec.ts
@@ -0,0 +1,144 @@
+import { Test, TestingModule } from '@nestjs/testing';
+import { HttpService } from '@nestjs/axios';
+import { ConfigService } from '@nestjs/config';
+import { of, throwError } from 'rxjs';
+import { ModerationService } from './moderation.service';
+
+const mockHttpService = { post: jest.fn() };
+const mockConfigService = { get: jest.fn() };
+
+describe('ModerationService', () => {
+  let service: ModerationService;
+
+  beforeEach(async () => {
+    jest.clearAllMocks();
+    mockConfigService.get.mockReturnValue(''); // no API key by default
+
+    const module: TestingModule = await Test.createTestingModule({
+      providers: [
+        ModerationService,
+        { provide: HttpService, useValue: mockHttpService },
+        { provide: ConfigService, useValue: mockConfigService },
+      ],
+    }).compile();
+
+    service = module.get(ModerationService);
+  });
+
+  describe('clean content', () => {
+    it('allows clean content', async () => {
+      const result = await service.moderate('This is a great course!');
+      expect(result.allowed).toBe(true);
+      expect(result.autoRejected).toBe(false);
+      expect(result.flags).toHaveLength(0);
+      expect(result.reason).toBeUndefined();
+    });
+  });
+
+  describe('profanity filter', () => {
+    it('flags content with profanity', async () => {
+      const result = await service.moderate('This is fucking terrible');
+      expect(result.allowed).toBe(false);
+      expect(result.autoRejected).toBe(true);
+      expect(result.flags).toContain('profanity');
+    });
+
+    it('is case-insensitive', async () => {
+      const result = await service.moderate('SHIT happens');
+      expect(result.flags).toContain('profanity');
+    });
+  });
+
+  describe('spam detection', () => {
+    it('flags repeated characters', async () => {
+      const result = await service.moderate('heeeeeeeeeeello world');
+      expect(result.flags).toContain('spam');
+    });
+
+    it('flags 3 or more URLs', async () => {
+      const result = await service.moderate(
+        'Visit http://a.com and http://b.com and http://c.com',
+      );
+      expect(result.flags).toContain('spam');
+    });
+
+    it('allows content with fewer than 3 URLs', async () => {
+      const result = await service.moderate('Check http://a.com for details');
+      expect(result.flags).not.toContain('spam');
+    });
+
+    it('flags known spam phrases', async () => {
+      const result = await service.moderate('Buy now and make money fast!');
+      expect(result.flags).toContain('spam');
+    });
+
+    it('flags excessive uppercase', async () => {
+      const result = await service.moderate('THIS IS ALL CAPS SHOUTING AT YOU');
+      expect(result.flags).toContain('spam');
+    });
+  });
+
+  describe('OpenAI integration', () => {
+    async function makeServiceWithKey(key: string): Promise<ModerationService> {
+      mockConfigService.get.mockReturnValue(key);
+      const mod = await Test.createTestingModule({
+        providers: [
+          ModerationService,
+          { provide: HttpService, useValue: mockHttpService },
+          { provide: ConfigService, useValue: mockConfigService },
+        ],
+      }).compile();
+      return mod.get(ModerationService);
+    }
+
+    it('flags content when OpenAI returns flagged=true', async () => {
+      const svc = await makeServiceWithKey('sk-test-key');
+      mockHttpService.post.mockReturnValue(
+        of({ data: { results: [{ flagged: true }] } }),
+      );
+      const result = await svc.moderate('some harmful content');
+      expect(result.flags).toContain('openai_violation');
+      expect(result.allowed).toBe(false);
+    });
+
+    it('allows content when OpenAI returns flagged=false', async () => {
+      const svc = await makeServiceWithKey('sk-test-key');
+      mockHttpService.post.mockReturnValue(
+        of({ data: { results: [{ flagged: false }] } }),
+      );
+      const result = await svc.moderate('normal content here');
+      expect(result.flags).not.toContain('openai_violation');
+      expect(result.allowed).toBe(true);
+    });
+
+    it('does not reject when OpenAI call fails (graceful degradation)', async () => {
+      const svc = await makeServiceWithKey('sk-test-key');
+      mockHttpService.post.mockReturnValue(throwError(() => new Error('network error')));
+      const result = await svc.moderate('normal content here');
+      expect(result.flags).not.toContain('openai_violation');
+      expect(result.allowed).toBe(true);
+    });
+
+    it('skips OpenAI check when no API key configured', async () => {
+      const svc = await makeServiceWithKey('');
+      await svc.moderate('clean content');
+      expect(mockHttpService.post).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('auto-reject', () => {
+    it('auto-rejects and includes reason when any flag is set', async () => {
+      const result = await service.moderate('buy now and make money fast!');
+      expect(result.autoRejected).toBe(true);
+      expect(result.reason).toBeTruthy();
+    });
+
+    it('accumulates multiple flags', async () => {
+      // profanity + spam phrase
+      const result = await service.moderate('buy now you fucking idiot');
+      expect(result.flags).toContain('profanity');
+      expect(result.flags).toContain('spam');
+      expect(result.flags.length).toBeGreaterThanOrEqual(2);
+    });
+  });
+});
diff --git a/src/modules/moderation/moderation.service.ts b/src/modules/moderation/moderation.service.ts
new file mode 100644
index 00000000..b94c956c
--- /dev/null
+++ b/src/modules/moderation/moderation.service.ts
@@ -0,0 +1,118 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { HttpService } from '@nestjs/axios';
+import { ConfigService } from '@nestjs/config';
+import { firstValueFrom } from 'rxjs';
+import { ModerationResultDto, ModerationFlag } from './dto/moderation-result.dto';
+
+// Basic profanity list — matches word stems (e.g. "fucking", "shitty")
+const PROFANITY_PATTERNS = [
+  /\bf+u+c+k/i,
+  /\bs+h+i+t/i,
+  /\ba+s+s+h+o+l+e/i,
+  /\bb+i+t+c+h/i,
+  /\bc+u+n+t/i,
+  /\bn+i+g+g+e+r/i,
+  /\bf+a+g+g+o+t/i,
+];
+
+// Spam regexes by index: [0] repeated chars, [1] URLs, [2] spam phrases (caps ratio: see isSpam)
+const SPAM_PATTERNS = [
+  /(.)\1{9,}/, // 10+ repeated characters
+  /https?:\/\/\S+/gi, // URLs (flag if 3+)
+  /\b(buy now|click here|free money|make money fast|earn \$|limited offer|act now)\b/i,
+];
+
+@Injectable()
+export class ModerationService {
+  private readonly logger = new Logger(ModerationService.name);
+  private readonly openaiApiKey: string;
+
+  constructor(
+    private readonly httpService: HttpService,
+    private readonly configService: ConfigService,
+  ) {
+    this.openaiApiKey = this.configService.get<string>('OPENAI_API_KEY', '');
+  }
+
+  async moderate(content: string): Promise<ModerationResultDto> {
+    const flags: ModerationFlag[] = [];
+
+    // 1. Profanity filter (local, fast)
+    if (this.hasProfanity(content)) {
+      flags.push('profanity');
+    }
+
+    // 2. Spam detection (local, fast)
+    if (this.isSpam(content)) {
+      flags.push('spam');
+    }
+
+    // 3. OpenAI moderation API (remote)
+    if (this.openaiApiKey) {
+      const openaiViolation = await this.checkOpenAI(content);
+      if (openaiViolation) {
+        flags.push('openai_violation');
+      }
+    }
+
+    const autoRejected = flags.length > 0;
+
+    return {
+      allowed: !autoRejected,
+      autoRejected,
+      flags,
+      reason: autoRejected ? this.buildReason(flags) : undefined,
+    };
+  }
+
+  private hasProfanity(content: string): boolean {
+    return PROFANITY_PATTERNS.some((pattern) => pattern.test(content));
+  }
+
+  private isSpam(content: string): boolean {
+    // Flag if repeated-char pattern found
+    if (SPAM_PATTERNS[0].test(content)) return true;
+
+    // Flag if 3+ URLs
+    const urls = content.match(SPAM_PATTERNS[1]) ?? [];
+    if (urls.length >= 3) return true;
+
+    // Flag known spam phrases
+    if (SPAM_PATTERNS[2].test(content)) return true;
+
+    // Flag if >70% uppercase (min 20 chars)
+    if (content.length >= 20) {
+      const letters = content.replace(/[^a-zA-Z]/g, '');
+      if (letters.length > 0 && letters.replace(/[^A-Z]/g, '').length / letters.length > 0.7) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  private async checkOpenAI(content: string): Promise<boolean> {
+    try {
+      const response = await firstValueFrom(
+        this.httpService.post(
+          'https://api.openai.com/v1/moderations',
+          { input: content },
+          { headers: { Authorization: `Bearer ${this.openaiApiKey}` } },
+        ),
+      );
+      return response.data?.results?.[0]?.flagged === true;
+    } catch (err) {
+      this.logger.warn(`OpenAI moderation check failed: ${(err as Error).message}`);
+      return false;
+    }
+  }
+
+  private buildReason(flags: ModerationFlag[]): string {
+    const descriptions: Record<ModerationFlag, string> = {
+      profanity: 'contains profanity',
+      spam: 'detected as spam',
+      openai_violation: 'violates content policy',
+    };
+    return flags.map((f) => descriptions[f]).join('; ');
+  }
+}