From 7823ff96f6a3f352a0c46e54a9a1548d9efb1cd5 Mon Sep 17 00:00:00 2001 From: Jared Zwick <52264361+jaredzwick@users.noreply.github.com> Date: Sun, 3 May 2026 08:15:58 -0400 Subject: [PATCH] hir-94: filter prefetcher hits from open-tracking pixel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Email-client scanners fetch tracking pixels before the human ever opens the message: Gmail's image proxy pre-caches inline images, Apple Mail Privacy Protection (iOS 15+) does the same, and corporate security gateways (Bitdefender / Mimecast / Proofpoint / etc.) scan every inbound image. Counting these as opens silently inflates the campaign open-rate metric to noise — every campaign would show 100% opens within seconds of sending. - src/lib/openTrackingFilter.ts: pure classifyPixelRequest() returns { isPrefetcher, reason }. Reasons: gmail_image_proxy (UA contains GoogleImageProxy), apple_mpp (MailPrivacyProtection / MaskedEmail UA), outlook_safelinks (BingPreview / SafeLinks), known_scanner (the major B2B AV/email-security vendor UAs), and sub_send_window (any hit < 30s after sentAt — humans can't open that fast). Pure, no DB or env access; tests are deterministic. - /api/email-tracking/pixel/[trackingId]: classify before recording. Prefetcher hits still write a discrete email_event (so debugging stays possible) but with metadata.prefetcher = true and never increment campaign.openCount. Real opens compute "first open" by excluding prefetcher events so the human's first hit still counts. - libs/db/src/queries/emailEvent.ts: extend eventExistsForTracking() with optional { excludePrefetcher } that filters out rows where metadata.prefetcher === true. Existing two-arg callers unchanged. 14 vitest specs cover every UA branch (case-insensitive), the time-window heuristic (custom threshold, negative-age skip, missing sentAt skip), and the precedence rule (specific UA reason wins over sub_send_window). tsc clean. 
test:int 109 passed (only pre-existing PAYLOAD_SECRET failure remains). Independent of all pending hir-94 PRs — branches from main. Co-Authored-By: Paperclip --- libs/db/src/queries/emailEvent.ts | 26 ++- .../pixel/[trackingId]/route.ts | 69 +++++-- src/lib/openTrackingFilter.ts | 116 ++++++++++++ tests/int/openTrackingFilter.int.spec.ts | 179 ++++++++++++++++++ 4 files changed, 363 insertions(+), 27 deletions(-) create mode 100644 src/lib/openTrackingFilter.ts create mode 100644 tests/int/openTrackingFilter.int.spec.ts diff --git a/libs/db/src/queries/emailEvent.ts b/libs/db/src/queries/emailEvent.ts index 1528d36..5782dfc 100644 --- a/libs/db/src/queries/emailEvent.ts +++ b/libs/db/src/queries/emailEvent.ts @@ -42,21 +42,31 @@ export const getEventsByTrackingId = async (trackingId: string): Promise => { + const filters = [ + eq(emailEvent.trackingId, trackingId), + eq(emailEvent.eventType, eventType), + ]; + if (options.excludePrefetcher) { + filters.push( + sql`(${emailEvent.metadata} -> 'prefetcher' IS NULL OR ${emailEvent.metadata} ->> 'prefetcher' <> 'true')` + ); + } + const results = await db .select({ id: emailEvent.id }) .from(emailEvent) - .where( - and( - eq(emailEvent.trackingId, trackingId), - eq(emailEvent.eventType, eventType) - ) - ) + .where(and(...filters)) .limit(1); return results.length > 0; diff --git a/src/app/api/email-tracking/pixel/[trackingId]/route.ts b/src/app/api/email-tracking/pixel/[trackingId]/route.ts index bb88a94..0136ada 100644 --- a/src/app/api/email-tracking/pixel/[trackingId]/route.ts +++ b/src/app/api/email-tracking/pixel/[trackingId]/route.ts @@ -6,6 +6,7 @@ import { eventExistsForTracking, incrementCampaignStat, } from '@coldflow/db'; +import { classifyPixelRequest } from '@/lib/openTrackingFilter'; /** * GET /api/email-tracking/pixel/[trackingId].png @@ -34,35 +35,65 @@ export async function GET( const queueEntry = await getQueueEntryByTrackingId(cleanTrackingId); if (queueEntry) { - // Check if this is the first open 
event (to avoid inflating stats) - const firstOpen = !(await eventExistsForTracking(cleanTrackingId, 'opened')); - // Get request metadata const ipAddress = request.headers.get('x-forwarded-for')?.split(',')[0] || request.headers.get('x-real-ip') || 'unknown'; const userAgent = request.headers.get('user-agent') || 'unknown'; - // Create email event - await createEmailEvent({ - id: nanoid(), - queueId: queueEntry.id, - trackingId: cleanTrackingId, - eventType: 'opened', - ipAddress, + // Filter prefetcher hits (Gmail image proxy, Apple MPP, security + // scanners, sub-send-window scans). Counting these as opens silently + // inflates open-rate to noise. + const classification = classifyPixelRequest({ userAgent, - timestamp: new Date(), - metadata: { - firstOpen, - }, + ipAddress, + sentAt: queueEntry.sentAt, }); - // Increment campaign open count only for first open - if (firstOpen) { - await incrementCampaignStat(queueEntry.campaignId, 'openCount'); - } + if (classification.isPrefetcher) { + // Record as a discrete event (so debugging stays possible) but never + // increment openCount. 
+ await createEmailEvent({ + id: nanoid(), + queueId: queueEntry.id, + trackingId: cleanTrackingId, + eventType: 'opened', + ipAddress, + userAgent, + timestamp: new Date(), + metadata: { + firstOpen: false, + prefetcher: true, + prefetcherReason: classification.reason, + }, + }); + console.log( + `Email pixel prefetcher: ${cleanTrackingId} (${classification.reason})`, + ); + } else { + const firstOpen = !(await eventExistsForTracking( + cleanTrackingId, + 'opened', + { excludePrefetcher: true }, + )); - console.log(`Email opened: ${cleanTrackingId} (first: ${firstOpen})`); + await createEmailEvent({ + id: nanoid(), + queueId: queueEntry.id, + trackingId: cleanTrackingId, + eventType: 'opened', + ipAddress, + userAgent, + timestamp: new Date(), + metadata: { firstOpen }, + }); + + if (firstOpen) { + await incrementCampaignStat(queueEntry.campaignId, 'openCount'); + } + + console.log(`Email opened: ${cleanTrackingId} (first: ${firstOpen})`); + } } // Always return the tracking pixel, even if tracking ID not found diff --git a/src/lib/openTrackingFilter.ts b/src/lib/openTrackingFilter.ts new file mode 100644 index 0000000..3625b5e --- /dev/null +++ b/src/lib/openTrackingFilter.ts @@ -0,0 +1,116 @@ +/** + * Open-tracking prefetcher detection. + * + * Email-client scanners fetch tracking pixels before the human ever sees the + * message — Gmail's image proxy pre-caches every image, Apple Mail Privacy + * Protection (MPP) does the same on iOS 15+, corporate security gateways + * scan everything inbound. Counting these as opens silently inflates the + * open-rate metric to noise levels. + * + * This module classifies an incoming pixel request without DB or network + * access, so it can be unit-tested deterministically. 
+ */ + +export type PrefetcherClassification = + | { isPrefetcher: false } + | { + isPrefetcher: true + reason: + | 'gmail_image_proxy' + | 'apple_mpp' + | 'outlook_safelinks' + | 'known_scanner' + | 'sub_send_window' + } + +export type ClassifyInput = { + userAgent: string | null | undefined + ipAddress?: string | null + sentAt?: Date | string | null + now?: Date + /** Min seconds between send and pixel hit before we trust it. Default 30. */ + minSendWindowSeconds?: number +} + +/** + * Classify a pixel request as either a real human open or a prefetcher. + * Order of checks matters — most specific UA signals first, then a + * blanket time-window heuristic. + */ +export function classifyPixelRequest( + input: ClassifyInput, +): PrefetcherClassification { + const ua = (input.userAgent ?? '').trim() + + if (matchesGmailImageProxy(ua)) { + return { isPrefetcher: true, reason: 'gmail_image_proxy' } + } + + if (matchesAppleMpp(ua, input.ipAddress)) { + return { isPrefetcher: true, reason: 'apple_mpp' } + } + + if (matchesOutlookSafelinks(ua)) { + return { isPrefetcher: true, reason: 'outlook_safelinks' } + } + + if (matchesKnownScanner(ua)) { + return { isPrefetcher: true, reason: 'known_scanner' } + } + + if (input.sentAt) { + const sent = + typeof input.sentAt === 'string' ? new Date(input.sentAt) : input.sentAt + if (!Number.isNaN(sent.getTime())) { + const now = input.now ?? new Date() + const ageSeconds = (now.getTime() - sent.getTime()) / 1000 + const window = input.minSendWindowSeconds ?? 30 + // A real human cannot open a cold email within 30s of it being sent — + // the inbox client hasn't even pushed the notification yet. Anything + // hitting the pixel that fast is a server-side scanner. 
+ if (ageSeconds >= 0 && ageSeconds < window) { + return { isPrefetcher: true, reason: 'sub_send_window' } + } + } + } + + return { isPrefetcher: false } +} + +function matchesGmailImageProxy(ua: string): boolean { + // Google sets "GoogleImageProxy" in the UA when its server fetches inline + // images on behalf of Gmail web clients before the user sees the message. + return /GoogleImageProxy/i.test(ua) +} + +function matchesAppleMpp(ua: string, ipAddress: string | null | undefined): boolean { + // Apple MPP uses Apple's privacy relay; the UA is typically empty or + // contains "Mail/" with a privacy-mask IP. Conservative match: explicit + // Apple privacy strings + the well-known privacy-relay UA stub. + if (/MaskedEmail|MailPrivacyProtection|com\.apple\.mobilemail/i.test(ua)) { + return true + } + // Apple's privacy relay routes through known IP ranges, but we don't ship + // that list, so ipAddress is accepted but currently unused. An empty UA is + // not flagged here; such hits are only caught by the send-window check. + void ipAddress + return false +} + +function matchesOutlookSafelinks(ua: string): boolean { + // Microsoft's Defender for Office 365 / Safe Links scans every link and + // image. Matched UA tokens: "BingPreview", "MSOffice", "MSIE 10.0" followed + // by "ms-office", "Microsoft-WebDAV-MiniRedir", or "SafeLinks". + return ( + /BingPreview|MSOffice|MSIE 10\.0.*ms-office|Microsoft-WebDAV-MiniRedir/i.test( + ua, + ) || /SafeLinks/i.test(ua) + ) +} + +function matchesKnownScanner(ua: string): boolean { + // Generic security-scanner / anti-spam UA patterns common in B2B inboxes. 
+ return /Bitdefender|Mimecast|Proofpoint|Barracuda|Sophos|TrendMicro|Symantec|McAfee|YahooMailProxy|Forcepoint/i.test( + ua, + ) +} diff --git a/tests/int/openTrackingFilter.int.spec.ts b/tests/int/openTrackingFilter.int.spec.ts new file mode 100644 index 0000000..bf8e19b --- /dev/null +++ b/tests/int/openTrackingFilter.int.spec.ts @@ -0,0 +1,179 @@ +import { describe, expect, it } from 'vitest' +import { classifyPixelRequest } from '@/lib/openTrackingFilter' + +const sentAt = new Date('2026-05-03T12:00:00Z') +const longAfter = new Date('2026-05-03T13:00:00Z') + +describe('classifyPixelRequest — UA-based prefetcher detection', () => { + it('flags Gmail image proxy', () => { + const r = classifyPixelRequest({ + userAgent: + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko; GoogleImageProxy)', + sentAt, + now: longAfter, + }) + expect(r).toEqual({ isPrefetcher: true, reason: 'gmail_image_proxy' }) + }) + + it('flags Apple Mail Privacy Protection by UA', () => { + expect( + classifyPixelRequest({ + userAgent: 'com.apple.mobilemail/MailPrivacyProtection', + sentAt, + now: longAfter, + }), + ).toEqual({ isPrefetcher: true, reason: 'apple_mpp' }) + expect( + classifyPixelRequest({ + userAgent: 'MaskedEmail/1.0', + sentAt, + now: longAfter, + }), + ).toEqual({ isPrefetcher: true, reason: 'apple_mpp' }) + }) + + it('flags Outlook Safe Links / Defender scanners', () => { + expect( + classifyPixelRequest({ + userAgent: 'BingPreview/1.0b', + sentAt, + now: longAfter, + }).isPrefetcher, + ).toBe(true) + expect( + classifyPixelRequest({ + userAgent: 'Microsoft Office SafeLinks Scanner', + sentAt, + now: longAfter, + }).isPrefetcher, + ).toBe(true) + }) + + it('flags known security scanner UAs', () => { + for (const vendor of [ + 'Bitdefender', + 'Mimecast', + 'Proofpoint', + 'Barracuda', + 'Sophos', + 'TrendMicro', + 'Symantec', + 'McAfee', + ]) { + const r = classifyPixelRequest({ + userAgent: `${vendor}-AV/1.0`, + sentAt, + now: longAfter, + }) + 
expect(r).toEqual({ isPrefetcher: true, reason: 'known_scanner' }) + } + }) +}) + +describe('classifyPixelRequest — time-window heuristic', () => { + it('flags hits inside the send window even with a real-looking UA', () => { + const r = classifyPixelRequest({ + userAgent: + 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15', + sentAt, + now: new Date(sentAt.getTime() + 5_000), // 5s after send + }) + expect(r).toEqual({ isPrefetcher: true, reason: 'sub_send_window' }) + }) + + it('does not flag hits outside the send window', () => { + const r = classifyPixelRequest({ + userAgent: + 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15', + sentAt, + now: new Date(sentAt.getTime() + 60_000), // 1 minute after send + }) + expect(r).toEqual({ isPrefetcher: false }) + }) + + it('honors a custom send-window threshold', () => { + const r = classifyPixelRequest({ + userAgent: 'Mozilla/5.0', + sentAt, + now: new Date(sentAt.getTime() + 45_000), // 45s + minSendWindowSeconds: 60, + }) + expect(r).toEqual({ isPrefetcher: true, reason: 'sub_send_window' }) + }) + + it('ignores invalid sentAt strings', () => { + expect( + classifyPixelRequest({ + userAgent: 'Mozilla/5.0', + sentAt: 'not-a-date', + now: new Date(), + }), + ).toEqual({ isPrefetcher: false }) + }) + + it('does not flag hits before sentAt (negative age)', () => { + // Defensive: if sentAt is in the future for any reason, do not flag — + // it is more likely a clock-skew bug than a prefetcher. 
+ expect( + classifyPixelRequest({ + userAgent: 'Mozilla/5.0', + sentAt: new Date(sentAt.getTime() + 60_000), + now: sentAt, + }), + ).toEqual({ isPrefetcher: false }) + }) + + it('skips the time-window check entirely when sentAt is missing', () => { + expect( + classifyPixelRequest({ + userAgent: 'Mozilla/5.0', + now: new Date(), + }), + ).toEqual({ isPrefetcher: false }) + }) +}) + +describe('classifyPixelRequest — passthrough cases', () => { + it('does not flag a normal browser UA outside the send window', () => { + const r = classifyPixelRequest({ + userAgent: + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + sentAt, + now: longAfter, + }) + expect(r).toEqual({ isPrefetcher: false }) + }) + + it('does not flag empty / missing UA on its own', () => { + expect( + classifyPixelRequest({ userAgent: '', sentAt, now: longAfter }), + ).toEqual({ isPrefetcher: false }) + expect( + classifyPixelRequest({ userAgent: null, sentAt, now: longAfter }), + ).toEqual({ isPrefetcher: false }) + expect( + classifyPixelRequest({ userAgent: undefined, sentAt, now: longAfter }), + ).toEqual({ isPrefetcher: false }) + }) + + it('matches case-insensitively', () => { + expect( + classifyPixelRequest({ + userAgent: 'googleimageproxy/1.0', + sentAt, + now: longAfter, + }), + ).toEqual({ isPrefetcher: true, reason: 'gmail_image_proxy' }) + }) + + it('matches the UA signal even before the send-window timer would fire', () => { + // A scanner UA inside the window should classify as the scanner reason, + // not "sub_send_window" — preserves the most specific diagnostic. + const r = classifyPixelRequest({ + userAgent: 'GoogleImageProxy', + sentAt, + now: new Date(sentAt.getTime() + 3_000), + }) + expect(r).toEqual({ isPrefetcher: true, reason: 'gmail_image_proxy' }) + }) +})