Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions libs/db/src/queries/emailEvent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,31 @@ export const getEventsByTrackingId = async (trackingId: string): Promise<EmailEv

/**
* Check if a specific event type already exists for a tracking ID
* (useful for preventing duplicate open tracking)
* (useful for preventing duplicate open tracking).
*
* `excludePrefetcher`: skip rows tagged `metadata.prefetcher === true`.
* The pixel route writes those rows for Gmail/Apple/scanner hits so
* "did a real human open this" can be answered without inflating counts.
*/
export const eventExistsForTracking = async (
trackingId: string,
eventType: 'sent' | 'opened' | 'clicked' | 'replied' | 'bounced' | 'unsubscribed'
eventType: 'sent' | 'opened' | 'clicked' | 'replied' | 'bounced' | 'unsubscribed',
options: { excludePrefetcher?: boolean } = {}
): Promise<boolean> => {
const filters = [
eq(emailEvent.trackingId, trackingId),
eq(emailEvent.eventType, eventType),
];
if (options.excludePrefetcher) {
filters.push(
sql`(${emailEvent.metadata} -> 'prefetcher' IS NULL OR ${emailEvent.metadata} ->> 'prefetcher' <> 'true')`
);
}

const results = await db
.select({ id: emailEvent.id })
.from(emailEvent)
.where(
and(
eq(emailEvent.trackingId, trackingId),
eq(emailEvent.eventType, eventType)
)
)
.where(and(...filters))
.limit(1);

return results.length > 0;
Expand Down
69 changes: 50 additions & 19 deletions src/app/api/email-tracking/pixel/[trackingId]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
eventExistsForTracking,
incrementCampaignStat,
} from '@coldflow/db';
import { classifyPixelRequest } from '@/lib/openTrackingFilter';

/**
* GET /api/email-tracking/pixel/[trackingId].png
Expand Down Expand Up @@ -34,35 +35,65 @@ export async function GET(
const queueEntry = await getQueueEntryByTrackingId(cleanTrackingId);

if (queueEntry) {
// Check if this is the first open event (to avoid inflating stats)
const firstOpen = !(await eventExistsForTracking(cleanTrackingId, 'opened'));

// Get request metadata
const ipAddress = request.headers.get('x-forwarded-for')?.split(',')[0] ||
request.headers.get('x-real-ip') ||
'unknown';
const userAgent = request.headers.get('user-agent') || 'unknown';

// Create email event
await createEmailEvent({
id: nanoid(),
queueId: queueEntry.id,
trackingId: cleanTrackingId,
eventType: 'opened',
ipAddress,
// Filter prefetcher hits (Gmail image proxy, Apple MPP, security
// scanners, sub-send-window scans). Counting these as opens silently
// inflates open-rate to noise.
const classification = classifyPixelRequest({
userAgent,
timestamp: new Date(),
metadata: {
firstOpen,
},
ipAddress,
sentAt: queueEntry.sentAt,
});

// Increment campaign open count only for first open
if (firstOpen) {
await incrementCampaignStat(queueEntry.campaignId, 'openCount');
}
if (classification.isPrefetcher) {
// Record as a discrete event (so debugging stays possible) but never
// increment openCount.
await createEmailEvent({
id: nanoid(),
queueId: queueEntry.id,
trackingId: cleanTrackingId,
eventType: 'opened',
ipAddress,
userAgent,
timestamp: new Date(),
metadata: {
firstOpen: false,
prefetcher: true,
prefetcherReason: classification.reason,
},
});
console.log(
`Email pixel prefetcher: ${cleanTrackingId} (${classification.reason})`,
);
} else {
const firstOpen = !(await eventExistsForTracking(
cleanTrackingId,
'opened',
{ excludePrefetcher: true },
));

console.log(`Email opened: ${cleanTrackingId} (first: ${firstOpen})`);
await createEmailEvent({
id: nanoid(),
queueId: queueEntry.id,
trackingId: cleanTrackingId,
eventType: 'opened',
ipAddress,
userAgent,
timestamp: new Date(),
metadata: { firstOpen },
});

if (firstOpen) {
await incrementCampaignStat(queueEntry.campaignId, 'openCount');
}

console.log(`Email opened: ${cleanTrackingId} (first: ${firstOpen})`);
}
}

// Always return the tracking pixel, even if tracking ID not found
Expand Down
116 changes: 116 additions & 0 deletions src/lib/openTrackingFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
* Open-tracking prefetcher detection.
*
* Email-client scanners fetch tracking pixels before the human ever sees the
* message — Gmail's image proxy pre-caches every image, Apple Mail Privacy
* Protection (MPP) does the same on iOS 15+, corporate security gateways
* scan everything inbound. Counting these as opens silently inflates the
* open-rate metric to noise levels.
*
* This module classifies an incoming pixel request without DB or network
* access, so it can be unit-tested deterministically.
*/

/**
 * Reason codes recorded when a pixel request is attributed to an automated
 * fetcher instead of a human reader.
 */
type PrefetcherReason =
  | 'gmail_image_proxy'
  | 'apple_mpp'
  | 'outlook_safelinks'
  | 'known_scanner'
  | 'sub_send_window'

/**
 * Result of classifying a tracking-pixel request.
 *
 * Discriminated on `isPrefetcher`: the `true` variant always carries the
 * `reason` that triggered the classification; the `false` variant carries
 * nothing else.
 */
export type PrefetcherClassification =
  | { isPrefetcher: false }
  | { isPrefetcher: true; reason: PrefetcherReason }

/**
* Input accepted by `classifyPixelRequest`. Only `userAgent` is required;
* every other field is optional.
*/
export type ClassifyInput = {
/** Raw User-Agent header; `null`/`undefined` is handled as an empty string. */
userAgent: string | null | undefined
/** Client IP of the request. Forwarded to the Apple MPP check, which does not currently inspect it. */
ipAddress?: string | null
/**
* When the email was sent, as a `Date` or a string parsed with `new Date(...)`.
* If missing or unparseable, the send-window check is skipped.
*/
sentAt?: Date | string | null
/** Reference "current time" for the send-window check; defaults to `new Date()`. Useful for deterministic tests. */
now?: Date
/** Min seconds between send and pixel hit before we trust it. Default 30. */
minSendWindowSeconds?: number
}

/**
 * Decide whether a tracking-pixel request came from an automated fetcher
 * or should be counted as a genuine open.
 *
 * Checks run in a fixed order and the first match wins:
 *   1. Gmail image proxy user agent
 *   2. Apple Mail Privacy Protection user agent
 *   3. Microsoft / Safe Links user agent
 *   4. Known security-scanner user agent
 *   5. Send-window heuristic: the hit arrived fewer than
 *      `minSendWindowSeconds` (default 30) after `sentAt`.
 *
 * @param input - Request details; see `ClassifyInput`. A missing user agent
 *   is treated as an empty string, and surrounding whitespace is trimmed
 *   before matching.
 * @returns `{ isPrefetcher: true, reason }` for the first check that
 *   matches, otherwise `{ isPrefetcher: false }`.
 */
export function classifyPixelRequest(
  input: ClassifyInput,
): PrefetcherClassification {
  const agent = (input.userAgent ?? '').trim()

  // User-agent signals are the most specific, so they are consulted first
  // and in this exact order.
  const agentReason = matchesGmailImageProxy(agent)
    ? 'gmail_image_proxy'
    : matchesAppleMpp(agent, input.ipAddress)
      ? 'apple_mpp'
      : matchesOutlookSafelinks(agent)
        ? 'outlook_safelinks'
        : matchesKnownScanner(agent)
          ? 'known_scanner'
          : null
  if (agentReason !== null) {
    return { isPrefetcher: true, reason: agentReason }
  }

  // Without a send time there is nothing to measure the hit against.
  if (!input.sentAt) {
    return { isPrefetcher: false }
  }

  const sentDate =
    typeof input.sentAt === 'string' ? new Date(input.sentAt) : input.sentAt
  const sentMs = sentDate.getTime()
  if (Number.isNaN(sentMs)) {
    // Unparseable send time: skip the heuristic rather than guess.
    return { isPrefetcher: false }
  }

  const nowMs = (input.now ?? new Date()).getTime()
  const elapsedSeconds = (nowMs - sentMs) / 1000
  const windowSeconds = input.minSendWindowSeconds ?? 30

  // Heuristic: a hit this soon after sending is assumed to be a server-side
  // scanner, not a person. Negative ages (send time ahead of "now") are
  // deliberately left unflagged.
  const insideSendWindow = elapsedSeconds >= 0 && elapsedSeconds < windowSeconds
  return insideSendWindow
    ? { isPrefetcher: true, reason: 'sub_send_window' }
    : { isPrefetcher: false }
}

/**
 * Report whether the user agent carries the "GoogleImageProxy" marker
 * (matched case-insensitively, anywhere in the string), which identifies
 * Google's servers fetching inline images on behalf of Gmail.
 *
 * @param ua - User-agent string to inspect.
 * @returns `true` when the marker is present.
 */
function matchesGmailImageProxy(ua: string): boolean {
  const proxyMarker = /GoogleImageProxy/i
  return proxyMarker.exec(ua) !== null
}

/**
 * Report whether the user agent contains one of the explicit Apple privacy
 * markers: "MaskedEmail", "MailPrivacyProtection" or "com.apple.mobilemail"
 * (case-insensitive).
 *
 * The match is intentionally conservative: an empty or generic user agent is
 * NOT flagged here.
 *
 * @param ua - User-agent string to inspect.
 * @param ipAddress - Accepted for future IP-range matching but not consulted;
 *   no Apple relay IP list is shipped with this module.
 * @returns `true` when one of the markers is present.
 */
function matchesAppleMpp(ua: string, ipAddress: string | null | undefined): boolean {
  // Explicitly mark the parameter as intentionally unused.
  void ipAddress
  const applePrivacyMarkers = /MaskedEmail|MailPrivacyProtection|com\.apple\.mobilemail/i
  return applePrivacyMarkers.test(ua)
}

/**
 * Report whether the user agent looks like a Microsoft-side fetcher.
 *
 * Matches (case-insensitively) any of: "BingPreview", "MSOffice",
 * "MSIE 10.0" followed later by "ms-office", "Microsoft-WebDAV-MiniRedir",
 * or "SafeLinks".
 *
 * NOTE(review): "MSOffice" may also appear in the user agent desktop Outlook
 * sends when a person views a message — confirm this does not suppress
 * genuine opens.
 *
 * @param ua - User-agent string to inspect.
 * @returns `true` when any of the tokens above is present.
 */
function matchesOutlookSafelinks(ua: string): boolean {
  const microsoftFetcher = /BingPreview|MSOffice|MSIE 10\.0.*ms-office|Microsoft-WebDAV-MiniRedir/i
  if (microsoftFetcher.test(ua)) {
    return true
  }
  return /SafeLinks/i.test(ua)
}

/**
 * Report whether the user agent names one of a fixed list of mail-security /
 * anti-spam products (plus "YahooMailProxy"), matched case-insensitively
 * anywhere in the string.
 *
 * @param ua - User-agent string to inspect.
 * @returns `true` when any listed vendor token is present.
 */
function matchesKnownScanner(ua: string): boolean {
  const scannerVendors = /Bitdefender|Mimecast|Proofpoint|Barracuda|Sophos|TrendMicro|Symantec|McAfee|YahooMailProxy|Forcepoint/i
  return ua.search(scannerVendors) !== -1
}
Loading