From 404ffc346bc869e32cde5762f056b40397564dc7 Mon Sep 17 00:00:00 2001 From: Jack Misner Date: Fri, 13 Feb 2026 19:47:44 +0000 Subject: [PATCH] feat: add PII filtering for UTM parameter values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detect and filter personally identifiable information (emails, phone numbers) from UTM parameter values at capture time. Supports reject mode (discard values) and redact mode (replace with [REDACTED]), optional strict allowlist patterns, and an onPiiDetected callback. Disabled by default. 🤖 Generated with [Nori](https://nori.ai) Co-Authored-By: Nori --- README.md | 44 ++++ __tests__/config/loader.test.ts | 103 ++++++++ __tests__/core/capture.test.ts | 56 +++++ __tests__/core/pii-filter.test.ts | 297 ++++++++++++++++++++++++ __tests__/docs.md | 6 +- __tests__/react/useUtmTracking.test.tsx | 25 ++ src/config/defaults.ts | 45 +++- src/config/docs.md | 7 +- src/config/index.ts | 2 + src/config/loader.ts | 68 ++++++ src/core/capture.ts | 22 +- src/core/docs.md | 10 +- src/core/index.ts | 3 + src/core/pii-filter.ts | 111 +++++++++ src/docs.md | 4 +- src/index.ts | 9 + src/react/docs.md | 2 +- src/react/useUtmTracking.ts | 1 + src/types/docs.md | 1 + src/types/index.ts | 45 ++++ 20 files changed, 844 insertions(+), 17 deletions(-) create mode 100644 __tests__/core/pii-filter.test.ts create mode 100644 src/core/pii-filter.ts diff --git a/README.md b/README.md index 00443bb..03e7dd0 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ A comprehensive TypeScript library for capturing, storing, and appending UTM tra - **Capture** UTM parameters from URLs - **Sanitize** parameter values to prevent XSS and injection +- **PII filtering** to detect and reject/redact email addresses, phone numbers, and other PII - **Store** in sessionStorage for the browser session - **Append** UTM parameters to share URLs - **Configurable** key format (snake_case or camelCase) @@ -248,6 +249,46 @@ const params = 
captureUtmParameters(url, { }); ``` +### PII Filtering + +Detect and filter personally identifiable information (email addresses, phone numbers) from UTM parameter values. Prevents PII from leaking into analytics via misconfigured tracking links. Disabled by default. + +```typescript +import { captureUtmParameters } from '@jackmisner/utm-toolkit'; + +// Reject mode (default) — discard values containing PII +const params = captureUtmParameters('https://example.com?utm_source=john@example.com&utm_medium=cpc', { + piiFiltering: { enabled: true }, +}); +// { utm_medium: 'cpc' } — utm_source was rejected + +// Redact mode — replace PII values with [REDACTED] +const params = captureUtmParameters('https://example.com?utm_source=john@example.com&utm_medium=cpc', { + piiFiltering: { enabled: true, mode: 'redact' }, +}); +// { utm_source: '[REDACTED]', utm_medium: 'cpc' } + +// Strict allowlist — only accept values matching a pattern +const params = captureUtmParameters(url, { + piiFiltering: { + enabled: true, + allowlistPattern: /^[a-z0-9_-]+$/, // Only lowercase alphanumeric, hyphens, underscores + }, +}); + +// Callback for logging PII detections +const params = captureUtmParameters(url, { + piiFiltering: { + enabled: true, + onPiiDetected: (param, value, patternName) => { + console.warn(`PII detected in ${param}: matched ${patternName}`); + }, + }, +}); +``` + +Built-in PII patterns detect: email addresses, international phone numbers, UK phone numbers, and US phone numbers. 
+ ### Configuration ```typescript @@ -332,6 +373,7 @@ installDebugHelpers(); | `shareContextParams` | `object` | `{}` | Platform-specific params | | `excludeFromShares` | `string[]` | `[]` | Params to exclude from shares | | `sanitize` | `SanitizeConfig` | `{ enabled: false }` | Value sanitization settings | +| `piiFiltering` | `PiiFilterConfig` | `{ enabled: false }` | PII detection and filtering | ## TypeScript Types @@ -340,6 +382,8 @@ import type { UtmParameters, UtmConfig, SanitizeConfig, + PiiFilterConfig, + PiiPattern, SharePlatform, UseUtmTrackingReturn, } from '@jackmisner/utm-toolkit'; diff --git a/__tests__/config/loader.test.ts b/__tests__/config/loader.test.ts index 743edac..eae987b 100644 --- a/__tests__/config/loader.test.ts +++ b/__tests__/config/loader.test.ts @@ -334,6 +334,109 @@ describe('sanitize config', () => { }) }) +describe('piiFiltering config', () => { + it('createConfig includes default piiFiltering', () => { + const config = createConfig() + expect(config.piiFiltering).toBeDefined() + expect(config.piiFiltering.enabled).toBe(false) + expect(config.piiFiltering.mode).toBe('reject') + expect(config.piiFiltering.patterns.length).toBeGreaterThan(0) + }) + + it('createConfig merges partial piiFiltering override', () => { + const config = createConfig({ piiFiltering: { enabled: true } }) + expect(config.piiFiltering.enabled).toBe(true) + expect(config.piiFiltering.mode).toBe('reject') + }) + + it('createConfig preserves custom patterns', () => { + const customPatterns = [{ name: 'custom', pattern: /test/, enabled: true }] + const config = createConfig({ piiFiltering: { patterns: customPatterns } }) + expect(config.piiFiltering.patterns).toEqual(customPatterns) + }) + + it('mergeConfig merges piiFiltering', () => { + const base = createConfig() + const merged = mergeConfig(base, { piiFiltering: { enabled: true, mode: 'redact' } }) + expect(merged.piiFiltering.enabled).toBe(true) + expect(merged.piiFiltering.mode).toBe('redact') + 
expect(merged.piiFiltering.patterns).toEqual(base.piiFiltering.patterns) + }) + + it('validateConfig validates piiFiltering.enabled is boolean', () => { + const errors = validateConfig({ piiFiltering: { enabled: 'yes' } }) + expect(errors).toContain('piiFiltering.enabled must be a boolean') + }) + + it('validateConfig validates piiFiltering.mode is valid', () => { + const errors = validateConfig({ piiFiltering: { mode: 'delete' } }) + expect(errors).toContain('piiFiltering.mode must be "reject" or "redact"') + }) + + it('validateConfig validates piiFiltering.patterns is an array', () => { + const errors = validateConfig({ piiFiltering: { patterns: 'not an array' } }) + expect(errors).toContain('piiFiltering.patterns must be an array') + }) + + it('validateConfig validates piiFiltering is an object', () => { + const errors = validateConfig({ piiFiltering: 'not an object' }) + expect(errors).toContain('piiFiltering must be an object') + }) + + it('validateConfig validates individual pattern objects', () => { + const errors = validateConfig({ + piiFiltering: { + patterns: [{ name: 123, pattern: 'not a regex', enabled: 'yes' }], + }, + }) + expect(errors).toContain('piiFiltering.patterns[0].name must be a string') + expect(errors).toContain('piiFiltering.patterns[0].pattern must be a RegExp') + expect(errors).toContain('piiFiltering.patterns[0].enabled must be a boolean') + }) + + it('validateConfig validates non-object pattern entries', () => { + const errors = validateConfig({ + piiFiltering: { patterns: ['not an object'] }, + }) + expect(errors).toContain('piiFiltering.patterns[0] must be an object') + }) + + it('validateConfig validates allowlistPattern is a RegExp', () => { + const errors = validateConfig({ + piiFiltering: { allowlistPattern: 'not a regex' }, + }) + expect(errors).toContain('piiFiltering.allowlistPattern must be a RegExp') + }) + + it('validateConfig accepts valid allowlistPattern RegExp', () => { + const errors = validateConfig({ + piiFiltering: 
{ allowlistPattern: /^[a-z]+$/ }, + }) + expect(errors).toEqual([]) + }) + + it('validateConfig validates onPiiDetected is a function', () => { + const errors = validateConfig({ + piiFiltering: { onPiiDetected: 'not a function' }, + }) + expect(errors).toContain('piiFiltering.onPiiDetected must be a function') + }) + + it('validateConfig accepts valid onPiiDetected function', () => { + const errors = validateConfig({ + piiFiltering: { onPiiDetected: () => {} }, + }) + expect(errors).toEqual([]) + }) + + it('validateConfig accepts valid piiFiltering config', () => { + const errors = validateConfig({ + piiFiltering: { enabled: true, mode: 'redact' }, + }) + expect(errors).toEqual([]) + }) +}) + describe('getDefaultConfig', () => { it('returns a copy of default config', () => { const config1 = getDefaultConfig() diff --git a/__tests__/core/capture.test.ts b/__tests__/core/capture.test.ts index e7ef619..a6b6a3e 100644 --- a/__tests__/core/capture.test.ts +++ b/__tests__/core/capture.test.ts @@ -230,6 +230,62 @@ describe('sanitization integration', () => { }) }) +describe('PII filtering integration', () => { + const piiFilterConfig = { + enabled: true, + mode: 'reject' as const, + patterns: [ + { + name: 'email', + pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, + enabled: true, + }, + ], + } + + it('rejects PII values when piiFiltering is enabled', () => { + const result = captureUtmParameters( + 'https://example.com?utm_source=john@example.com&utm_medium=email', + { piiFiltering: piiFilterConfig }, + ) + expect(result).not.toHaveProperty('utm_source') + expect(result.utm_medium).toBe('email') + }) + + it('does not filter when piiFiltering is not provided', () => { + const result = captureUtmParameters('https://example.com?utm_source=john@example.com') + expect(result.utm_source).toBe('john@example.com') + }) + + it('does not filter when piiFiltering.enabled is false', () => { + const result = 
captureUtmParameters('https://example.com?utm_source=john@example.com', { + piiFiltering: { ...piiFilterConfig, enabled: false }, + }) + expect(result.utm_source).toBe('john@example.com') + }) + + it('works with camelCase key format', () => { + const result = captureUtmParameters( + 'https://example.com?utm_source=john@example.com&utm_medium=cpc', + { keyFormat: 'camelCase', piiFiltering: piiFilterConfig }, + ) + expect(result).not.toHaveProperty('utmSource') + expect(result.utmMedium).toBe('cpc') + }) + + it('applies PII filter after sanitization', () => { + const result = captureUtmParameters( + 'https://example.com?utm_source=john@example.com&utm_campaign=spring-2025', + { + sanitize: { enabled: true }, + piiFiltering: piiFilterConfig, + }, + ) + expect(result).not.toHaveProperty('utm_source') + expect(result.utm_campaign).toBe('spring-2025') + }) +}) + describe('captureFromCurrentUrl', () => { beforeEach(() => { vi.stubGlobal('location', { diff --git a/__tests__/core/pii-filter.test.ts b/__tests__/core/pii-filter.test.ts new file mode 100644 index 0000000..dc77405 --- /dev/null +++ b/__tests__/core/pii-filter.test.ts @@ -0,0 +1,297 @@ +import { describe, it, expect, vi } from 'vitest' +import { detectPii, filterValue, filterParams } from '../../src/core/pii-filter' +import type { PiiFilterConfig, PiiPattern } from '../../src/types' + +const defaultPatterns: PiiPattern[] = [ + { name: 'email', pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, enabled: true }, + { name: 'phone_international', pattern: /\+\d{10,15}\b/, enabled: true }, + { name: 'phone_uk', pattern: /\b(?:0|\+44)\d{9,10}\b/, enabled: true }, + { name: 'phone_us', pattern: /\b\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/, enabled: true }, +] + +const defaultConfig: PiiFilterConfig = { + enabled: true, + mode: 'reject', + patterns: defaultPatterns, +} + +describe('detectPii', () => { + it('detects email addresses', () => { + const result = detectPii('john@example.com', defaultPatterns) + 
expect(result).not.toBeNull() + expect(result!.name).toBe('email') + }) + + it('detects email with plus addressing', () => { + const result = detectPii('john+tag@example.com', defaultPatterns) + expect(result).not.toBeNull() + expect(result!.name).toBe('email') + }) + + it('detects international phone numbers', () => { + const result = detectPii('+447911123456', defaultPatterns) + expect(result).not.toBeNull() + }) + + it('detects UK phone numbers', () => { + const result = detectPii('07911123456', defaultPatterns) + expect(result).not.toBeNull() + expect(result!.name).toBe('phone_uk') + }) + + it('detects US phone numbers', () => { + const result = detectPii('(555) 123-4567', defaultPatterns) + expect(result).not.toBeNull() + expect(result!.name).toBe('phone_us') + }) + + it('detects US phone with dots', () => { + const result = detectPii('555.123.4567', defaultPatterns) + expect(result).not.toBeNull() + expect(result!.name).toBe('phone_us') + }) + + it('returns null for clean campaign values', () => { + const result = detectPii('spring-2025_campaign', defaultPatterns) + expect(result).toBeNull() + }) + + it('returns null for typical utm_source values', () => { + const result = detectPii('linkedin', defaultPatterns) + expect(result).toBeNull() + }) + + it('returns null for empty string', () => { + const result = detectPii('', defaultPatterns) + expect(result).toBeNull() + }) + + it('does not flag numeric campaign IDs as phone numbers', () => { + const result = detectPii('campaign_20250101_12345', defaultPatterns) + expect(result).toBeNull() + }) + + it('does not flag short numeric tracking codes', () => { + const result = detectPii('track12345', defaultPatterns) + expect(result).toBeNull() + }) + + it('skips disabled patterns', () => { + const patterns: PiiPattern[] = [ + { name: 'email', pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, enabled: false }, + ] + const result = detectPii('john@example.com', patterns) + expect(result).toBeNull() + }) + + 
it('returns first matching pattern', () => { + const value = 'john@example.com' + const patterns: PiiPattern[] = [ + { name: 'custom_first', pattern: /@/, enabled: true }, + { name: 'email', pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, enabled: true }, + ] + const result = detectPii(value, patterns) + expect(result!.name).toBe('custom_first') + }) +}) + +describe('filterValue', () => { + describe('reject mode', () => { + it('returns undefined for PII values', () => { + const result = filterValue('utm_source', 'john@example.com', defaultConfig) + expect(result).toBeUndefined() + }) + + it('passes through clean values', () => { + const result = filterValue('utm_source', 'linkedin', defaultConfig) + expect(result).toBe('linkedin') + }) + }) + + describe('redact mode', () => { + const redactConfig: PiiFilterConfig = { ...defaultConfig, mode: 'redact' } + + it('replaces entire value with [REDACTED] for PII', () => { + const result = filterValue('utm_source', 'john@example.com', redactConfig) + expect(result).toBe('[REDACTED]') + }) + + it('passes through clean values', () => { + const result = filterValue('utm_source', 'linkedin', redactConfig) + expect(result).toBe('linkedin') + }) + }) + + describe('allowlist mode', () => { + const allowlistConfig: PiiFilterConfig = { + ...defaultConfig, + allowlistPattern: /^[a-z0-9_-]+$/, + } + + it('accepts values matching the allowlist', () => { + const result = filterValue('utm_source', 'spring-2025_campaign', allowlistConfig) + expect(result).toBe('spring-2025_campaign') + }) + + it('rejects values not matching the allowlist', () => { + const result = filterValue('utm_source', 'My Campaign!', allowlistConfig) + expect(result).toBeUndefined() + }) + + it('allowlist takes precedence over PII patterns (clean value with special chars rejected)', () => { + const result = filterValue('utm_source', 'Campaign With Spaces', allowlistConfig) + expect(result).toBeUndefined() + }) + + it('allowlist rejects PII values too', () 
=> { + const result = filterValue('utm_source', 'john@example.com', allowlistConfig) + expect(result).toBeUndefined() + }) + + it('uses redact mode with allowlist', () => { + const config: PiiFilterConfig = { ...allowlistConfig, mode: 'redact' } + const result = filterValue('utm_source', 'My Campaign!', config) + expect(result).toBe('[REDACTED]') + }) + }) + + describe('onPiiDetected callback', () => { + it('calls callback when PII is detected via pattern', () => { + const onPiiDetected = vi.fn() + const config: PiiFilterConfig = { ...defaultConfig, onPiiDetected } + filterValue('utm_source', 'john@example.com', config) + expect(onPiiDetected).toHaveBeenCalledWith('utm_source', 'john@example.com', 'email') + }) + + it('calls callback when value fails allowlist', () => { + const onPiiDetected = vi.fn() + const config: PiiFilterConfig = { + ...defaultConfig, + allowlistPattern: /^[a-z0-9_-]+$/, + onPiiDetected, + } + filterValue('utm_campaign', 'Bad Value!', config) + expect(onPiiDetected).toHaveBeenCalledWith('utm_campaign', 'Bad Value!', 'allowlist') + }) + + it('does not call callback for clean values', () => { + const onPiiDetected = vi.fn() + const config: PiiFilterConfig = { ...defaultConfig, onPiiDetected } + filterValue('utm_source', 'linkedin', config) + expect(onPiiDetected).not.toHaveBeenCalled() + }) + + it('does not break when callback throws', () => { + const onPiiDetected = vi.fn(() => { + throw new Error('callback error') + }) + const config: PiiFilterConfig = { ...defaultConfig, onPiiDetected } + const result = filterValue('utm_source', 'john@example.com', config) + expect(result).toBeUndefined() + expect(onPiiDetected).toHaveBeenCalled() + }) + + it('does not break when allowlist callback throws', () => { + const onPiiDetected = vi.fn(() => { + throw new Error('callback error') + }) + const config: PiiFilterConfig = { + ...defaultConfig, + allowlistPattern: /^[a-z]+$/, + onPiiDetected, + } + const result = filterValue('utm_source', 'Bad Value!', 
config) + expect(result).toBeUndefined() + expect(onPiiDetected).toHaveBeenCalled() + }) + }) + + describe('enabled flag', () => { + it('returns value unchanged when enabled is false', () => { + const config: PiiFilterConfig = { ...defaultConfig, enabled: false } + const result = filterValue('utm_source', 'john@example.com', config) + expect(result).toBe('john@example.com') + }) + }) +}) + +describe('filterParams', () => { + it('filters PII from all values in reject mode', () => { + const params = { + utm_source: 'john@example.com', + utm_medium: 'email', + utm_campaign: 'spring-2025', + } + const result = filterParams(params, defaultConfig) + expect(result).toEqual({ + utm_medium: 'email', + utm_campaign: 'spring-2025', + }) + expect(result).not.toHaveProperty('utm_source') + }) + + it('redacts PII values in redact mode', () => { + const config: PiiFilterConfig = { ...defaultConfig, mode: 'redact' } + const params = { + utm_source: 'john@example.com', + utm_medium: 'email', + } + const result = filterParams(params, config) + expect(result).toEqual({ + utm_source: '[REDACTED]', + utm_medium: 'email', + }) + }) + + it('preserves clean values', () => { + const params = { + utm_source: 'linkedin', + utm_medium: 'cpc', + utm_campaign: 'spring-2025', + } + const result = filterParams(params, defaultConfig) + expect(result).toEqual(params) + }) + + it('handles undefined values', () => { + const params = { + utm_source: 'linkedin', + utm_medium: undefined, + } + const result = filterParams(params, defaultConfig) + expect(result).toEqual({ + utm_source: 'linkedin', + utm_medium: undefined, + }) + }) + + it('returns empty object for empty input', () => { + const result = filterParams({}, defaultConfig) + expect(result).toEqual({}) + }) + + it('returns params unchanged when enabled is false', () => { + const config: PiiFilterConfig = { ...defaultConfig, enabled: false } + const params = { + utm_source: 'john@example.com', + utm_medium: 'email', + } + const result = 
filterParams(params, config) + expect(result).toEqual({ + utm_source: 'john@example.com', + utm_medium: 'email', + }) + }) + + it('works with camelCase keys', () => { + const params = { + utmSource: 'john@example.com', + utmMedium: 'cpc', + } + const result = filterParams(params, defaultConfig) + expect(result).toEqual({ + utmMedium: 'cpc', + }) + expect(result).not.toHaveProperty('utmSource') + }) +}) diff --git a/__tests__/docs.md b/__tests__/docs.md index 9758471..d355545 100644 --- a/__tests__/docs.md +++ b/__tests__/docs.md @@ -18,9 +18,9 @@ Path: @/__tests__ ### Core Implementation - **`setup.ts`**: Creates a fresh sessionStorage mock and location mock in `beforeEach`, ensuring tests are isolated. The storage mock implements `getItem`, `setItem`, `removeItem`, `clear`, `length`, and `key`. Location is stubbed with `href`, `search`, `hash`, `pathname`, `protocol`, `host`, and `hostname`. -- **`core/` tests**: Cover capture (URL parsing, allowed parameters, key format conversion, SSR fallback, sanitization integration), sanitizer (HTML stripping, control character removal, custom patterns, truncation, combined rules), storage (write/read/clear, format conversion, validation of stored data, silent failure), appender (query/fragment placement, preserveExisting, remove, extract), keys (bidirectional conversion, standard and custom keys, detection, validation), and validator (protocol, domain, normalization, mutable default protocol). -- **`config/` tests**: Cover `createConfig` merging semantics (nullish coalescing, array replacement, object merge), `validateConfig` error messages, `loadConfigFromJson` fallback behavior, and sanitize config handling (default inclusion, partial merge, custom pattern preservation, validation of each sanitize field). 
-- **`react/` tests**: Use `@testing-library/react` `renderHook` and `render` to test `useUtmTracking` (auto-capture, manual capture, clear, appendToUrl with share context and exclusions) and `UtmProvider`/`useUtmContext` (context propagation, error on missing provider). +- **`core/` tests**: Cover capture (URL parsing, allowed parameters, key format conversion, SSR fallback, sanitization integration, PII filtering integration), sanitizer (HTML stripping, control character removal, custom patterns, truncation, combined rules), pii-filter (pattern detection, reject/redact modes, allowlist, callback, disabled patterns, edge cases), storage (write/read/clear, format conversion, validation of stored data, silent failure), appender (query/fragment placement, preserveExisting, remove, extract), keys (bidirectional conversion, standard and custom keys, detection, validation), and validator (protocol, domain, normalization, mutable default protocol). +- **`config/` tests**: Cover `createConfig` merging semantics (nullish coalescing, array replacement, object merge), `validateConfig` error messages, `loadConfigFromJson` fallback behavior, sanitize config handling (default inclusion, partial merge, custom pattern preservation, validation of each sanitize field), and piiFiltering config handling (default inclusion, partial merge, custom patterns replacement, mode validation). +- **`react/` tests**: Use `@testing-library/react` `renderHook` and `render` to test `useUtmTracking` (auto-capture, manual capture, clear, appendToUrl with share context and exclusions, sanitization, PII filtering) and `UtmProvider`/`useUtmContext` (context propagation, error on missing provider). 
### Things to Know diff --git a/__tests__/react/useUtmTracking.test.tsx b/__tests__/react/useUtmTracking.test.tsx index 9ca004a..dfe2567 100644 --- a/__tests__/react/useUtmTracking.test.tsx +++ b/__tests__/react/useUtmTracking.test.tsx @@ -327,6 +327,31 @@ describe('useUtmTracking', () => { }) }) + describe('PII filtering', () => { + it('filters PII values when piiFiltering is enabled', () => { + vi.stubGlobal('location', { + href: 'https://example.com?utm_source=john@example.com&utm_medium=email', + search: '?utm_source=john@example.com&utm_medium=email', + }) + + const { result } = renderHook(() => + useUtmTracking({ + config: { + captureOnMount: false, + piiFiltering: { enabled: true }, + }, + }), + ) + + act(() => { + result.current.capture() + }) + + expect(result.current.utmParameters).not.toHaveProperty('utm_source') + expect(result.current.utmParameters?.utm_medium).toBe('email') + }) + }) + describe('key format', () => { it('uses snake_case by default', () => { vi.stubGlobal('location', { diff --git a/src/config/defaults.ts b/src/config/defaults.ts index bfea562..6e19276 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -4,7 +4,7 @@ * Provides sensible defaults for UTM toolkit configuration. 
*/ -import type { ResolvedUtmConfig, SanitizeConfig } from '../types' +import type { PiiFilterConfig, PiiPattern, ResolvedUtmConfig, SanitizeConfig } from '../types' /** * Default sanitization configuration @@ -17,6 +17,42 @@ export const DEFAULT_SANITIZE_CONFIG: SanitizeConfig = { maxLength: 200, } +/** + * Built-in PII detection patterns + */ +export const DEFAULT_PII_PATTERNS: PiiPattern[] = [ + { + name: 'email', + pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, + enabled: true, + }, + { + name: 'phone_international', + pattern: /\+\d{10,15}\b/, + enabled: true, + }, + { + name: 'phone_uk', + pattern: /\b(?:0|\+44)\d{9,10}\b/, + enabled: true, + }, + { + name: 'phone_us', + pattern: /\b\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/, + enabled: true, + }, +] + +/** + * Default PII filtering configuration + * PII filtering is disabled by default but has sensible defaults when enabled + */ +export const DEFAULT_PII_FILTER_CONFIG: PiiFilterConfig = { + enabled: false, + mode: 'reject', + patterns: [...DEFAULT_PII_PATTERNS], +} + /** * Standard UTM parameters (snake_case format for URLs) */ @@ -63,6 +99,9 @@ export const DEFAULT_CONFIG: ResolvedUtmConfig = { /** Sanitization disabled by default */ sanitize: { ...DEFAULT_SANITIZE_CONFIG }, + + /** PII filtering disabled by default (deep copy to prevent shared references) */ + piiFiltering: { ...DEFAULT_PII_FILTER_CONFIG, patterns: [...DEFAULT_PII_PATTERNS] }, } /** @@ -77,5 +116,9 @@ export function getDefaultConfig(): ResolvedUtmConfig { shareContextParams: { ...DEFAULT_CONFIG.shareContextParams }, excludeFromShares: [...DEFAULT_CONFIG.excludeFromShares], sanitize: { ...DEFAULT_CONFIG.sanitize }, + piiFiltering: { + ...DEFAULT_CONFIG.piiFiltering, + patterns: DEFAULT_CONFIG.piiFiltering.patterns.map((p) => ({ ...p })), + }, } } diff --git a/src/config/docs.md b/src/config/docs.md index 392aa55..302c964 100644 --- a/src/config/docs.md +++ b/src/config/docs.md @@ -14,15 +14,16 @@ Path: @/src/config - `@/src/debug` 
imports `getDefaultConfig()` from here as a fallback when no config is provided to diagnostic functions. - `DEFAULT_CONFIG` and `STANDARD_UTM_PARAMETERS` are the canonical definitions of default behavior (enabled, snake_case, sessionStorage key `utm_parameters`, auto-capture on mount, append to shares, the 6 standard UTM params). - `DEFAULT_SANITIZE_CONFIG` defines the sanitization defaults: disabled by default, but with safe-by-default values when enabled (`stripHtml: true`, `stripControlChars: true`, `maxLength: 200`). It is exported as a public constant and spread into `DEFAULT_CONFIG.sanitize`. +- `DEFAULT_PII_PATTERNS` defines built-in PII detection regexes (email, phone_international, phone_uk, phone_us), all enabled by default. `DEFAULT_PII_FILTER_CONFIG` wraps these patterns with `enabled: false` and `mode: 'reject'`. Both are exported as public constants and used in `DEFAULT_CONFIG.piiFiltering`. - The config system does not perform side effects -- it is pure data transformation. ### Core Implementation -- `createConfig()` merges a partial user config with defaults using nullish coalescing (`??`) for scalar fields. Array fields (`allowedParameters`, `excludeFromShares`) are replaced wholesale when provided by the user, not merged. Object fields (`defaultParams`, `shareContextParams`) are shallow-merged. The `sanitize` field is merged via `mergeSanitizeConfig()`, which uses nullish coalescing per-field so partial overrides preserve unspecified defaults. +- `createConfig()` merges a partial user config with defaults using nullish coalescing (`??`) for scalar fields. Array fields (`allowedParameters`, `excludeFromShares`) are replaced wholesale when provided by the user, not merged. Object fields (`defaultParams`, `shareContextParams`) are shallow-merged. The `sanitize` field is merged via `mergeSanitizeConfig()` and `piiFiltering` via `mergePiiFilterConfig()`, both using nullish coalescing per-field so partial overrides preserve unspecified defaults. 
For `piiFiltering`, user-provided `patterns` replace the defaults entirely (array replacement semantics), while scalar fields like `enabled` and `mode` merge individually. - `mergeConfig()` follows the same semantics but takes a `ResolvedUtmConfig` as the base instead of implicitly using defaults -- useful for layering configurations. - `loadConfigFromJson()` accepts `unknown` input, validates it is a non-null non-array object, then delegates to `createConfig()`. Invalid input falls back to defaults with a `console.warn`. -- `validateConfig()` performs runtime type checking on each config field and returns an array of error message strings (empty array means valid). Sanitize validation checks that `sanitize` is an object, `enabled`/`stripHtml`/`stripControlChars` are booleans, and `maxLength` is a positive number. -- `getDefaultConfig()` returns a shallow copy of `DEFAULT_CONFIG` with cloned arrays and objects to prevent mutation of the shared constant. +- `validateConfig()` performs runtime type checking on each config field and returns an array of error message strings (empty array means valid). Sanitize validation checks that `sanitize` is an object, `enabled`/`stripHtml`/`stripControlChars` are booleans, `maxLength` is a positive finite number, and `customPattern` is a RegExp. PII filtering validation checks that `piiFiltering` is an object, `enabled` is boolean, `mode` is `'reject'` or `'redact'`, `patterns` is an array whose entries are objects with a string `name`, a RegExp `pattern`, and a boolean `enabled`, `allowlistPattern` is a RegExp, and `onPiiDetected` is a function. +- `getDefaultConfig()` returns a shallow copy of `DEFAULT_CONFIG` with cloned arrays and objects to prevent mutation of the shared constant. For `piiFiltering`, it copies each pattern object (`patterns.map(p => ({...p}))`) so that changing a returned pattern's fields (e.g. `enabled`) does not affect the shared defaults; the copy is per-object and shallow, so the `RegExp` instances themselves are still shared with the defaults. 
### Things to Know diff --git a/src/config/index.ts b/src/config/index.ts index c3fea51..dd7cb5f 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -5,6 +5,8 @@ export { DEFAULT_CONFIG, DEFAULT_SANITIZE_CONFIG, + DEFAULT_PII_PATTERNS, + DEFAULT_PII_FILTER_CONFIG, STANDARD_UTM_PARAMETERS, getDefaultConfig, } from './defaults' diff --git a/src/config/loader.ts b/src/config/loader.ts index 575c2b9..bfb9b8b 100644 --- a/src/config/loader.ts +++ b/src/config/loader.ts @@ -7,6 +7,7 @@ import type { UtmConfig, ResolvedUtmConfig, + PiiFilterConfig, SanitizeConfig, ShareContextParams, UtmParameters, @@ -58,6 +59,25 @@ function mergeSanitizeConfig( } } +/** + * Merge PII filter config with defaults + */ +function mergePiiFilterConfig( + base: PiiFilterConfig, + override: Partial | undefined, +): PiiFilterConfig { + if (!override) { + return { ...base, patterns: base.patterns.map((p) => ({ ...p })) } + } + return { + enabled: override.enabled ?? base.enabled, + mode: override.mode ?? base.mode, + patterns: override.patterns ? [...override.patterns] : base.patterns.map((p) => ({ ...p })), + allowlistPattern: override.allowlistPattern ?? base.allowlistPattern, + onPiiDetected: override.onPiiDetected ?? base.onPiiDetected, + } +} + /** * Merge two UTM parameter objects */ @@ -108,6 +128,7 @@ export function createConfig(userConfig?: Partial): ResolvedUtmConfig ? [...userConfig.excludeFromShares] : defaults.excludeFromShares, sanitize: mergeSanitizeConfig(defaults.sanitize, userConfig.sanitize), + piiFiltering: mergePiiFilterConfig(defaults.piiFiltering, userConfig.piiFiltering), } } @@ -140,6 +161,7 @@ export function mergeConfig( ? 
[...override.excludeFromShares] : [...base.excludeFromShares], sanitize: mergeSanitizeConfig(base.sanitize, override.sanitize), + piiFiltering: mergePiiFilterConfig(base.piiFiltering, override.piiFiltering), } } @@ -271,6 +293,52 @@ export function validateConfig(config: unknown): string[] { } } + if (c.piiFiltering !== undefined) { + if ( + typeof c.piiFiltering !== 'object' || + c.piiFiltering === null || + Array.isArray(c.piiFiltering) + ) { + errors.push('piiFiltering must be an object') + } else { + const p = c.piiFiltering as Record + if (p.enabled !== undefined && typeof p.enabled !== 'boolean') { + errors.push('piiFiltering.enabled must be a boolean') + } + if (p.mode !== undefined && p.mode !== 'reject' && p.mode !== 'redact') { + errors.push('piiFiltering.mode must be "reject" or "redact"') + } + if (p.patterns !== undefined) { + if (!Array.isArray(p.patterns)) { + errors.push('piiFiltering.patterns must be an array') + } else { + for (let i = 0; i < p.patterns.length; i++) { + const pat = p.patterns[i] as Record + if (typeof pat !== 'object' || pat === null) { + errors.push(`piiFiltering.patterns[${i}] must be an object`) + continue + } + if (typeof pat.name !== 'string') { + errors.push(`piiFiltering.patterns[${i}].name must be a string`) + } + if (!(pat.pattern instanceof RegExp)) { + errors.push(`piiFiltering.patterns[${i}].pattern must be a RegExp`) + } + if (typeof pat.enabled !== 'boolean') { + errors.push(`piiFiltering.patterns[${i}].enabled must be a boolean`) + } + } + } + } + if (p.allowlistPattern !== undefined && !(p.allowlistPattern instanceof RegExp)) { + errors.push('piiFiltering.allowlistPattern must be a RegExp') + } + if (p.onPiiDetected !== undefined && typeof p.onPiiDetected !== 'function') { + errors.push('piiFiltering.onPiiDetected must be a function') + } + } + } + return errors } diff --git a/src/core/capture.ts b/src/core/capture.ts index fa1877a..47177aa 100644 --- a/src/core/capture.ts +++ b/src/core/capture.ts @@ -5,9 +5,10 @@ 
* Supports standard UTM parameters and custom utm_ prefixed parameters. */ -import type { KeyFormat, SanitizeConfig, UtmParameters } from '../types' -import { DEFAULT_SANITIZE_CONFIG } from '../config/defaults' +import type { KeyFormat, PiiFilterConfig, SanitizeConfig, UtmParameters } from '../types' +import { DEFAULT_PII_FILTER_CONFIG, DEFAULT_SANITIZE_CONFIG } from '../config/defaults' import { convertParams, isSnakeCaseUtmKey } from './keys' +import { filterParams } from './pii-filter' import { sanitizeParams } from './sanitizer' /** @@ -22,6 +23,9 @@ export interface CaptureOptions { /** Sanitization configuration — when enabled, strips dangerous characters from values */ sanitize?: Partial + + /** PII filtering configuration — when enabled, detects and filters PII from values */ + piiFiltering?: Partial } /** @@ -64,7 +68,7 @@ function isBrowser(): boolean { * ``` */ export function captureUtmParameters(url?: string, options: CaptureOptions = {}): UtmParameters { - const { keyFormat = 'snake_case', allowedParameters, sanitize } = options + const { keyFormat = 'snake_case', allowedParameters, sanitize, piiFiltering } = options // Get URL, defaulting to current page URL in browser const urlString = url ?? (isBrowser() ? window.location.href : '') @@ -96,10 +100,20 @@ export function captureUtmParameters(url?: string, options: CaptureOptions = {}) // Apply sanitization if configured and enabled const resolvedSanitize: SanitizeConfig = { ...DEFAULT_SANITIZE_CONFIG, ...sanitize } - const captured: UtmParameters = resolvedSanitize.enabled + const sanitized: UtmParameters = resolvedSanitize.enabled ? sanitizeParams(params as UtmParameters, resolvedSanitize) : (params as UtmParameters) + // Apply PII filtering if configured and enabled + const resolvedPiiFilter: PiiFilterConfig = { + ...DEFAULT_PII_FILTER_CONFIG, + ...piiFiltering, + patterns: piiFiltering?.patterns ?? 
[...DEFAULT_PII_FILTER_CONFIG.patterns], + } + const captured: UtmParameters = resolvedPiiFilter.enabled + ? filterParams(sanitized, resolvedPiiFilter) + : sanitized + // Convert to target format if needed if (keyFormat === 'camelCase') { return convertParams(captured, 'camelCase') diff --git a/src/core/docs.md b/src/core/docs.md index f7829cc..999989f 100644 --- a/src/core/docs.md +++ b/src/core/docs.md @@ -23,7 +23,7 @@ The data flow through the core modules follows this path: URL string | v -[capture.ts] -- parses URL, filters to utm_* keys, applies allowedParameters, sanitizes values, converts key format +[capture.ts] -- parses URL, filters to utm_* keys, applies allowedParameters, sanitizes values, filters PII, converts key format | v UtmParameters object @@ -40,10 +40,12 @@ URL string with UTM params - **keys.ts**: Bidirectional key conversion between `snake_case` and `camelCase`. Uses lookup tables (`SNAKE_TO_CAMEL`, `CAMEL_TO_SNAKE`) for the 6 standard keys and regex-based conversion for custom keys. `isSnakeCaseUtmKey` checks for `utm_` prefix; `isCamelCaseUtmKey` checks for `utm` followed by an uppercase letter. `detectKeyFormat` scans keys and returns the first format found, defaulting to `snake_case` for empty objects. -- **capture.ts**: `captureUtmParameters()` takes a URL string (defaulting to `window.location.href`), parses it via `new URL()`, iterates `searchParams`, and filters to keys passing `isSnakeCaseUtmKey`. Optionally filters by an `allowedParameters` set, applies value sanitization when `sanitize.enabled` is true, then converts output via `convertParams`. The pipeline order is: extract params --> filter by allowlist --> sanitize --> convert key format. +- **capture.ts**: `captureUtmParameters()` takes a URL string (defaulting to `window.location.href`), parses it via `new URL()`, iterates `searchParams`, and filters to keys passing `isSnakeCaseUtmKey`. 
The pipeline order is: extract params --> filter by `allowedParameters` --> sanitize --> PII filter --> convert key format. Both sanitization and PII filtering resolve their config by spreading user-provided partial config over the corresponding `DEFAULT_*` constants from `@/src/config/defaults.ts`, then check `enabled` before running. - **sanitizer.ts**: `sanitizeValue()` strips dangerous characters from a single string value. Rules apply in order: HTML-significant characters (`< > " ' \``) --> control characters (\x00-\x1F except tab/newline/CR) --> optional custom regex pattern --> trim --> truncate to `maxLength`. `sanitizeParams()` applies `sanitizeValue()` to every non-undefined value in a `UtmParameters` object, returning a new object with keys preserved unchanged. Both functions are pure and stateless; all behavior is driven by the `SanitizeConfig` argument. +- **pii-filter.ts**: `detectPii()` tests a value against the enabled entries of an array of `PiiPattern` objects (disabled patterns are skipped) and returns the first matching pattern (or `null`). `filterValue()` checks a single value: if an `allowlistPattern` is configured, the value must match it to pass and pattern detection is skipped entirely (allowlist takes precedence over pattern detection); otherwise, it falls back to `detectPii()`. In `reject` mode, detected PII causes the value to be dropped (returns `undefined`); in `redact` mode, the value is replaced with `'[REDACTED]'`. `filterParams()` applies `filterValue()` to every non-undefined value, omitting keys entirely in reject mode when PII is found. The optional `onPiiDetected` callback fires synchronously with `(key, value, patternName)`, where `patternName` is the matching pattern's `name` or `'allowlist'` for an allowlist failure; exceptions thrown by the callback are caught and ignored so they cannot break the filter pipeline. + - **storage.ts**: Uses sessionStorage with a configurable key (default: `utm_parameters`). Write operations skip empty param objects and fail silently with `console.warn`. Read operations validate parsed JSON with `isValidStoredData()`, which checks that all keys pass `isUtmKey` and all values are strings or undefined.
- **appender.ts**: `appendUtmParameters()` always converts input params to snake_case before appending to URLs (URL parameters are conventionally snake_case). Supports query string or fragment placement via `AppendOptions.toFragment`. Uses a custom `buildQueryString()` that omits `=` for empty-string values. When adding to query, it also cleans conflicting UTM params from the fragment (and vice versa). `removeUtmParameters()` strips UTM params from both query and fragment. `extractUtmParameters()` pulls UTM params from both locations, with fragment params taking precedence. @@ -57,6 +59,8 @@ URL string with UTM params - **Silent failure**: Storage and capture operations never throw. Errors produce `console.warn` messages and return fallback values. The appender returns the original URL unchanged on failure. - **validator.ts mutable state**: `defaultProtocol` is module-level mutable state modified via `setDefaultProtocol()`. This is global -- all callers share the same default protocol. Tests that call `setDefaultProtocol()` should restore the original value. - **Fragment parameter handling in appender**: When appending to query, conflicting UTM params are removed from the fragment. When appending to fragment, conflicting UTM params are removed from the query. Only fragments that contain `=` are treated as parameter-bearing; plain anchors like `#section` are left alone. -- **Sanitization is capture-time only**: Sanitization runs during `captureUtmParameters()` before values enter the system. It does not run at storage time, append time, or on read. This means values stored in sessionStorage are already sanitized if sanitization was enabled at capture. +- **Sanitization and PII filtering are capture-time only**: Both run during `captureUtmParameters()` before values enter the system. They do not run at storage time, append time, or on read. Values stored in sessionStorage are already sanitized/filtered if these features were enabled at capture. 
+- **PII filter runs after sanitization**: This ordering matters because sanitization may strip characters (e.g., HTML angle brackets) that could affect whether a PII regex matches. By sanitizing first, PII detection operates on the cleaned value. +- **Regex `lastIndex` reset**: Both `sanitizer.ts` (for `customPattern`) and `pii-filter.ts` (for each `PiiPattern.pattern` and `allowlistPattern`) reset `lastIndex = 0` before calling `.test()` or `.replace()`. This prevents stale state when a regex with the `g` or `y` flag is reused across calls. Created and maintained by Nori. diff --git a/src/core/index.ts b/src/core/index.ts index 67d7114..509339c 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -51,6 +51,9 @@ export { // Sanitizer utilities export { sanitizeValue, sanitizeParams } from './sanitizer' +// PII filter utilities +export { detectPii, filterValue, filterParams } from './pii-filter' + // Validator utilities export { validateUrl, diff --git a/src/core/pii-filter.ts b/src/core/pii-filter.ts new file mode 100644 index 0000000..3506d6d --- /dev/null +++ b/src/core/pii-filter.ts @@ -0,0 +1,111 @@ +/** + * PII Filter + * + * Detects and filters personally identifiable information from UTM parameter values. + * Prevents email addresses, phone numbers, and other PII from leaking into analytics.
+ */ + +import type { PiiFilterConfig, PiiPattern, UtmParameters } from '../types' + +/** + * Detect PII in a value by testing against enabled patterns + * + * @param value - The value to check + * @param patterns - PII patterns to test against + * @returns The first matching pattern, or null if no PII detected + */ +export function detectPii(value: string, patterns: PiiPattern[]): PiiPattern | null { + for (const pattern of patterns) { + if (!pattern.enabled) { + continue + } + pattern.pattern.lastIndex = 0 + if (pattern.pattern.test(value)) { + return pattern + } + } + return null +} + +/** + * Filter a single UTM parameter value for PII + * + * Checks allowlist first (if configured), then pattern-based detection. + * In reject mode, returns undefined for PII values. + * In redact mode, returns '[REDACTED]' for PII values. + * + * @param key - The parameter key (for callback reporting) + * @param value - The parameter value to check + * @param config - PII filter configuration + * @returns The original value if clean, undefined (reject) or '[REDACTED]' (redact) if PII detected + */ +export function filterValue( + key: string, + value: string, + config: PiiFilterConfig, +): string | undefined { + if (!config.enabled) { + return value + } + + // Allowlist check takes precedence + if (config.allowlistPattern) { + config.allowlistPattern.lastIndex = 0 + if (!config.allowlistPattern.test(value)) { + try { + config.onPiiDetected?.(key, value, 'allowlist') + } catch { + // Callback errors should not break the filter pipeline + } + return config.mode === 'redact' ? '[REDACTED]' : undefined + } + // Value passes allowlist — no further checks needed + return value + } + + // Pattern-based PII detection + const detected = detectPii(value, config.patterns) + if (detected) { + try { + config.onPiiDetected?.(key, value, detected.name) + } catch { + // Callback errors should not break the filter pipeline + } + return config.mode === 'redact' ? 
'[REDACTED]' : undefined + } + + return value +} + +/** + * Filter all values in a UTM parameters object for PII + * + * In reject mode, keys with PII values are removed from the result. + * In redact mode, PII values are replaced with '[REDACTED]'. + * + * @param params - UTM parameters object + * @param config - PII filter configuration + * @returns New object with PII values filtered + */ +export function filterParams(params: UtmParameters, config: PiiFilterConfig): UtmParameters { + if (!config.enabled) { + return { ...params } + } + + const result: Record = {} + + for (const [key, value] of Object.entries(params)) { + if (value === undefined) { + result[key] = undefined + continue + } + + const filtered = filterValue(key, value, config) + if (filtered !== undefined) { + result[key] = filtered + } + // In reject mode, undefined means the key is omitted entirely + } + + return result as UtmParameters +} diff --git a/src/docs.md b/src/docs.md index 6854fea..1dec067 100644 --- a/src/docs.md +++ b/src/docs.md @@ -32,11 +32,11 @@ Consumer API - **types/** (`@/src/types`): Shared type definitions consumed by all other modules. Defines the dual key format system (snake_case/camelCase) and configuration interfaces. - **config/** (`@/src/config`): Pure configuration creation and validation. Merges partial user config with defaults to produce `ResolvedUtmConfig`. -- **core/** (`@/src/core`): Framework-agnostic UTM operations -- capture from URLs, sanitize parameter values, persist in sessionStorage, append to outbound URLs, convert key formats, validate URLs. All SSR-safe. +- **core/** (`@/src/core`): Framework-agnostic UTM operations -- capture from URLs, sanitize parameter values, filter PII, persist in sessionStorage, append to outbound URLs, convert key formats, validate URLs. All SSR-safe. - **debug/** (`@/src/debug`): Development-time diagnostics. Assembles state snapshots and provides formatted console output and optional `window.utmDebug` helpers. 
- **react/** (`@/src/react`): React hook and context provider that orchestrate the core modules into stateful React APIs with auto-capture-on-mount behavior. -**Key data flow**: URL with UTM params --> `capture` (with optional sanitization) --> `store` in sessionStorage --> `appendToUrl` for outbound link generation. +**Key data flow**: URL with UTM params --> `capture` (with optional sanitization and PII filtering) --> `store` in sessionStorage --> `appendToUrl` for outbound link generation. ### Things to Know diff --git a/src/index.ts b/src/index.ts index b100e14..7f6f9bb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -52,6 +52,11 @@ export { sanitizeValue, sanitizeParams, + // PII Filter + detectPii, + filterValue, + filterParams, + // Validator validateUrl, normalizeUrl, @@ -69,6 +74,8 @@ export { export { DEFAULT_CONFIG, DEFAULT_SANITIZE_CONFIG, + DEFAULT_PII_PATTERNS, + DEFAULT_PII_FILTER_CONFIG, STANDARD_UTM_PARAMETERS, getDefaultConfig, createConfig, @@ -100,4 +107,6 @@ export type { UtmProviderProps, DiagnosticInfo, SanitizeConfig, + PiiPattern, + PiiFilterConfig, } from './types' diff --git a/src/react/docs.md b/src/react/docs.md index 4e82174..3eb82cc 100644 --- a/src/react/docs.md +++ b/src/react/docs.md @@ -29,7 +29,7 @@ useState initializer --> getStoredUtmParameters() --> initial state from session useEffect (once, via ref guard) --> if captureOnMount && enabled: | v -capture() --> captureUtmParameters(window.location.href) --> if has params: +capture() --> captureUtmParameters(window.location.href, {sanitize, piiFiltering}) --> if has params: | storeUtmParameters() | setUtmParameters() | else if has defaultParams: diff --git a/src/react/useUtmTracking.ts b/src/react/useUtmTracking.ts index 5327dfe..9e7c88e 100644 --- a/src/react/useUtmTracking.ts +++ b/src/react/useUtmTracking.ts @@ -122,6 +122,7 @@ export function useUtmTracking(options: UseUtmTrackingOptions = {}): UseUtmTrack keyFormat: config.keyFormat, allowedParameters: 
config.allowedParameters, sanitize: config.sanitize, + piiFiltering: config.piiFiltering, }) // Only store if we found some parameters diff --git a/src/types/docs.md b/src/types/docs.md index 6c0528c..dbbea8a 100644 --- a/src/types/docs.md +++ b/src/types/docs.md @@ -24,6 +24,7 @@ Path: @/src/types - `ShareContextParams` uses `Partial>` with a `default` key for base params and platform-specific overrides, enabling a layered merge strategy in `useUtmTracking`'s `appendToUrl` callback. - `AppendOptions` controls whether UTM params go into query string or fragment, and whether existing UTM params on the target URL are preserved. - `SanitizeConfig` defines value sanitization behavior with fields for `enabled`, `stripHtml`, `stripControlChars`, `maxLength`, and an optional `customPattern` (RegExp). It appears as `Partial<SanitizeConfig>` on `UtmConfig` (user input) and as a required `SanitizeConfig` on `ResolvedUtmConfig` (resolved output). This follows the same partial-in/resolved-out pattern used by the rest of the config system. +- `PiiPattern` defines a named regex pattern with an `enabled` toggle. `PiiFilterConfig` groups these patterns with a `mode` (`'reject'` or `'redact'`), an optional `allowlistPattern` (RegExp for strict validation), and an optional synchronous `onPiiDetected` callback. Like `SanitizeConfig`, it appears as `Partial<PiiFilterConfig>` on `UtmConfig` and as a required `PiiFilterConfig` on `ResolvedUtmConfig`. ### Things to Know diff --git a/src/types/index.ts b/src/types/index.ts index b412654..0c01414 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -124,6 +124,47 @@ export interface SanitizeConfig { customPattern?: RegExp } +/** + * A named PII detection pattern + */ +export interface PiiPattern { + /** Identifier for this pattern (e.g.
'email', 'phone_us') */ + name: string + + /** Regex to detect PII in a value */ + pattern: RegExp + + /** Whether this pattern is active */ + enabled: boolean +} + +/** + * Configuration for PII filtering + */ +export interface PiiFilterConfig { + /** Enable PII filtering (default: false) */ + enabled: boolean + + /** How to handle detected PII: 'reject' discards the value, 'redact' replaces it with [REDACTED] */ + mode: 'reject' | 'redact' + + /** PII detection patterns (default: built-in email + phone patterns) */ + patterns: PiiPattern[] + + /** Optional strict allowlist — values must match this pattern to be accepted (takes precedence over PII patterns) */ + allowlistPattern?: RegExp + + /** + * Optional callback fired when PII is detected. + * + * WARNING: The `value` parameter contains the raw detected PII. + * Do NOT log or transmit this value to analytics services, + * as that would defeat the purpose of PII filtering. + * This callback is intended for counting/alerting only. + */ + onPiiDetected?: (param: string, value: string, patternName: string) => void +} + /** * Main configuration interface for UTM toolkit */ @@ -160,6 +201,9 @@ export interface UtmConfig { /** Value sanitization configuration */ sanitize?: Partial + + /** PII filtering configuration */ + piiFiltering?: Partial } /** @@ -176,6 +220,7 @@ export interface ResolvedUtmConfig { shareContextParams: ShareContextParams excludeFromShares: string[] sanitize: SanitizeConfig + piiFiltering: PiiFilterConfig } /**