diff --git a/packages/build-tools/src/context.ts b/packages/build-tools/src/context.ts index 6f45a2db06..e762b31635 100644 --- a/packages/build-tools/src/context.ts +++ b/packages/build-tools/src/context.ts @@ -45,6 +45,7 @@ export type ArtifactToUpload = type: GenericArtifactType; name: string; paths: string[]; + metadata?: Record; }; export interface BuildContextOptions { diff --git a/packages/build-tools/src/steps/easFunctions.ts b/packages/build-tools/src/steps/easFunctions.ts index e38e5a87a9..ec0ac8c8b7 100644 --- a/packages/build-tools/src/steps/easFunctions.ts +++ b/packages/build-tools/src/steps/easFunctions.ts @@ -98,7 +98,7 @@ export function getEasFunctions(ctx: CustomBuildContext): BuildFunction[] { createUploadToAscBuildFunction(), createReportMaestroTestResultsFunction(ctx), - createMaestroTestsBuildFunction(), + createMaestroTestsBuildFunction(ctx), ]; if (ctx.hasBuildJob()) { diff --git a/packages/build-tools/src/steps/functions/__tests__/maestroScreenshots.test.ts b/packages/build-tools/src/steps/functions/__tests__/maestroScreenshots.test.ts new file mode 100644 index 0000000000..00db6d78c6 --- /dev/null +++ b/packages/build-tools/src/steps/functions/__tests__/maestroScreenshots.test.ts @@ -0,0 +1,287 @@ +import fs from 'fs/promises'; +import os from 'os'; +import path from 'path'; + +import { createMockLogger } from '../../../__tests__/utils/logger'; +import { + type HarvestedScreenshot, + computePureFailureFlowNames, + harvestFailureScreenshotsAsync, + parseFailureScreenshotFilename, + selectFailureScreenshots, +} from '../maestroScreenshots'; + +describe(parseFailureScreenshotFilename, () => { + it('parses a plain failure screenshot', () => { + expect(parseFailureScreenshotFilename('screenshot-❌-1781186692250-(Login Flow).png')).toEqual({ + flowName: 'Login Flow', + capturedAtMs: 1781186692250, + }); + }); + + it('parses a sharded failure screenshot', () => { + expect( + parseFailureScreenshotFilename('screenshot-shard-2-❌-1781186692250-(Login Flow).png') + ).toEqual({ + flowName: 'Login Flow', + capturedAtMs: 1781186692250, + }); + }); + + it('handles parentheses inside the flow name (right-to-left parse)', () => { + expect( + parseFailureScreenshotFilename('screenshot-❌-1781186692250-(Login (staging) Flow).png') + ).toEqual({ + flowName: 'Login (staging) Flow', + capturedAtMs: 1781186692250, + }); + }); + + it('rejects non-failure screenshots (✅, ⚠️) and unrelated files', () => { + expect( + parseFailureScreenshotFilename('screenshot-✅-1781186692250-(Login Flow).png') + ).toBeNull(); + expect(parseFailureScreenshotFilename('commands-(Login Flow).json')).toBeNull(); + expect(parseFailureScreenshotFilename('android-maestro-junit-attempt-0.xml')).toBeNull(); + }); +}); + +describe(harvestFailureScreenshotsAsync, () => { + const logger = createMockLogger(); + let testsDirectory: string; + + beforeEach(async () => { + testsDirectory = await fs.mkdtemp(path.join(os.tmpdir(), 'maestro-harvest-test-')); + }); + + afterEach(async () => { + await fs.rm(testsDirectory, { recursive: true, force: true }); + }); + + async function makeDebugDir(name: string, files: string[], mtimeMs: number): Promise { + const dir = path.join(testsDirectory, name); + await fs.mkdir(dir); + for (const file of files) { + await fs.writeFile(path.join(dir, file), ''); + } + const when = new Date(mtimeMs); + await fs.utimes(dir, when, when); + } + + it('returns shots from dirs modified at/after sinceMtimeMs, attributed to the attempt', async () => { + const sinceMtimeMs = 1_000_000; + await makeDebugDir( + 'attempt-dir', + ['screenshot-❌-1781186692250-(Login Flow).png', 'commands.json'], + sinceMtimeMs + 5_000 + ); + + const shots = await harvestFailureScreenshotsAsync({ + testsDirectory, + capturedSinceMs: sinceMtimeMs, + attemptIndex: 1, + logger, + }); + + expect(shots).toEqual([ + { + fileAbsPath: path.join( + testsDirectory, + 'attempt-dir', + 'screenshot-❌-1781186692250-(Login Flow).png' + ), + displayName: 'Failure Screenshot: Login Flow (attempt 2)', + metadata: { + kind: 'maestro-test-screenshot', + flowName: 'Login Flow', + attemptIndex: 1, + capturedAtMs: 1781186692250, + }, + }, + ]); + }); + + it('ignores dirs older than sinceMtimeMs (previous attempts)', async () => { + const sinceMtimeMs = 1_000_000; + await makeDebugDir( + 'old-dir', + ['screenshot-❌-1781186692250-(Old Flow).png'], + sinceMtimeMs - 5_000 + ); + + const shots = await harvestFailureScreenshotsAsync({ + testsDirectory, + capturedSinceMs: sinceMtimeMs, + attemptIndex: 0, + logger, + }); + + expect(shots).toEqual([]); + }); + + it('scans multiple new dirs (maestro <2.5.0 split-dir bug)', async () => { + const sinceMtimeMs = 1_000_000; + await makeDebugDir( + 'dir-a', + [`screenshot-❌-${sinceMtimeMs + 100}-(Flow A).png`], + sinceMtimeMs + 1_000 + ); + await makeDebugDir( + 'dir-b', + [`screenshot-❌-${sinceMtimeMs + 200}-(Flow B).png`], + sinceMtimeMs + 2_000 + ); + + const shots = await harvestFailureScreenshotsAsync({ + testsDirectory, + capturedSinceMs: sinceMtimeMs, + attemptIndex: 0, + logger, + }); + + expect(shots.map(shot => shot.metadata.flowName).sort()).toEqual(['Flow A', 'Flow B']); + }); + + it('keeps the disk flow name (slashes already substituted) and strips the shard prefix', async () => { + const sinceMtimeMs = 1_000_000; + await makeDebugDir( + 'sharded-dir', + ['screenshot-shard-3-❌-1781186692250-(Login_Sub Flow).png'], + sinceMtimeMs + 1_000 + ); + + const shots = await harvestFailureScreenshotsAsync({ + testsDirectory, + capturedSinceMs: sinceMtimeMs, + attemptIndex: 0, + logger, + }); + + expect(shots).toHaveLength(1); + expect(shots[0].metadata.flowName).toBe('Login_Sub Flow'); + }); + + it('ignores a stale screenshot whose capturedAtMs predates sinceMtimeMs (dir touched later)', async () => { + const sinceMtimeMs = 1_000_000; + // The dir mtime is AFTER this attempt started (so it passes the dir gate), but the + // screenshot inside was captured BEFORE it — a prior attempt's shot in a dir touched later. + await makeDebugDir( + 'touched-dir', + ['screenshot-❌-999000-(Stale Flow).png'], + sinceMtimeMs + 5_000 + ); + + const shots = await harvestFailureScreenshotsAsync({ + testsDirectory, + capturedSinceMs: sinceMtimeMs, + attemptIndex: 1, + logger, + }); + + expect(shots).toEqual([]); + }); + + it('logs and returns [] when the tests dir cannot be read', async () => { + const shots = await harvestFailureScreenshotsAsync({ + testsDirectory: path.join(testsDirectory, 'does-not-exist'), + capturedSinceMs: 0, + attemptIndex: 0, + logger, + }); + + expect(shots).toEqual([]); + }); +}); + +describe(computePureFailureFlowNames, () => { + it('classifies a flow that failed every attempt as pure', () => { + const pure = computePureFailureFlowNames([ + { name: 'Login Flow', status: 'failed' }, + { name: 'Login Flow', status: 'failed' }, + ]); + expect([...pure]).toEqual(['Login Flow']); + }); + + it('excludes a flaky flow (failed then passed)', () => { + const pure = computePureFailureFlowNames([ + { name: 'Flaky Flow', status: 'failed' }, + { name: 'Flaky Flow', status: 'passed' }, + ]); + expect(pure.size).toBe(0); + }); + + it('excludes a fail-pass-fail flow (it passed at some point)', () => { + const pure = computePureFailureFlowNames([ + { name: 'FPF', status: 'failed' }, + { name: 'FPF', status: 'passed' }, + { name: 'FPF', status: 'failed' }, + ]); + expect(pure.size).toBe(0); + }); + + it('excludes a flow that only passed', () => { + expect(computePureFailureFlowNames([{ name: 'Happy', status: 'passed' }]).size).toBe(0); + }); + + it('normalizes the JUnit name to the screenshot form (/ -> _)', () => { + const pure = computePureFailureFlowNames([{ name: 'sub/Login', status: 'failed' }]); + expect([...pure]).toEqual(['sub_Login']); + }); + + it('excludes flows that collide only after / -> _ normalization (one failed, one passed)', () => { + const pure = computePureFailureFlowNames([ + { name: 'sub/Login', status: 'failed' }, + { name: 'sub_Login', status: 'passed' }, + ]); + expect(pure.size).toBe(0); + }); + + it('returns an empty set for no testcases', () => { + expect(computePureFailureFlowNames([]).size).toBe(0); + }); +}); + +describe(selectFailureScreenshots, () => { + function makeShot(flowName: string, attemptIndex: number): HarvestedScreenshot { + return { + fileAbsPath: `/tmp/${flowName}-${attemptIndex}.png`, + displayName: `Failure Screenshot: ${flowName} (attempt ${attemptIndex + 1})`, + metadata: { + kind: 'maestro-test-screenshot', + flowName, + attemptIndex, + capturedAtMs: 1_000 + attemptIndex, + }, + }; + } + + it('keeps only the final attempt for a pure-failure flow', () => { + const shots = [makeShot('Pure', 0), makeShot('Pure', 1), makeShot('Pure', 2)]; + const selected = selectFailureScreenshots(shots, new Set(['Pure'])); + expect(selected.map(shot => shot.metadata.attemptIndex)).toEqual([2]); + }); + + it('keeps every attempt for a flow not classified pure (flaky)', () => { + const shots = [makeShot('Flaky', 0), makeShot('Flaky', 1)]; + const selected = selectFailureScreenshots(shots, new Set()); + expect(selected.map(shot => shot.metadata.attemptIndex)).toEqual([0, 1]); + }); + + it('reduces pure flows while preserving flaky flows and input order', () => { + const shots = [ + makeShot('Pure', 0), + makeShot('Flaky', 0), + makeShot('Pure', 1), + makeShot('Flaky', 1), + ]; + const selected = selectFailureScreenshots(shots, new Set(['Pure'])); + expect(selected.map(shot => `${shot.metadata.flowName}#${shot.metadata.attemptIndex}`)).toEqual( + ['Flaky#0', 'Pure#1', 'Flaky#1'] + ); + }); + + it('keeps all when the pure set is empty', () => { + const shots = [makeShot('A', 0), makeShot('A', 1)]; + expect(selectFailureScreenshots(shots, new Set())).toHaveLength(2); + }); +}); diff --git a/packages/build-tools/src/steps/functions/__tests__/maestroTests.test.ts b/packages/build-tools/src/steps/functions/__tests__/maestroTests.test.ts index d99b9d6d55..816d0b85eb 100644 --- a/packages/build-tools/src/steps/functions/__tests__/maestroTests.test.ts +++ b/packages/build-tools/src/steps/functions/__tests__/maestroTests.test.ts @@ -1,10 +1,15 @@ -import { SystemError, UserError } from '@expo/eas-build-job'; +import { GenericArtifactType, SystemError, UserError } from '@expo/eas-build-job'; import spawn from '@expo/turtle-spawn'; +import fs from 'fs/promises'; +import os from 'os'; +import path from 'path'; import { createGlobalContextMock } from '../../../__tests__/utils/context'; import { createMockLogger } from '../../../__tests__/utils/logger'; +import { CustomBuildContext } from '../../../customBuildContext'; import * as discovery from '../maestroFlowDiscovery'; import * as parser from '../maestroResultParser'; +import * as maestroScreenshots from '../maestroScreenshots'; import { createMaestroTestsBuildFunction } from '../maestroTests'; jest.mock('@expo/turtle-spawn', () => ({ @@ -17,7 +22,29 @@ jest.mock('../../../utils/retry', () => ({ sleepAsync: jest.fn().mockResolvedValue(undefined), })); +// Partial mock: stub the harvest (filesystem) but keep the real reduction helpers +// (computePureFailureFlowNames / selectFailureScreenshots) the step now calls post-loop. +jest.mock('../maestroScreenshots', () => ({ + ...jest.requireActual('../maestroScreenshots'), + harvestFailureScreenshotsAsync: jest.fn(), +})); + const mockedSpawn = jest.mocked(spawn); +const mockedHarvest = jest.mocked(maestroScreenshots.harvestFailureScreenshotsAsync); +const mockUploadArtifact = jest.fn(); + +function makeShot(index: number): maestroScreenshots.HarvestedScreenshot { + return { + fileAbsPath: path.join(os.tmpdir(), `src-screenshot-${index}.png`), + displayName: 'Failure Screenshot: Login (attempt 1)', + metadata: { + kind: 'maestro-test-screenshot', + flowName: 'Login', + attemptIndex: 0, + capturedAtMs: 1781186692250, + }, + }; +} const SPAWN_SUCCESS = { status: 0, @@ -41,7 +68,10 @@ function createStep( ReturnType['createBuildStepFromFunctionCall'] > { const logger = createMockLogger(); - const fn = createMaestroTestsBuildFunction(); + const ctx = { + runtimeApi: { uploadArtifact: mockUploadArtifact }, + } as unknown as CustomBuildContext; + const fn = createMaestroTestsBuildFunction(ctx); const globalCtx = createGlobalContextMock({ logger }); globalCtx.updateEnv(options.env ?? { HOME: '/home/expo' }); return fn.createBuildStepFromFunctionCall(globalCtx, { callInputs }); @@ -54,10 +84,18 @@ describe('createMaestroTestsBuildFunction', () => { // hit the real disk. Individual tests override this. jest.restoreAllMocks(); jest.spyOn(parser, 'mergeJUnitReports').mockResolvedValue(); + mockedHarvest.mockReset(); + mockedHarvest.mockResolvedValue([]); + mockUploadArtifact.mockReset(); + mockUploadArtifact.mockResolvedValue({ artifactId: 'artifact-1' }); }); it('exports a factory that returns a BuildFunction instance', () => { - expect(createMaestroTestsBuildFunction()).toBeDefined(); + expect( + createMaestroTestsBuildFunction({ + runtimeApi: { uploadArtifact: mockUploadArtifact }, + } as unknown as CustomBuildContext) + ).toBeDefined(); }); it('sets all outputs before running any flows', async () => { @@ -704,4 +742,149 @@ describe('createMaestroTestsBuildFunction', () => { const step = createStep({ flow_path: ['a.yaml'], platform: 'android' }); // no shards await step.executeAsync(); // should succeed, not throw }); + + it('uploads harvested screenshots after the retry loop with metadata', async () => { + mockedSpawn.mockResolvedValue(SPAWN_SUCCESS); + const shot = makeShot(0); + await fs.writeFile(shot.fileAbsPath, ''); + mockedHarvest.mockResolvedValue([shot]); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + await step.executeAsync(); + + expect(mockUploadArtifact).toHaveBeenCalledTimes(1); + expect(mockUploadArtifact).toHaveBeenCalledWith( + expect.objectContaining({ + artifact: expect.objectContaining({ + type: GenericArtifactType.OTHER, + name: shot.displayName, + metadata: shot.metadata, + }), + }) + ); + }); + + it('uploads screenshots even when all attempts fail, before throwing ERR_MAESTRO_TESTS_FAILED', async () => { + mockedSpawn.mockRejectedValue(rejectExit1()); + const shot = makeShot(0); + await fs.writeFile(shot.fileAbsPath, ''); + mockedHarvest.mockResolvedValue([shot]); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + await expect(step.executeAsync()).rejects.toThrow(UserError); + + expect(mockUploadArtifact).toHaveBeenCalledTimes(1); + }); + + it('swallows upload errors without affecting the test verdict', async () => { + mockedSpawn.mockResolvedValue(SPAWN_SUCCESS); + const shot = makeShot(0); + await fs.writeFile(shot.fileAbsPath, ''); + mockedHarvest.mockResolvedValue([shot]); + mockUploadArtifact.mockRejectedValue(new Error('upload boom')); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + await step.executeAsync(); + + expect(mockUploadArtifact).toHaveBeenCalledTimes(1); + }); + + it('skips screenshot upload but preserves the verdict when the JUnit re-parse throws', async () => { + mockedSpawn.mockRejectedValue(rejectExit1()); + mockedHarvest.mockResolvedValue([makeShot(0)]); + jest.spyOn(parser, 'parseJUnitTestCases').mockRejectedValue(new Error('malformed junit')); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + + // The classify throw is swallowed; the maestro failure verdict still surfaces. + await expect(step.executeAsync()).rejects.toThrow(UserError); + expect(mockUploadArtifact).not.toHaveBeenCalled(); + }); + + it('skips screenshot upload but preserves the verdict when the staging dir cannot be created', async () => { + mockedSpawn.mockRejectedValue(rejectExit1()); + mockedHarvest.mockResolvedValue([makeShot(0)]); + jest.spyOn(parser, 'parseJUnitTestCases').mockResolvedValue([]); + jest.spyOn(fs, 'mkdtemp').mockRejectedValue(new Error('no tmp space')); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + + // mkdtemp failure is swallowed; the maestro failure verdict still surfaces. + await expect(step.executeAsync()).rejects.toThrow(UserError); + expect(mockUploadArtifact).not.toHaveBeenCalled(); + }); + + it('caps uploads at 30 screenshots (excess dropped)', async () => { + mockedSpawn.mockResolvedValue(SPAWN_SUCCESS); + const shots = Array.from({ length: 35 }, (_, index) => makeShot(index)); + await Promise.all(shots.map(shot => fs.writeFile(shot.fileAbsPath, ''))); + mockedHarvest.mockResolvedValue(shots); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + await step.executeAsync(); + + expect(mockUploadArtifact).toHaveBeenCalledTimes(30); + }); + + it('does not harvest or upload screenshots when output_format is not junit', async () => { + mockedSpawn.mockResolvedValue(SPAWN_SUCCESS); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'html' }); + await step.executeAsync(); + + expect(mockedHarvest).not.toHaveBeenCalled(); + expect(mockUploadArtifact).not.toHaveBeenCalled(); + }); + + it('uploads only the final attempt for a pure-failure flow, but every attempt for a flaky flow', async () => { + mockedSpawn.mockResolvedValue(SPAWN_SUCCESS); + const tc = (name: string, status: 'passed' | 'failed'): parser.JUnitTestCaseResult => ({ + name, + file: undefined, + status, + duration: 0, + errorMessage: null, + tags: [], + properties: {}, + }); + jest + .spyOn(parser, 'parseJUnitTestCases') + .mockResolvedValue([ + tc('PureFlow', 'failed'), + tc('FlakyFlow', 'failed'), + tc('FlakyFlow', 'passed'), + ]); + + const makeShotFor = ( + flowName: string, + attemptIndex: number + ): maestroScreenshots.HarvestedScreenshot => ({ + fileAbsPath: path.join(os.tmpdir(), `wiring-${flowName}-${attemptIndex}.png`), + displayName: `Failure Screenshot: ${flowName} (attempt ${attemptIndex + 1})`, + metadata: { + kind: 'maestro-test-screenshot', + flowName, + attemptIndex, + capturedAtMs: 1781186692250 + attemptIndex, + }, + }); + const shots = [ + makeShotFor('PureFlow', 0), + makeShotFor('PureFlow', 1), + makeShotFor('FlakyFlow', 0), + makeShotFor('FlakyFlow', 1), + ]; + await Promise.all(shots.map(shot => fs.writeFile(shot.fileAbsPath, ''))); + mockedHarvest.mockResolvedValue(shots); + + const step = createStep({ flow_path: ['a.yaml'], platform: 'android', output_format: 'junit' }); + await step.executeAsync(); + + // PureFlow keeps only its final attempt (1); FlakyFlow keeps every failed attempt. + // Uploads run concurrently, so compare the set, not the order. + const uploaded = mockUploadArtifact.mock.calls + .map(([arg]) => `${arg.artifact.metadata.flowName}#${arg.artifact.metadata.attemptIndex}`) + .sort(); + expect(uploaded).toEqual(['FlakyFlow#0', 'FlakyFlow#1', 'PureFlow#1']); + }); }); diff --git a/packages/build-tools/src/steps/functions/maestroScreenshots.ts b/packages/build-tools/src/steps/functions/maestroScreenshots.ts new file mode 100644 index 0000000000..be0619edc4 --- /dev/null +++ b/packages/build-tools/src/steps/functions/maestroScreenshots.ts @@ -0,0 +1,159 @@ +import { bunyan } from '@expo/logger'; +import { type Dirent } from 'fs'; +import fs from 'fs/promises'; +import path from 'path'; + +export interface ParsedFailureScreenshot { + flowName: string; + capturedAtMs: number; +} + +// Maestro failure screenshot: `screenshot-[shard-N-]❌--().png`. +// The flow name may contain parentheses, so anchor on the `-(` after the epoch and the +// `).png` suffix rather than scanning for parens. +const FAILURE_SCREENSHOT_PATTERN = /^screenshot-(?:shard-\d+-)?❌-(\d+)-\((.+)\)\.png$/u; + +export function parseFailureScreenshotFilename(filename: string): ParsedFailureScreenshot | null { + const match = filename.match(FAILURE_SCREENSHOT_PATTERN); + if (!match) { + return null; + } + const capturedAtMs = Number(match[1]); + if (!Number.isFinite(capturedAtMs)) { + return null; + } + return { + flowName: match[2], + capturedAtMs, + }; +} + +export interface HarvestedScreenshot { + fileAbsPath: string; + displayName: string; // `Failure Screenshot: (attempt )` + metadata: { + kind: 'maestro-test-screenshot'; + // The Maestro flow name straight from the filename (already '/'->'_' substituted by + // Maestro). The website matches this against `attempt.name` (normalized the same way). + flowName: string; + attemptIndex: number; + capturedAtMs: number; + }; +} + +// Scans every debug dir under testsDirectory whose mtime is recent enough (mtime-based to cover +// the maestro <2.5.0 bug that splits one invocation across two dirs), then keeps only screenshots +// whose own capturedAtMs is at/after capturedSinceMs. Never throws: an unreadable dir is logged +// and skipped so a screenshot harvest can't affect the maestro step outcome. +export async function harvestFailureScreenshotsAsync(args: { + testsDirectory: string; + capturedSinceMs: number; + attemptIndex: number; + logger: bunyan; +}): Promise { + // Gate dirs by mtime floored to the second, so a dir created within the same second as the + // attempt start isn't stat'd below the baseline. The exact capturedAtMs gate below is what + // prevents mis-attributing an earlier attempt's screenshot. + const dirSinceMtimeMs = Math.floor(args.capturedSinceMs / 1000) * 1000; + let entries: Dirent[]; + try { + entries = await fs.readdir(args.testsDirectory, { withFileTypes: true }); + } catch (err: any) { + args.logger.info({ err }, `Skipping screenshot harvest: cannot read ${args.testsDirectory}.`); + return []; + } + + const results: HarvestedScreenshot[] = []; + for (const entry of entries) { + if (!entry.isDirectory()) { + continue; + } + const dirPath = path.join(args.testsDirectory, entry.name); + let filenames: string[]; + try { + if ((await fs.stat(dirPath)).mtimeMs < dirSinceMtimeMs) { + continue; + } + filenames = await fs.readdir(dirPath); + } catch (err: any) { + args.logger.info({ err }, `Skipping unreadable debug dir ${dirPath}.`); + continue; + } + for (const filename of filenames) { + const parsed = parseFailureScreenshotFilename(filename); + // Re-check capture time per file: a dir's mtime can advance after this attempt began (or + // fall within the floored-second dir baseline), so the dir gate alone would re-attribute an + // earlier attempt's screenshot to this one. + if (parsed === null || parsed.capturedAtMs < args.capturedSinceMs) { + continue; + } + results.push({ + fileAbsPath: path.join(dirPath, filename), + displayName: `Failure Screenshot: ${parsed.flowName} (attempt ${args.attemptIndex + 1})`, + metadata: { + kind: 'maestro-test-screenshot', + flowName: parsed.flowName, + attemptIndex: args.attemptIndex, + capturedAtMs: parsed.capturedAtMs, + }, + }); + } + } + return results; +} + +// Maestro substitutes '/' -> '_' in screenshot filenames (so HarvestedScreenshot.flowName +// is already in that form) but keeps '/' in the JUnit testcase name. Normalize the JUnit +// name the same way before comparing — matching the website's own normalization. +function normalizeFlowName(name: string): string { + return name.replace(/\//g, '_'); +} + +// A flow is a "pure failure" when it failed and never passed across all attempts. The website +// surfaces only the final screenshot for such a flow, so keeping its earlier attempts would +// spend the per-job artifact budget on screenshots that are never shown — we keep only the final +// one (selectFailureScreenshots). A flow that passed at least once (flaky / fail-pass-fail) is +// NOT pure: each of its failed attempts is a distinct failure and is kept. +export function computePureFailureFlowNames( + testCases: readonly { name: string; status: 'passed' | 'failed' }[] +): Set { + const passed = new Set(); + const failed = new Set(); + for (const testCase of testCases) { + const flowName = normalizeFlowName(testCase.name); + if (testCase.status === 'passed') { + passed.add(flowName); + } else { + failed.add(flowName); + } + } + const pure = new Set(); + for (const flowName of failed) { + if (!passed.has(flowName)) { + pure.add(flowName); + } + } + return pure; +} + +// Keep every harvested screenshot for flaky/mixed flows; for pure-failure flows keep only +// the final (highest attemptIndex) screenshot. +export function selectFailureScreenshots( + harvested: readonly HarvestedScreenshot[], + pureFailureFlowNames: ReadonlySet +): HarvestedScreenshot[] { + const finalAttemptByPureFlow = new Map(); + for (const shot of harvested) { + if (!pureFailureFlowNames.has(shot.metadata.flowName)) { + continue; + } + const finalAttempt = finalAttemptByPureFlow.get(shot.metadata.flowName); + if (finalAttempt === undefined || shot.metadata.attemptIndex > finalAttempt) { + finalAttemptByPureFlow.set(shot.metadata.flowName, shot.metadata.attemptIndex); + } + } + return harvested.filter(shot => { + const finalAttempt = finalAttemptByPureFlow.get(shot.metadata.flowName); + return finalAttempt === undefined || shot.metadata.attemptIndex === finalAttempt; + }); +} diff --git a/packages/build-tools/src/steps/functions/maestroTests.ts b/packages/build-tools/src/steps/functions/maestroTests.ts index c8f21580d0..3226bd40dd 100644 --- a/packages/build-tools/src/steps/functions/maestroTests.ts +++ b/packages/build-tools/src/steps/functions/maestroTests.ts @@ -1,4 +1,5 @@ -import { SystemError, UserError } from '@expo/eas-build-job'; +import { GenericArtifactType, SystemError, UserError } from '@expo/eas-build-job'; +import { bunyan } from '@expo/logger'; import { BuildFunction, BuildRuntimePlatform, @@ -8,6 +9,7 @@ import { } from '@expo/steps'; import spawn from '@expo/turtle-spawn'; import fs from 'fs/promises'; +import os from 'os'; import path from 'path'; import { z } from 'zod'; @@ -18,7 +20,15 @@ import { mergeJUnitReports, parseFailedFlowsFromFileAttrs, parseFailedFlowsFromJUnit, + parseJUnitTestCases, } from './maestroResultParser'; +import { + type HarvestedScreenshot, + computePureFailureFlowNames, + harvestFailureScreenshotsAsync, + selectFailureScreenshots, +} from './maestroScreenshots'; +import { CustomBuildContext } from '../../customBuildContext'; import { sleepAsync } from '../../utils/retry'; const FlowPathSchema = z.array(z.string().min(1)).min(1); @@ -81,7 +91,7 @@ function buildMaestroArgs(args: { return out; } -export function createMaestroTestsBuildFunction(): BuildFunction { +export function createMaestroTestsBuildFunction(ctx: CustomBuildContext): BuildFunction { return new BuildFunction({ namespace: 'eas', id: 'maestro_tests', @@ -217,6 +227,7 @@ export function createMaestroTestsBuildFunction(): BuildFunction { // trusted; we then fall through to dumb retry (re-run everything). let flowsToRun: string[] = flowPaths; let lastAttemptExitCode: number | null = null; + const harvested: HarvestedScreenshot[] = []; const totalAttempts = retries + 1; for (let attempt = 0; attempt <= retries; attempt++) { @@ -239,6 +250,8 @@ export function createMaestroTestsBuildFunction(): BuildFunction { `Running maestro (attempt ${attempt + 1}/${totalAttempts}): maestro ${maestroArgs.join(' ')}` ); + const attemptStartedAtMs = Date.now(); + try { await spawn('maestro', maestroArgs, { cwd: stepCtx.workingDirectory, @@ -258,6 +271,20 @@ export function createMaestroTestsBuildFunction(): BuildFunction { } } + // Harvest this attempt's failure screenshots before any retry subsetting. Gated on + // junit: test-case-result rows (and therefore the summary icons) only exist for junit + // runs, so harvesting other formats would just create orphan artifacts the website hides. + if (outputFormat === 'junit') { + harvested.push( + ...(await harvestFailureScreenshotsAsync({ + testsDirectory, + capturedSinceMs: attemptStartedAtMs, + attemptIndex: attempt, + logger, + })) + ); + } + if (lastAttemptExitCode === 0 || attempt === retries) { break; } @@ -330,6 +357,12 @@ export function createMaestroTestsBuildFunction(): BuildFunction { } } + // Upload before the ERR_MAESTRO_TESTS_FAILED throw below so fully-failed runs (which need + // screenshots most) still upload. Harvest only ran for junit, so guard the same way. + if (outputFormat === 'junit') { + await uploadFailureScreenshotsAsync({ harvested, junitReportDirectory, ctx, logger }); + } + // The retry loop exits via success (0), numeric status (retryable), // or throw (infra). A non-null non-zero status means the user's tests // failed every attempt. @@ -342,3 +375,82 @@ export function createMaestroTestsBuildFunction(): BuildFunction { }, }); } + +// Reduce harvested failure screenshots to what's worth uploading, then upload them as workflow +// artifacts. Best-effort and verdict-neutral: never throws, so a screenshot problem can't mask +// the maestro test result. Caller guards on junit (harvest only runs for junit). +async function uploadFailureScreenshotsAsync({ + harvested, + junitReportDirectory, + ctx, + logger, +}: { + harvested: HarvestedScreenshot[]; + junitReportDirectory: string; + ctx: CustomBuildContext; + logger: bunyan; +}): Promise { + // Reduce to the attempts worth uploading — every failed attempt for flaky flows, only the final + // attempt for all-failed flows. See computePureFailureFlowNames / selectFailureScreenshots. + // Guard the JUnit re-parse so a malformed/missing report can't throw past here and mask the + // test verdict (the whole step is verdict-neutral for screenshots). + let selected: HarvestedScreenshot[]; + try { + const pureFailureFlowNames = computePureFailureFlowNames( + await parseJUnitTestCases(junitReportDirectory) + ); + selected = selectFailureScreenshots(harvested, pureFailureFlowNames); + } catch (err: any) { + logger.warn({ err }, 'Failed to classify failure screenshots; skipping screenshot upload.'); + return; + } + if (selected.length === 0) { + return; + } + + // Cap well under www's 50-artifact-per-job limit. + const MAX_SCREENSHOT_UPLOADS = 30; + const toUpload = selected.slice(0, MAX_SCREENSHOT_UPLOADS); + if (selected.length > toUpload.length) { + logger.warn( + `Found ${selected.length} failure screenshots; uploading only the first ${toUpload.length}.` + ); + } + + // Copy each shot to an ASCII-safe name outside testsDirectory (the originals contain a + // non-ASCII marker and testsDirectory is uploaded wholesale as the tarball). + let safeScreenshotDir: string; + try { + safeScreenshotDir = await fs.mkdtemp(path.join(os.tmpdir(), 'eas-maestro-screenshots-')); + } catch (err: any) { + logger.warn( + { err }, + 'Failed to create the failure-screenshot staging dir; skipping screenshot upload.' + ); + return; + } + + await Promise.all( + toUpload.map(async (shot, index) => { + try { + // `index` disambiguates two flows that fail within the same millisecond of an attempt. + const safePath = path.join( + safeScreenshotDir, + `failure-attempt-${shot.metadata.attemptIndex}-${index}-${shot.metadata.capturedAtMs}.png` + ); + await fs.copyFile(shot.fileAbsPath, safePath); + await ctx.runtimeApi.uploadArtifact({ + artifact: { + type: GenericArtifactType.OTHER, + name: shot.displayName, + paths: [safePath], + metadata: shot.metadata, + }, + logger, + }); + } catch (err: any) { + logger.warn({ err }, 'Failed to upload failure screenshot.'); + } + }) + ); +} diff --git a/packages/worker/src/context.ts b/packages/worker/src/context.ts index 4c141b4d78..c2195c3a79 100644 --- a/packages/worker/src/context.ts +++ b/packages/worker/src/context.ts @@ -74,6 +74,7 @@ export async function createBuildContext({ artifactPaths: paths, logger, name: artifact.name, + metadata: artifact.metadata, }), analytics ); diff --git a/packages/worker/src/upload.ts b/packages/worker/src/upload.ts index 62bd614440..f8e1e98428 100644 --- a/packages/worker/src/upload.ts +++ b/packages/worker/src/upload.ts @@ -143,10 +143,12 @@ export async function uploadWorkflowArtifactAsync( name: _name, logger, artifactPaths, + metadata, }: { name: string; logger: bunyan; artifactPaths: string[]; + metadata?: Record; } ): Promise<{ artifactId: string | null }> { const { localPath, filename, size } = await prepareArtifactsForUploadAsync(logger, artifactPaths); @@ -157,6 +159,7 @@ export async function uploadWorkflowArtifactAsync( filename, name, size, + metadata, }); await GCS.uploadWithSignedUrl({ @@ -238,7 +241,12 @@ function getCommonParentDir(path1: string, path2: string): string { async function createUploadSessionAsync( ctx: BuildContext, - { filename, name, size }: { filename: string; name: string; size: number } + { + filename, + name, + size, + metadata, + }: { filename: string; name: string; size: number; metadata?: Record } ): Promise<{ bucketKey: string; signedUrl: GCS.SignedUrl; @@ -265,7 +273,7 @@ async function createUploadSessionAsync( 'POST', // 'name' is ignored by Turtle Build router, but provide it for potential use for telemetry, etc. { - json: { filename, name, size }, + json: { filename, name, size, metadata }, headers: { Authorization: `Bearer ${robotAccessToken}`, }, @@ -282,7 +290,7 @@ async function createUploadSessionAsync( new URL(`workflows/${workflowJobId}/upload-sessions/`, config.wwwApiV2BaseUrl).toString(), 'POST', { - json: { filename, name, size }, + json: { filename, name, size, metadata }, headers: { Authorization: `Bearer ${robotAccessToken}`, },