diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml
new file mode 100644
index 000000000000..7fdded9d6a84
--- /dev/null
+++ b/.github/workflows/test-integration.yml
@@ -0,0 +1,70 @@
+name: Integration tests
+
+on:
+ pull_request:
+ branches: [ master, v4 ]
+ workflow_dispatch:
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ remote-browser:
+ name: Remote browser integration
+ runs-on: ubuntu-22.04
+
+ # Side-services provide the remote browser and a deterministic HTTP target.
+ services:
+ browserless:
+ image: ghcr.io/browserless/chromium:latest
+ ports:
+ - 3000:3000
+ env:
+ CONCURRENT: 4
+ options: >-
+ --health-cmd "wget -qO- http://localhost:3000/json/version || exit 1"
+ --health-interval 5s
+ --health-timeout 5s
+ --health-retries 12
+ httpbin:
+ # kennethreitz/httpbin is python:3.6-slim and ships without wget/curl,
+ # so no Docker HEALTHCHECK — httpbin starts in <1s and the first test
+ # request will surface any real failure.
+ image: kennethreitz/httpbin:latest
+ ports:
+ - 8080:80
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v6
+
+ - name: Use Node.js 24
+ uses: actions/setup-node@v6
+ with:
+ node-version: 24
+ package-manager-cache: false
+
+ - name: Turbo cache
+ uses: actions/cache@v5
+ with:
+ path: .turbo
+ key: turbo-${{ github.job }}-${{ github.ref_name }}-${{ github.sha }}
+ restore-keys: |
+ turbo-${{ github.job }}-${{ github.ref_name }}-
+
+ - uses: apify/workflows/pnpm-install@main
+
+ # No `playwright install` — these tests connect to remote Browserless
+ # over CDP and never launch a local browser binary.
+
+ - name: Build
+ run: pnpm ci:build
+
+ - name: Run integration tests
+ run: pnpm test:integration
+ env:
+ BROWSERLESS_URL: http://localhost:3000 # port 3000 published by the `browserless` service above
+ HTTPBIN_URL: http://httpbin # service hostname, not localhost:8080 — presumably fetched by the remote browser container rather than the runner; confirm
+ CRAWLEE_DIFFICULT_TESTS: 1
+ RETRY_TESTS: 1
diff --git a/docs/guides/remote_browser.mdx b/docs/guides/remote_browser.mdx
new file mode 100644
index 000000000000..f02d41be4b64
--- /dev/null
+++ b/docs/guides/remote_browser.mdx
@@ -0,0 +1,70 @@
+---
+id: remote-browser
+title: "Remote browser services"
+sidebar_label: "Remote browsers"
+description: Connect Crawlee crawlers to remote browser services like Browserbase, Browserless, or Steel.
+---
+
+import ApiLink from '@site/src/components/ApiLink';
+import CodeBlock from '@theme/CodeBlock';
+
+import RemoteBrowserConfigSource from '!!raw-loader!./remote_browser_config.ts';
+import RemoteBrowserProviderSource from '!!raw-loader!./remote_browser_provider.ts';
+import RemoteBrowserPuppeteerSource from '!!raw-loader!./remote_browser_puppeteer.ts';
+
+Instead of launching a local browser, Crawlee can connect to a remote browser service like [Browserbase](https://browserbase.com/), [Browserless](https://browserless.io/), [Steel](https://steel.dev/), or any service that exposes a WebSocket/CDP endpoint. The crawler manages session rotation and the request lifecycle the same way it does locally — only the browser itself runs elsewhere.
+
+Use this when you need IPs in specific regions, want to offload CPU/memory from your runner, or need stealth features the service provides.
+
+## How it works
+
+Set the crawler's `remoteBrowser` option with the connection details. The crawler builds a `RemoteBrowserPool` around its own browser plugin, so the connection is always for the matching browser — there's no plugin to construct and no way to mismatch the pool with the crawler. The pool (an `IBrowserPool` wrapping the regular `BrowserPool`) owns everything remote: resolving the endpoint, releasing sessions when browsers close, and capping how many remote browsers run at once.
+
+## Basic usage
+
+The simplest form is a static connection URL. Use this when the service exposes a single endpoint and doesn't need per-session setup.
+
+{RemoteBrowserConfigSource}
+
+`endpoint` can also be a function returning `{ url, context }`, called once per browser launch. Pair it with a `release` callback (it receives the `context`) to clean up sessions on the service side when the browser closes, crashes, or the pool is destroyed.
+
+`maxOpenBrowsers` caps the number of concurrent remote browsers — set it to the service's concurrent-session limit to avoid 429 errors. The pool enforces it inside `newPage()`, which waits for a free slot rather than overshooting.
+
+### Self-hosted
+
+Some services ship a Docker image you can run locally or on your own infrastructure. For example, [Browserless](https://www.browserless.io/) has an open-source Chromium image:
+
+```bash
+docker run -p 3000:3000 -e CONCURRENT=4 ghcr.io/browserless/chromium
+```
+
+Point the pool at the local endpoint with `endpoint: 'ws://localhost:3000'`.
+
+## Custom provider
+
+For services with a session-create / session-release lifecycle, extend `RemoteBrowserProvider` and pass the instance as the pool's `endpoint`. `connect()` runs once per browser launch and returns the connection URL plus an optional `context` object passed back to `release()`. `maxOpenBrowsers` set on the provider is adopted by the pool.
+
+{RemoteBrowserProviderSource}
+
+## Puppeteer
+
+`PuppeteerCrawler` works the same way — set the same `remoteBrowser` option; there is no `PuppeteerPlugin` to construct. Puppeteer connects over CDP:
+
+{RemoteBrowserPuppeteerSource}
+
+For Playwright you can choose the protocol via the `remoteBrowser.connection.protocol` option: `'cdp'` (default, `connectOverCDP()`) or `'playwright'` (`connect()`, Playwright's own WebSocket protocol).
+
+## Sharing a pool across crawlers
+
+`remoteBrowser` builds a pool the crawler owns and tears down. To share one remote pool across multiple crawlers, construct a `RemoteBrowserPool` yourself and pass it as the `browserPool` option instead — a pool supplied that way is never destroyed by the crawler, so you control its lifecycle. Use `remoteBrowser` *or* `browserPool`, not both.
+
+## Limitations
+
+- **`headless` and `launchOptions` don't apply.** The remote service controls headless mode and browser flags; configure them on the service side.
+- **`useIncognitoPages` is forced to `true`** for Playwright remote connections — `connect()` / `connectOverCDP()` don't accept persistent contexts. For state shared across requests, use the `SessionPool`.
+- **`userDataDir` has no effect** — there's no local profile when the browser runs remotely. Use the service's persistence API (e.g. Browserbase Contexts, Steel Profiles).
+
+## Further reading
+
+- `RemoteBrowserPool` API reference
+- `RemoteBrowserProvider` API reference
diff --git a/docs/guides/remote_browser_config.ts b/docs/guides/remote_browser_config.ts
new file mode 100644
index 000000000000..41f4e0542fe8
--- /dev/null
+++ b/docs/guides/remote_browser_config.ts
@@ -0,0 +1,19 @@
+import { PlaywrightCrawler } from 'crawlee';
+
+const token = process.env.BROWSERLESS_TOKEN!; // must be set: `!` only satisfies the type checker, an unset variable produces "token=undefined"
+
+const crawler = new PlaywrightCrawler({
+ // Connect to a remote browser instead of launching locally. The crawler builds the right
+ // pool for its browser — you only supply the connection details.
+ remoteBrowser: {
+ endpoint: `wss://production-sfo.browserless.io?token=${token}`,
+ // Optional — respect the service's concurrent session limit.
+ maxOpenBrowsers: 5,
+ },
+ async requestHandler({ page, request, log }) {
+ const title = await page.title();
+ log.info(`${request.loadedUrl} — "${title}"`);
+ },
+});
+
+await crawler.run(['https://crawlee.dev']); // top-level await: the example runs as an ES module
diff --git a/docs/guides/remote_browser_provider.ts b/docs/guides/remote_browser_provider.ts
new file mode 100644
index 000000000000..45594d0fe4f4
--- /dev/null
+++ b/docs/guides/remote_browser_provider.ts
@@ -0,0 +1,46 @@
+import { RemoteBrowserProvider } from '@crawlee/browser-pool';
+import { PlaywrightCrawler } from 'crawlee';
+
+const apiKey = process.env.BROWSERBASE_API_KEY!;
+const projectId = process.env.BROWSERBASE_PROJECT_ID!;
+
+class BrowserbaseProvider extends RemoteBrowserProvider<{ id: string }> {
+ // Respect the service's concurrent session limit to avoid 429s.
+ override maxOpenBrowsers = 5;
+
+ async connect() { // creates one Browserbase session and returns its connection URL
+ const response = await fetch('https://api.browserbase.com/v1/sessions', {
+ method: 'POST',
+ headers: { 'x-bb-api-key': apiKey, 'Content-Type': 'application/json' },
+ body: JSON.stringify({ projectId }),
+ });
+
+ if (!response.ok) {
+ throw new Error(`Failed to create session: ${response.status} ${response.statusText}`);
+ }
+
+ const session = (await response.json()) as { id: string; connectUrl: string }; // shape is asserted, not validated
+ return { url: session.connectUrl, context: { id: session.id } }; // `context` carries the session id that release() destructures
+ }
+
+ override async release({ id }: { id: string }) {
+ await fetch(`https://api.browserbase.com/v1/sessions/${id}`, { // response is not inspected: a failed release goes unnoticed
+ method: 'POST',
+ headers: { 'x-bb-api-key': apiKey, 'Content-Type': 'application/json' },
+ body: JSON.stringify({ status: 'REQUEST_RELEASE' }),
+ });
+ }
+}
+
+const crawler = new PlaywrightCrawler({
+ // Pass the provider as the `endpoint`; the crawler's pool calls connect()/release() per browser.
+ remoteBrowser: {
+ endpoint: new BrowserbaseProvider(),
+ },
+ async requestHandler({ page, request, log }) {
+ const title = await page.title();
+ log.info(`${request.loadedUrl} — "${title}"`);
+ },
+});
+
+await crawler.run(['https://crawlee.dev']);
diff --git a/docs/guides/remote_browser_puppeteer.ts b/docs/guides/remote_browser_puppeteer.ts
new file mode 100644
index 000000000000..2bfc14be3d65
--- /dev/null
+++ b/docs/guides/remote_browser_puppeteer.ts
@@ -0,0 +1,16 @@
+import { PuppeteerCrawler } from 'crawlee';
+
+const token = process.env.BROWSERLESS_TOKEN!; // must be set: `!` only satisfies the type checker, an unset variable produces "token=undefined"
+
+const crawler = new PuppeteerCrawler({
+ // PuppeteerCrawler connects over CDP. Same `remoteBrowser` option, matching browser guaranteed.
+ remoteBrowser: {
+ endpoint: `wss://production-sfo.browserless.io?token=${token}`,
+ },
+ async requestHandler({ page, request, log }) {
+ const title = await page.title();
+ log.info(`${request.loadedUrl} — "${title}"`);
+ },
+});
+
+await crawler.run(['https://crawlee.dev']); // top-level await: the example runs as an ES module
diff --git a/package.json b/package.json
index 531e1c73cc1f..85cdeb825206 100644
--- a/package.json
+++ b/package.json
@@ -37,6 +37,9 @@
"ci:build": "turbo run build --filter=./packages/* --cache-dir=\".turbo\" && node ./scripts/typescript_fixes.mjs",
"test": "vitest run --silent",
"test:e2e": "node test/e2e/run.mjs",
+ "test:integration": "cross-env CRAWLEE_DIFFICULT_TESTS=1 vitest run --silent=true test/integration",
+ "test:integration:services:up": "docker network create crawlee-it 2>/dev/null; docker run -d --rm --name crawlee-it-browserless --network crawlee-it -p 3000:3000 -e CONCURRENT=4 ghcr.io/browserless/chromium && docker run -d --rm --name crawlee-it-httpbin --network crawlee-it --network-alias httpbin -p 8080:80 kennethreitz/httpbin",
+ "test:integration:services:down": "docker stop crawlee-it-browserless crawlee-it-httpbin; docker network rm crawlee-it 2>/dev/null; true",
"test:full": "cross-env CRAWLEE_DIFFICULT_TESTS=1 vitest run --silent",
"tsc-check-tests": "tsc --noEmit --project test/tsconfig.json",
"coverage": "vitest --coverage",
diff --git a/packages/browser-crawler/src/internals/browser-crawler.ts b/packages/browser-crawler/src/internals/browser-crawler.ts
index e93c52b3fa20..3d4cc2ecb2aa 100644
--- a/packages/browser-crawler/src/internals/browser-crawler.ts
+++ b/packages/browser-crawler/src/internals/browser-crawler.ts
@@ -34,10 +34,11 @@ import type {
BrowserPoolHooks,
BrowserPoolOptions,
CommonPage,
+ CrawlerRemoteBrowserOptions,
InferBrowserPluginArray,
LaunchContext,
} from '@crawlee/browser-pool';
-import { BrowserPool } from '@crawlee/browser-pool';
+import { BrowserPool, RemoteBrowserPool } from '@crawlee/browser-pool';
import type { BatchAddRequestsResult, Cookie as CookieObject, IBrowserPool, ISession } from '@crawlee/types';
import type { RobotsTxtFile } from '@crawlee/utils';
import { CLOUDFLARE_RETRY_CSS_SELECTORS, RETRY_CSS_SELECTORS, sleep } from '@crawlee/utils';
@@ -123,6 +124,19 @@ export interface BrowserCrawlerOptions<
*/
browserPool?: IBrowserPool;
+ /**
+ * Connect to a remote browser service (Browserbase, Browserless, Steel, …) instead of launching locally.
+ *
+ * The crawler builds a {@apilink RemoteBrowserPool} around its own browser plugin, so the connection is
+ * always for the right browser — there is no plugin to construct and no way to mismatch the pool with the
+ * crawler. Supply the connection details only: a static `endpoint` URL, a function returning one per launch,
+ * or a {@apilink RemoteBrowserProvider}.
+ *
+ * Mutually exclusive with `browserPool`. For sharing a remote pool across crawlers, construct a
+ * {@apilink RemoteBrowserPool} yourself and pass it as `browserPool` instead.
+ */
+ remoteBrowser?: CrawlerRemoteBrowserOptions;
+
/**
* Function that is called to process each request.
*
@@ -322,12 +336,11 @@ export abstract class BrowserCrawler<
browserPool: IBrowserPool;
/**
- * Set when the crawler constructed its own {@apilink BrowserPool} (no `browserPool` option was provided).
- * Holds the same instance as `browserPool`, but typed as the concrete class so the crawler can call
- * lifecycle methods (`destroy`) that aren't part of {@apilink IBrowserPool}. A user-supplied pool is
- * never owned and never torn down by the crawler.
+ * Set when the crawler constructed its own pool (a {@apilink BrowserPool}, or a {@apilink RemoteBrowserPool}
+ * built from the `remoteBrowser` option). Holds the same instance as `browserPool` but is the only reference
+ * the crawler tears down — a user-supplied `browserPool` is never owned and never destroyed by the crawler.
*/
- private ownedBrowserPool?: BrowserPool;
+ private ownedBrowserPool?: { destroy: () => Promise };
launchContext: BrowserLaunchContext;
@@ -349,6 +362,7 @@ export abstract class BrowserCrawler<
launchContext: ow.optional.object,
headless: ow.optional.any(ow.boolean, ow.string),
browserPool: ow.optional.object.validate(validators.browserPool),
+ remoteBrowser: ow.optional.object,
browserPoolOptions: ow.optional.object,
saveResponseCookies: ow.optional.boolean,
proxyConfiguration: ow.optional.object.validate(validators.proxyConfiguration),
@@ -368,6 +382,7 @@ export abstract class BrowserCrawler<
saveResponseCookies = true,
launchContext = {},
browserPool,
+ remoteBrowser,
browserPoolOptions,
preNavigationHooks = [],
postNavigationHooks = [],
@@ -422,6 +437,13 @@ export abstract class BrowserCrawler<
this.saveResponseCookies = saveResponseCookies;
+ if (browserPool && remoteBrowser) {
+ throw new Error(
+ "Set at most one of 'browserPool' and 'remoteBrowser'. To share a remote pool across crawlers, " +
+ 'build a RemoteBrowserPool yourself and pass it as `browserPool`.',
+ );
+ }
+
if (browserPool) {
this.browserPool = browserPool;
return;
@@ -435,10 +457,25 @@ export abstract class BrowserCrawler<
resolvedBrowserPoolOptions.useFingerprints = false;
}
- this.ownedBrowserPool = new BrowserPool({
+ if (remoteBrowser) {
+ // The crawler already built the right plugin for its browser — hand it to a RemoteBrowserPool so the
+ // remote connection is always for the matching browser (no plugin to construct, no way to mismatch).
+ const { browserPlugins, ...remoteBrowserPoolOptions } = resolvedBrowserPoolOptions;
+ const remotePool = new RemoteBrowserPool({
+ browserPlugins: browserPlugins as BrowserPlugin[],
+ ...remoteBrowser,
+ browserPoolOptions: remoteBrowserPoolOptions as any,
+ });
+ this.ownedBrowserPool = remotePool;
+ this.browserPool = remotePool as IBrowserPool;
+ return;
+ }
+
+ const ownedBrowserPool = new BrowserPool({
...(resolvedBrowserPoolOptions as any),
});
- this.browserPool = this.ownedBrowserPool as IBrowserPool;
+ this.ownedBrowserPool = ownedBrowserPool;
+ this.browserPool = ownedBrowserPool as IBrowserPool;
}
protected override buildContextPipeline(): ContextPipeline<
diff --git a/packages/browser-pool/src/abstract-classes/browser-controller.ts b/packages/browser-pool/src/abstract-classes/browser-controller.ts
index 684f65738a6b..7c5d407416b2 100644
--- a/packages/browser-pool/src/abstract-classes/browser-controller.ts
+++ b/packages/browser-pool/src/abstract-classes/browser-controller.ts
@@ -208,6 +208,7 @@ export abstract class BrowserController<
this.log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
}
+ await this._releaseRemoteBrowser();
this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);
setTimeout(() => {
@@ -225,9 +226,25 @@ export abstract class BrowserController<
async kill(): Promise {
await this.hasBrowserPromise;
await this._kill();
+ await this._releaseRemoteBrowser();
this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);
}
+ /**
+ * Releases the remote browser session (if this controller serves a remote browser) via the plugin's
+ * {@apilink RemoteConnection}. Safe to call multiple times — the token is cleared after the first call
+ * and the pool's registry also dedupes, so `release()` fires at most once across close()/kill().
+ */
+ private async _releaseRemoteBrowser(): Promise {
+ const token = this.launchContext?._remoteToken;
+ if (token === undefined) return; // no token on the launch context, or it was already cleared by an earlier call
+
+ // Clear before awaiting so release only fires once (close() schedules kill() after a timeout); a rejected release is therefore not retried.
+ this.launchContext._remoteToken = undefined;
+
+ await this.browserPlugin.remoteConnection?.release(token); // optional chaining: does nothing when the plugin has no remoteConnection
+ }
+
/**
* Opens new browser page.
* @ignore
diff --git a/packages/browser-pool/src/abstract-classes/browser-plugin.ts b/packages/browser-pool/src/abstract-classes/browser-plugin.ts
index 8d090a8939b5..b08e3b55c38e 100644
--- a/packages/browser-pool/src/abstract-classes/browser-plugin.ts
+++ b/packages/browser-pool/src/abstract-classes/browser-plugin.ts
@@ -4,7 +4,8 @@ import merge from 'lodash.merge';
import type { LaunchContextOptions } from '../launch-context.js';
import { LaunchContext } from '../launch-context.js';
-import type { UnwrapPromise } from '../utils.js';
+import type { RemoteConnection, RemoteConnectionParameters } from '../remote-browser-pool.js';
+import { sanitizeEndpointForLog, type UnwrapPromise } from '../utils.js';
import type { BrowserController } from './browser-controller.js';
/**
@@ -117,6 +118,18 @@ export abstract class BrowserPlugin<
ignoreProxyCertificate?: boolean;
+ /**
+ * Set by {@apilink RemoteBrowserPool} when this plugin connects to a remote browser service instead of
+ * launching locally. Holds the bridge the plugin uses to resolve endpoints and release sessions; all
+ * remote-session policy lives in the pool, not here.
+ *
+ * @internal
+ */
+ remoteConnection?: RemoteConnection;
+
+ /** Static connect() parameters for a remote connection (protocol, headers, …). @internal */
+ remoteConnectionParameters?: RemoteConnectionParameters;
+
constructor(library: Library, options: BrowserPluginOptions = {}) {
const {
launchOptions = {} as LibraryOptions,
@@ -137,6 +150,54 @@ export abstract class BrowserPlugin<
this.ignoreProxyCertificate = ignoreProxyCertificate;
}
+ /**
+ * Configures this plugin to connect to a remote browser using the given {@apilink RemoteConnection}.
+ * Called by {@apilink RemoteBrowserPool}; subclasses may override to apply library-specific defaults
+ * (e.g. forcing incognito pages).
+ *
+ * @internal
+ */
+ useRemoteConnection(connection: RemoteConnection, parameters: RemoteConnectionParameters = {}): void {
+ this.remoteConnection = connection; // once set, createLaunchContext() defaults `isRemote` to true
+ this.remoteConnectionParameters = parameters; // `{}` when the caller omits it
+ }
+
+ /**
+ * Resolves a remote endpoint via the injected {@apilink RemoteConnection}, stores the session token on
+ * the launch context (so the controller can release it on close), and runs the library-specific `connect`.
+ * A resolve failure is wrapped in a {@apilink BrowserLaunchError}; a `connect` failure releases the session first, then is wrapped the same way.
+ *
+ * Subclasses implement only the `connect` callback — the resolve / token / release / error-wrap scaffolding
+ * lives here so it stays identical across plugins.
+ */
+ protected async _connectToRemoteBrowser(
+ launchContext: LaunchContext,
+ connect: (url: string) => Promise,
+ ): Promise {
+ const connection = this.remoteConnection!; // asserted non-null, not checked here: call only after useRemoteConnection()
+
+ let url: string;
+ let token: number;
+ try {
+ ({ url, token } = await connection.resolve({ proxyUrl: launchContext.proxyUrl }));
+ } catch (cause) {
+ throw new BrowserLaunchError('Failed to resolve the remote browser endpoint.', { cause });
+ }
+
+ launchContext._remoteToken = token; // stored before connecting; it is not cleared here if connect() fails below
+
+ try {
+ return await connect(url);
+ } catch (cause) {
+ await connection.release(token); // if release() itself rejects, that error propagates instead of the BrowserLaunchError below
+ throw new BrowserLaunchError(
+ `Failed to connect to remote browser at "${sanitizeEndpointForLog(url)}". ` +
+ 'Check that the endpoint is reachable and accepts the configured protocol.',
+ { cause },
+ );
+ }
+ }
+
/**
* Creates a `LaunchContext` with all the information needed
* to launch a browser. Aside from library specific launch options,
@@ -154,6 +215,7 @@ export abstract class BrowserPlugin<
userDataDir = this.userDataDir,
browserPerProxy = this.browserPerProxy,
ignoreProxyCertificate = this.ignoreProxyCertificate,
+ isRemote = !!this.remoteConnection,
} = options;
return new LaunchContext({
@@ -165,6 +227,7 @@ export abstract class BrowserPlugin<
userDataDir,
browserPerProxy,
ignoreProxyCertificate,
+ isRemote,
});
}
@@ -188,15 +251,16 @@ export abstract class BrowserPlugin<
NewPageResult
> = this.createLaunchContext(),
): Promise {
+ // launchOptions is only used by the local launch path below — remote connections ignore it.
launchContext.launchOptions ??= {} as LibraryOptions;
const { proxyUrl, launchOptions } = launchContext;
- if (proxyUrl) {
+ if (proxyUrl && !launchContext.isRemote) {
await this._addProxyToLaunchOptions(launchContext);
}
- if (this._isChromiumBasedBrowser(launchContext)) {
+ if (!launchContext.isRemote && this._isChromiumBasedBrowser(launchContext)) {
// This will set the args for chromium based browsers to hide the webdriver.
(launchOptions as Dictionary).args = this._mergeArgsToHideWebdriver(launchOptions!.args);
// When User-Agent is not set, and we're using Chromium in headless mode,
@@ -208,6 +272,10 @@ export abstract class BrowserPlugin<
}
}
+ if (launchContext.isRemote) {
+ this.log.info('Connecting to remote browser (skipping local proxy and webdriver stealth configuration).');
+ }
+
return this._launch(launchContext);
}
diff --git a/packages/browser-pool/src/browser-pool.ts b/packages/browser-pool/src/browser-pool.ts
index 7ec5e1b37ca8..2776913cbe63 100644
--- a/packages/browser-pool/src/browser-pool.ts
+++ b/packages/browser-pool/src/browser-pool.ts
@@ -307,6 +307,7 @@ export class BrowserPool<
{
browserPlugins: BrowserPlugins;
maxOpenPagesPerBrowser: number;
+ maxOpenBrowsers: number;
retireBrowserAfterPageCount: number;
operationTimeoutMillis: number;
closeInactiveBrowserAfterMillis: number;
@@ -399,6 +400,7 @@ export class BrowserPool<
this.browserPlugins = browserPlugins as unknown as BrowserPlugins;
this.maxOpenPagesPerBrowser = maxOpenPagesPerBrowser;
+ this.maxOpenBrowsers = Infinity;
this.retireBrowserAfterPageCount = retireBrowserAfterPageCount;
this.operationTimeoutMillis = operationTimeoutSecs * 1000;
this.closeInactiveBrowserAfterMillis = closeInactiveBrowserAfterSecs * 1000;
@@ -931,6 +933,28 @@ export class BrowserPool<
}
}
+ /**
+ * Returns `true` if the pool can accept a new browser launch without exceeding
+ * this pool's `maxOpenBrowsers` field. Counts starting, active, and retired browsers.
+ */
+ hasFreeBrowserSlot(): boolean {
+ const total =
+ this.startingBrowserControllers.size +
+ this.activeBrowserControllers.size +
+ this.retiredBrowserControllers.size;
+ return total < this.maxOpenBrowsers; // strict: a pool already at the cap has no free slot
+ }
+
+ /**
+ * Returns `true` if any active browser has fewer than `maxOpenPagesPerBrowser` active pages.
+ */
+ hasActiveBrowserWithFreeCapacity(): boolean {
+ for (const controller of this.activeBrowserControllers) { // only active controllers are inspected
+ if (controller.activePages < this.maxOpenPagesPerBrowser) return true;
+ }
+ return false; // also the result when there are no active browsers
+ }
+
private _initializeFingerprinting(): void {
const { useFingerprintCache = true, fingerprintCacheSize = 10_000 } = this.fingerprintOptions;
this.fingerprintGenerator = new FingerprintGenerator(this.fingerprintOptions.fingerprintGeneratorOptions);
diff --git a/packages/browser-pool/src/fingerprinting/hooks.ts b/packages/browser-pool/src/fingerprinting/hooks.ts
index 07096e416890..9a95752ad079 100644
--- a/packages/browser-pool/src/fingerprinting/hooks.ts
+++ b/packages/browser-pool/src/fingerprinting/hooks.ts
@@ -34,6 +34,9 @@ export function createFingerprintPreLaunchHook(browserPool: BrowserPool {
+ // Remote browsers may have their own fingerprinting — skip local fingerprint injection
+ if (launchContext.isRemote) return;
+
const { useIncognitoPages } = launchContext;
const session = launchContext.session as ISession | undefined;
const cacheKey = session?.id ?? launchContext.proxyUrl;
@@ -75,6 +78,7 @@ export function createFingerprintPreLaunchHook(browserPool: BrowserPool {
const { launchContext, browserPlugin } = browserController;
+ if (launchContext.isRemote) return;
const { fingerprint } = launchContext.fingerprint!;
if (launchContext.useIncognitoPages && browserPlugin instanceof PlaywrightPlugin && pageOptions) {
@@ -93,6 +97,7 @@ export function createPrePageCreateHook() {
export function createPostPageCreateHook(fingerprintInjector: FingerprintInjector) {
return async (page: any, browserController: BrowserController): Promise => {
const { browserPlugin, launchContext } = browserController;
+ if (launchContext.isRemote) return;
const fingerprint = launchContext.fingerprint!;
// TODO this will require refactoring, we should use common API instead of branching based on plugin type,
diff --git a/packages/browser-pool/src/index.ts b/packages/browser-pool/src/index.ts
index f8502a42dbe6..9f3b49455248 100644
--- a/packages/browser-pool/src/index.ts
+++ b/packages/browser-pool/src/index.ts
@@ -51,6 +51,16 @@ export type {
export { BrowserPlugin, BrowserLaunchError, DEFAULT_USER_AGENT } from './abstract-classes/browser-plugin.js';
export type { LaunchContextOptions } from './launch-context.js';
export { LaunchContext } from './launch-context.js';
+export { RemoteBrowserProvider } from './remote-browser-provider.js';
+export { RemoteBrowserPool } from './remote-browser-pool.js';
+export type {
+ RemoteBrowserPoolOptions,
+ CrawlerRemoteBrowserOptions,
+ RemoteBrowserEndpoint,
+ ResolvedRemoteEndpoint,
+ RemoteConnection,
+ RemoteConnectionParameters,
+} from './remote-browser-pool.js';
export type { InferBrowserPluginArray, UnwrapPromise } from './utils.js';
export { anonymizeProxySugar, type AnonymizeProxySugarOptions } from './anonymize-proxy.js';
export type { IBrowserPool, NewPageOptions } from '@crawlee/types';
diff --git a/packages/browser-pool/src/launch-context.ts b/packages/browser-pool/src/launch-context.ts
index b433f8cb20f7..4bbec3236835 100644
--- a/packages/browser-pool/src/launch-context.ts
+++ b/packages/browser-pool/src/launch-context.ts
@@ -56,6 +56,12 @@ export interface LaunchContextOptions<
* This is useful when using HTTPS proxies with self-signed certificates.
*/
ignoreProxyCertificate?: boolean;
+ /**
+ * Whether this launch context represents a connection to a remote browser
+ * rather than a locally launched one.
+ * @default false
+ */
+ isRemote?: boolean;
}
export class LaunchContext<
@@ -71,12 +77,21 @@ export class LaunchContext<
useIncognitoPages: boolean;
browserPerProxy?: boolean;
userDataDir: string;
+ readonly isRemote: boolean;
ignoreProxyCertificate?: boolean;
private _proxyUrl?: string;
private readonly _reservedFieldNames = [...Reflect.ownKeys(this), 'extend'];
fingerprint?: BrowserFingerprintWithHeaders;
+
+ /**
+ * Token identifying the remote browser session this context connected to, set by the plugin and read by
+ * the {@apilink BrowserController} to release the session on close. Only present for remote connections.
+ * @internal
+ */
+ _remoteToken?: number;
+
[K: PropertyKey]: unknown;
constructor(options: LaunchContextOptions) {
@@ -89,6 +104,7 @@ export class LaunchContext<
browserPerProxy,
userDataDir = '',
ignoreProxyCertificate,
+ isRemote,
} = options;
this.id = id;
@@ -98,6 +114,7 @@ export class LaunchContext<
this.useIncognitoPages = useIncognitoPages ?? false;
this.userDataDir = userDataDir;
this.ignoreProxyCertificate = ignoreProxyCertificate ?? false;
+ this.isRemote = isRemote ?? false;
this._proxyUrl = proxyUrl;
}
diff --git a/packages/browser-pool/src/playwright/playwright-browser.ts b/packages/browser-pool/src/playwright/playwright-browser.ts
index 2b94fce5421e..c1e2c65b8ef8 100644
--- a/packages/browser-pool/src/playwright/playwright-browser.ts
+++ b/packages/browser-pool/src/playwright/playwright-browser.ts
@@ -21,7 +21,6 @@ export class PlaywrightBrowser extends EventEmitter {
const { browserContext, version } = options;
this._browserContext = browserContext;
-
this._version = version;
this._browserContext.once('close', () => {
diff --git a/packages/browser-pool/src/playwright/playwright-controller.ts b/packages/browser-pool/src/playwright/playwright-controller.ts
index 0f7a4c1bf539..aeb927ff5f13 100644
--- a/packages/browser-pool/src/playwright/playwright-controller.ts
+++ b/packages/browser-pool/src/playwright/playwright-controller.ts
@@ -45,6 +45,11 @@ export class PlaywrightController extends BrowserController<
...contextOptions,
};
+ // Remote browsers handle their own proxy — don't inject local proxy settings into context
+ if (this.launchContext.isRemote) {
+ delete contextOptions?.proxy;
+ }
+
if (contextOptions?.proxy) {
const [anonymizedProxyUrl, closeProxy] = await anonymizeProxySugar(
contextOptions.proxy.server,
diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts
index a48cf1fedfec..bd71092fcf0f 100644
--- a/packages/browser-pool/src/playwright/playwright-plugin.ts
+++ b/packages/browser-pool/src/playwright/playwright-plugin.ts
@@ -7,6 +7,7 @@ import { anonymizeProxySugar } from '../anonymize-proxy.js';
import type { createProxyServerForContainers } from '../container-proxy-server.js';
import type { LaunchContext } from '../launch-context.js';
import { getLocalProxyAddress } from '../proxy-server.js';
+import type { RemoteConnection, RemoteConnectionParameters } from '../remote-browser-pool.js';
import type { SafeParameters } from '../utils.js';
import { PlaywrightBrowser as PlaywrightBrowserWithPersistentContext } from './playwright-browser.js';
import { PlaywrightController } from './playwright-controller.js';
@@ -19,7 +20,34 @@ export class PlaywrightPlugin extends BrowserPlugin<
private _browserVersion?: string;
_containerProxyServer?: Awaited>;
+ /**
+ * Playwright remote connections only support incognito pages — `connect()` / `connectOverCDP()` don't
+ * accept persistent contexts. Force it on (and inform the user) when wired for a remote connection.
+ */
+ override useRemoteConnection(connection: RemoteConnection, parameters: RemoteConnectionParameters = {}): void {
+ super.useRemoteConnection(connection, parameters);
+
+ if (!this.useIncognitoPages) { // log only when the assignment below actually changes the value
+ this.log.info(
+ 'Remote Playwright connection — useIncognitoPages forced to true. ' +
+ 'Pages will not share cookies/storage between each other; use the SessionPool for shared state.',
+ );
+ }
+ this.useIncognitoPages = true; // assigned unconditionally, whatever the plugin was constructed with
+ }
+
protected async _launch(launchContext: LaunchContext): Promise {
+ if (this.remoteConnection) {
+ return this._connectToRemoteBrowser(launchContext, async (url) => {
+ const connectOptions = (this.remoteConnectionParameters?.connectOptions ?? {}) as any;
+ if (this.remoteConnectionParameters?.protocol === 'playwright') {
+ this.log.info('Connecting to remote browser via connect (Playwright WebSocket).');
+ return this.library.connect(url, connectOptions);
+ }
+ this.log.info('Connecting to remote browser via connectOverCDP.');
+ return this.library.connectOverCDP(url, connectOptions);
+ });
+ }
const { launchOptions, useIncognitoPages, userDataDir, proxyUrl } = launchContext;
let browser: PlaywrightBrowser;
diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts
index 91ea817d03a9..6e18b22b21cb 100644
--- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts
+++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts
@@ -7,6 +7,7 @@ import type * as PuppeteerTypes from 'puppeteer';
import { BrowserPlugin } from '../abstract-classes/browser-plugin.js';
import { anonymizeProxySugar } from '../anonymize-proxy.js';
import type { LaunchContext } from '../launch-context.js';
+import type { RemoteConnection, RemoteConnectionParameters } from '../remote-browser-pool.js';
import { noop } from '../utils.js';
import type { PuppeteerNewPageOptions } from './puppeteer-controller.js';
import { PuppeteerController } from './puppeteer-controller.js';
@@ -19,6 +20,18 @@ export class PuppeteerPlugin extends BrowserPlugin<
PuppeteerTypes.Browser,
PuppeteerNewPageOptions
> {
+    /**
+     * Wires this plugin to a remote browser. Unlike Playwright, the `useIncognitoPages` setting is left
+     * untouched; when it is off, an info message points out that every page shares cookies and storage
+     * on the remote browser.
+     */
+    override useRemoteConnection(connection: RemoteConnection, parameters: RemoteConnectionParameters = {}): void {
+        super.useRemoteConnection(connection, parameters);
+
+        // Incognito pages are isolated from each other, so there is nothing to warn about.
+        if (this.useIncognitoPages) return;
+
+        const notice =
+            'Remote Puppeteer connection — pages will share cookies and storage on the remote ' +
+            'browser instance (useIncognitoPages defaults to false).';
+        this.log.info(notice);
+    }
+
protected async _launch(
launchContext: LaunchContext<
typeof Puppeteer,
@@ -38,71 +51,74 @@ export class PuppeteerPlugin extends BrowserPlugin<
// ignore
}
- const {
- launchOptions,
- userDataDir,
- useIncognitoPages,
- experimentalContainers,
- proxyUrl,
- ignoreProxyCertificate,
- } = launchContext;
-
- if (experimentalContainers) {
- throw new Error('Experimental containers are only available with Playwright');
- }
-
- launchOptions!.userDataDir = launchOptions!.userDataDir ?? userDataDir;
-
- if (launchOptions!.headless === false) {
- if (Array.isArray(launchOptions!.args)) {
- launchOptions!.args.push('--disable-site-isolation-trials');
- } else {
- launchOptions!.args = ['--disable-site-isolation-trials'];
- }
- }
-
- if (launchOptions!.headless === true && oldPuppeteerVersion) {
- launchOptions!.headless = 'new' as any;
- }
+ const { useIncognitoPages, proxyUrl, ignoreProxyCertificate } = launchContext;
let browser: PuppeteerTypes.Browser;
- {
- const [anonymizedProxyUrl, close] = await anonymizeProxySugar(proxyUrl, undefined, undefined, {
- ignoreProxyCertificate: launchContext.ignoreProxyCertificate,
+ if (this.remoteConnection) {
+ browser = await this._connectToRemoteBrowser(launchContext, async (url) => {
+ const connectOptions = this.remoteConnectionParameters?.connectOptions ?? {};
+ this.log.info('Connecting to remote browser via connect (CDP).');
+ return this.library.connect({ ...connectOptions, browserWSEndpoint: url });
});
+ } else {
+ const { launchOptions, userDataDir, experimentalContainers } = launchContext;
+
+ if (experimentalContainers) {
+ throw new Error('Experimental containers are only available with Playwright');
+ }
- if (proxyUrl) {
- const proxyArg = `${PROXY_SERVER_ARG}${anonymizedProxyUrl ?? proxyUrl}`;
+ launchOptions!.userDataDir = launchOptions!.userDataDir ?? userDataDir;
+ if (launchOptions!.headless === false) {
if (Array.isArray(launchOptions!.args)) {
- launchOptions!.args.push(proxyArg);
+ launchOptions!.args.push('--disable-site-isolation-trials');
} else {
- launchOptions!.args = [proxyArg];
+ launchOptions!.args = ['--disable-site-isolation-trials'];
}
}
- try {
- browser = await this.library.launch(launchOptions);
+ if (launchOptions!.headless === true && oldPuppeteerVersion) {
+ launchOptions!.headless = 'new' as any;
+ }
- if (anonymizedProxyUrl) {
- browser.on('disconnected', async () => {
- await close();
- });
+ {
+ const [anonymizedProxyUrl, close] = await anonymizeProxySugar(proxyUrl, undefined, undefined, {
+ ignoreProxyCertificate: launchContext.ignoreProxyCertificate,
+ });
+
+ if (proxyUrl) {
+ const proxyArg = `${PROXY_SERVER_ARG}${anonymizedProxyUrl ?? proxyUrl}`;
+
+ if (Array.isArray(launchOptions!.args)) {
+ launchOptions!.args.push(proxyArg);
+ } else {
+ launchOptions!.args = [proxyArg];
+ }
+ }
+
+ try {
+ browser = await this.library.launch(launchOptions);
+
+ if (anonymizedProxyUrl) {
+ browser.on('disconnected', async () => {
+ await close();
+ });
+ }
+ } catch (error: any) {
+ await close();
+
+ this._throwAugmentedLaunchError(
+ error,
+ launchContext.launchOptions?.executablePath,
+ '`apify/actor-node-puppeteer-chrome`',
+ "Try installing a browser, if it's missing, by running `npx @puppeteer/browsers install chromium --path [path]` and pointing `executablePath` to the downloaded executable (https://pptr.dev/browsers-api)",
+ );
}
- } catch (error: any) {
- await close();
-
- this._throwAugmentedLaunchError(
- error,
- launchContext.launchOptions?.executablePath,
- '`apify/actor-node-puppeteer-chrome`',
- "Try installing a browser, if it's missing, by running `npx @puppeteer/browsers install chromium --path [path]` and pointing `executablePath` to the downloaded executable (https://pptr.dev/browsers-api)",
- );
}
}
- browser.on('targetcreated', async (target: PuppeteerTypes.Target) => {
+ const targetCreatedHandler = async (target: PuppeteerTypes.Target) => {
try {
const page = await target.page();
@@ -115,7 +131,16 @@ export class PuppeteerPlugin extends BrowserPlugin<
} catch (error: any) {
this.log.exception(error, 'Failed to retrieve page from target.');
}
- });
+ };
+
+ browser.on('targetcreated', targetCreatedHandler);
+
+ // Clean up the listener when a remote browser disconnects to prevent leaks
+ if (this.remoteConnection) {
+ browser.once('disconnected', () => {
+ browser.off('targetcreated', targetCreatedHandler);
+ });
+ }
const boundMethods = (
[
@@ -142,30 +167,35 @@ export class PuppeteerPlugin extends BrowserPlugin<
let page: PuppeteerTypes.Page;
if (useIncognitoPages) {
- const [anonymizedProxyUrl, close] = await anonymizeProxySugar(
- proxyUrl,
- undefined,
- undefined,
- { ignoreProxyCertificate },
- );
+ // Skip proxy setup for remote connections — proxy is managed by the remote service.
+ const effectiveProxyUrl = this.remoteConnection ? undefined : proxyUrl;
+ const [anonymizedProxyUrl, close] = effectiveProxyUrl
+ ? await anonymizeProxySugar(effectiveProxyUrl, undefined, undefined, {
+ ignoreProxyCertificate,
+ })
+ : ([undefined, noop] as const);
+
+ const proxyServer = anonymizedProxyUrl ?? effectiveProxyUrl;
+ const contextOptions = proxyServer ? { proxyServer } : {};
+ const context = (await (browser as any)[method](
+ contextOptions,
+ )) as PuppeteerTypes.BrowserContext;
try {
- const context = (await (browser as any)[method]({
- proxyServer: anonymizedProxyUrl ?? proxyUrl,
- })) as PuppeteerTypes.BrowserContext;
-
page = await context.newPage(...args);
-
- if (anonymizedProxyUrl) {
- page.on('close', async () => {
- await close();
- });
- }
} catch (error) {
+ await context.close().catch(noop);
await close();
throw error;
}
+
+ page.once('close', async () => {
+ if (anonymizedProxyUrl) {
+ await close();
+ }
+ await context.close().catch(noop);
+ });
} else {
page = await boundMethods.newPage(...args);
}
diff --git a/packages/browser-pool/src/remote-browser-pool.ts b/packages/browser-pool/src/remote-browser-pool.ts
new file mode 100644
index 000000000000..1d27353593cc
--- /dev/null
+++ b/packages/browser-pool/src/remote-browser-pool.ts
@@ -0,0 +1,318 @@
+import { type CrawleeLogger, serviceLocator } from '@crawlee/core';
+import type { IBrowserPool, NewPageOptions, PageState } from '@crawlee/types';
+
+import type { BrowserPlugin } from './abstract-classes/browser-plugin.js';
+import { BrowserPool } from './browser-pool.js';
+import type { BrowserPoolHooks, BrowserPoolOptions } from './browser-pool.js';
+import { BROWSER_POOL_EVENTS } from './events.js';
+import { RemoteBrowserProvider } from './remote-browser-provider.js';
+
+/**
+ * The result of resolving a remote browser endpoint: the URL to connect to plus an optional opaque
+ * `context` object that is handed back to `release`.
+ *
+ * A function endpoint returns this shape (instead of a bare URL string) when it needs per-session
+ * metadata at release time. The `url` must be non-empty — endpoint resolution throws otherwise.
+ */
+export interface ResolvedRemoteEndpoint {
+ /** The browser endpoint URL to connect to. Must be a non-empty string. */
+ url: string;
+ /** Opaque metadata passed back to `release()` unchanged — e.g. session IDs, API tokens. */
+ context?: Record;
+}
+
+/**
+ * A remote browser endpoint: either a static URL string, or a function called once per browser launch
+ * that returns a URL (optionally with a `context` for `release`).
+ *
+ * The function receives the `proxyUrl` resolved by Crawlee's proxy configuration for the launch, so it
+ * can forward it to the remote service's proxy API.
+ *
+ * The function may answer synchronously or with a promise — the result is awaited either way. An empty
+ * URL string, or an object without a non-empty `url`, is rejected with an error during resolution.
+ */
+export type RemoteBrowserEndpoint =
+ | string
+ | ((options?: { proxyUrl?: string }) => string | ResolvedRemoteEndpoint | Promise);
+
+/**
+ * The bridge a {@apilink RemoteBrowserPool} injects into a {@apilink BrowserPlugin} so the plugin can
+ * connect to a remote browser without owning any remote-session policy.
+ *
+ * The plugin only knows how to make the library-specific `connect()` call; everything else — resolving
+ * the endpoint, calling the user's `release()`, and guaranteeing release fires at most once — lives in
+ * the pool. The plugin calls {@apilink RemoteConnection.resolve|resolve} before connecting, stores the
+ * returned `token` on its launch context, and the controller later calls
+ * {@apilink RemoteConnection.release|release} with that token when the browser closes.
+ *
+ * @internal
+ */
+export interface RemoteConnection {
+ /**
+ * Resolves the endpoint for a single browser launch. The `token` identifies the session for release.
+ * `options.proxyUrl`, when given, is forwarded to a function endpoint.
+ */
+ resolve(options?: { proxyUrl?: string }): Promise<{ url: string; token: number }>;
+ /**
+ * Releases the remote session for `token`. Idempotent — safe to call from both `close()` and `kill()`.
+ * Pass the `token` obtained from the matching `resolve()` call.
+ */
+ release(token: number): Promise;
+}
+
+/**
+ * Owns the lifecycle of remote browser sessions for a single {@apilink RemoteBrowserPool}: endpoint
+ * resolution, the user's `release()` callback, and a release-at-most-once guarantee. Implements
+ * {@apilink RemoteConnection} so it can be injected into a plugin.
+ *
+ * Every successful `resolve()` registers a session under a fresh numeric token; the session entry is
+ * removed again once its release has finished (successfully or not).
+ */
+class RemoteSessionRegistry implements RemoteConnection {
+    /** Sessions that were resolved and not yet fully released, keyed by token. */
+    private readonly sessions = new Map<
+        number,
+        { url: string; context?: Record; released: boolean }
+    >();
+    /** Next token to hand out; incremented on every successful `resolve()`. */
+    private nextToken = 0;
+
+    /**
+     * @param endpoint Static URL or per-launch function producing the endpoint.
+     * @param onRelease Optional cleanup callback; receives the session URL and its `context`.
+     * @param log Logger used to report a failing `onRelease`.
+     */
+    constructor(
+        private readonly endpoint: RemoteBrowserEndpoint,
+        private readonly onRelease:
+            | ((info: { endpoint: string; context?: Record }) => unknown)
+            | undefined,
+        private readonly log: CrawleeLogger,
+    ) {}
+
+    /**
+     * Resolves the endpoint for one browser launch and registers a session for it.
+     *
+     * @param options Forwarded as-is to a function endpoint (carries the launch's `proxyUrl`).
+     * @returns The URL to connect to and the token to pass to {@apilink RemoteSessionRegistry.release|release}.
+     * @throws Error when the endpoint yields an empty string or an object without a non-empty `url`.
+     *   No session is registered in that case.
+     */
+    async resolve(options?: { proxyUrl?: string }): Promise<{ url: string; token: number }> {
+        const resolved = typeof this.endpoint === 'function' ? await this.endpoint(options) : this.endpoint;
+
+        let result: ResolvedRemoteEndpoint;
+        if (typeof resolved === 'string') {
+            if (!resolved) throw new Error('Remote browser endpoint resolved to an empty string.');
+            result = { url: resolved };
+        } else if (!resolved?.url) {
+            throw new Error("Remote browser endpoint() must return a URL string or an object with a non-empty 'url'.");
+        } else {
+            result = resolved;
+        }
+
+        const token = this.nextToken++;
+        this.sessions.set(token, { url: result.url, context: result.context, released: false });
+        return { url: result.url, token };
+    }
+
+    /**
+     * Releases the session registered under `token` by invoking the `onRelease` callback.
+     * Unknown or already-released tokens are ignored. Never rejects: a failing callback is logged
+     * as a warning and the session is dropped regardless.
+     */
+    async release(token: number): Promise {
+        const session = this.sessions.get(token);
+        // Release at most once per session — guards a close()/teardown race (the `released` flag is set
+        // synchronously before the awaited onRelease, so releaseAll() can't double-fire an in-flight release).
+        if (!session || session.released) return;
+        session.released = true;
+
+        try {
+            await this.onRelease?.({ endpoint: session.url, context: session.context });
+        } catch (err) {
+            // `onRelease` is user code and may throw anything, not only `Error` instances. Fall back to
+            // the stringified value so the warning never ends up with an `undefined` reason.
+            const reason = (err as { message?: unknown } | null | undefined)?.message ?? String(err);
+            this.log.warning('Remote browser release() failed.', { error: reason });
+        } finally {
+            this.sessions.delete(token);
+        }
+    }
+
+    /** Releases every session that is still open. Called on pool teardown so no remote session leaks. */
+    async releaseAll(): Promise {
+        await Promise.all([...this.sessions.keys()].map(async (token) => this.release(token)));
+    }
+}
+
+/**
+ * Per-plugin remote connection parameters, passed to {@apilink BrowserPlugin.useRemoteConnection}.
+ * The endpoint is supplied per-launch via {@apilink RemoteConnection}; these are the static connect()
+ * parameters (protocol, headers, timeouts, …).
+ */
+export interface RemoteConnectionParameters {
+ /**
+ * Playwright only: which protocol to connect with. `'cdp'` uses `connectOverCDP()` (the default),
+ * `'playwright'` uses `connect()` (Playwright's own WebSocket protocol). Ignored by Puppeteer.
+ */
+ protocol?: 'cdp' | 'playwright';
+ /**
+ * Extra options forwarded to the library `connect()` / `connectOverCDP()` call (endpoint excluded).
+ * Playwright receives them as the second argument next to the URL; Puppeteer spreads them into its
+ * `connect()` options and sets `browserWSEndpoint` afterwards, so an endpoint given here is overridden.
+ */
+ connectOptions?: Record;
+}
+
+/** Constructor options of {@apilink RemoteBrowserPool}. */
+export interface RemoteBrowserPoolOptions {
+ /**
+ * The browser plugin(s) used to connect to the remote service — e.g. `new PlaywrightPlugin(playwright.chromium)`
+ * or `new PuppeteerPlugin(puppeteer)`. The pool configures them for remote connection; do not set a local
+ * `launchOptions` on them.
+ */
+ browserPlugins: BrowserPlugin[];
+ /**
+ * The remote browser endpoint: a static URL, a function returning one per launch, or a
+ * {@apilink RemoteBrowserProvider} instance encapsulating a session create/release lifecycle.
+ */
+ endpoint: RemoteBrowserEndpoint | RemoteBrowserProvider;
+ /**
+ * Cleanup callback invoked when a browser closes, crashes, or the pool is destroyed. Receives the
+ * `context` returned by a function endpoint. Errors are caught and logged. Ignored when `endpoint`
+ * is a {@apilink RemoteBrowserProvider} (its own `release()` is used instead).
+ * May be synchronous or return a promise — the result is awaited.
+ */
+ release?: (info: { endpoint: string; context?: Record }) => unknown;
+ /**
+ * Maximum number of remote browsers open at once. When reached, {@apilink RemoteBrowserPool.newPage|newPage}
+ * waits for a browser to close before connecting a new one. Set it to your service's concurrent-session limit
+ * to avoid `429` errors. Defaults to the {@apilink RemoteBrowserProvider.maxOpenBrowsers|provider's value}, or
+ * `Infinity`. A value given here takes precedence over the provider's.
+ */
+ maxOpenBrowsers?: number;
+ /** Static connect() parameters (Playwright protocol selection, headers, timeouts, …). */
+ connection?: RemoteConnectionParameters;
+ /** Extra {@apilink BrowserPool} options (lifecycle hooks, page limits, fingerprinting, …). */
+ browserPoolOptions?: Omit & BrowserPoolHooks;
+ /** Fallback poll interval (ms) while waiting for a free browser slot. The wait is event-driven; this only bounds it. @default 500 */
+ slotPollIntervalMillis?: number;
+}
+
+/**
+ * The remote-connection configuration a browser crawler accepts on its `remoteBrowser` option. It is the
+ * {@apilink RemoteBrowserPoolOptions} a user supplies *minus* the parts the crawler provides itself — the
+ * `browserPlugins` (the crawler builds the correct one for its browser) and `browserPoolOptions` (taken from
+ * the crawler's own `browserPoolOptions`). Leaving those two out is what keeps the crawler path both terse
+ * and mismatch-proof: the user cannot pair a crawler with a plugin for a different browser library.
+ */
+export type CrawlerRemoteBrowserOptions = Omit;
+
+/**
+ * An {@apilink IBrowserPool} implementation for remote browser services.
+ *
+ * Unlike configuring a remote browser through a crawler's `launchContext`, this pool is the single owner
+ * of all remote-session concerns:
+ * - **endpoint resolution** — static URL, per-launch function, or {@apilink RemoteBrowserProvider};
+ * - **release lifecycle** — `release()` fires exactly once per session on close/crash/teardown (no leaks,
+ *   no double-release);
+ * - **concurrency** — {@apilink RemoteBrowserPoolOptions.maxOpenBrowsers|maxOpenBrowsers} is enforced inside
+ *   {@apilink RemoteBrowserPool.newPage|newPage}, which waits for a free slot rather than overshooting.
+ *
+ * The wrapped {@apilink BrowserPool} and its plugin only perform the library-specific `connect()` call.
+ *
+ * Pass an instance as the crawler's `browserPool` option:
+ *
+ * ```typescript
+ * import { PlaywrightPlugin, RemoteBrowserPool } from '@crawlee/browser-pool';
+ * import { PlaywrightCrawler } from 'crawlee';
+ * import playwright from 'playwright';
+ *
+ * const browserPool = new RemoteBrowserPool({
+ *     browserPlugins: [new PlaywrightPlugin(playwright.chromium)],
+ *     endpoint: 'wss://production-sfo.browserless.io?token=xxx',
+ *     maxOpenBrowsers: 2,
+ * });
+ *
+ * const crawler = new PlaywrightCrawler({ browserPool });
+ * ```
+ *
+ * @category Browser management
+ */
+export class RemoteBrowserPool implements IBrowserPool {
+    /** The wrapped pool that performs the remote connections and serves pages. */
+    readonly browserPool: BrowserPool;
+
+    /** The wrapped pool viewed through the {@apilink IBrowserPool} contract (the bare type widens pages to `never`). */
+    private readonly pool: IBrowserPool;
+
+    /** Tracks resolved remote sessions and guarantees each is released at most once. */
+    private readonly registry: RemoteSessionRegistry;
+    /** Upper bound (ms) on a single wait for a capacity-change event. */
+    private readonly slotPollIntervalMillis: number;
+    private readonly log: CrawleeLogger;
+
+    /** Shared by all `newPage` callers waiting for a free slot, so they don't each register their own listeners. */
+    private _capacityChange?: Promise;
+
+    /**
+     * Builds the session registry, wires every plugin for remote connection and creates the wrapped
+     * {@apilink BrowserPool}.
+     *
+     * When `endpoint` is a {@apilink RemoteBrowserProvider}, its `connect()` / `release()` replace the
+     * `endpoint` function and the `release` option, and its `maxOpenBrowsers` is used unless the option
+     * is set explicitly.
+     */
+    constructor(options: RemoteBrowserPoolOptions) {
+        const {
+            browserPlugins,
+            endpoint,
+            release,
+            maxOpenBrowsers,
+            connection = {},
+            browserPoolOptions = {},
+            slotPollIntervalMillis = 500,
+        } = options;
+
+        this.log = serviceLocator.getLogger().child({ prefix: 'RemoteBrowserPool' });
+        this.slotPollIntervalMillis = slotPollIntervalMillis;
+
+        // A RemoteBrowserProvider carries its own endpoint, release, and maxOpenBrowsers.
+        const provider = endpoint instanceof RemoteBrowserProvider ? endpoint : undefined;
+        const resolvedEndpoint: RemoteBrowserEndpoint = provider
+            ? (opts) => provider.connect(opts)
+            : (endpoint as RemoteBrowserEndpoint);
+        const resolvedRelease = provider
+            ? ({ context }: { context?: Record }) => provider.release(context as any)
+            : release;
+        const resolvedMax = maxOpenBrowsers ?? provider?.maxOpenBrowsers;
+
+        this.registry = new RemoteSessionRegistry(resolvedEndpoint, resolvedRelease, this.log);
+
+        // Wire every plugin for remote connection.
+        for (const plugin of browserPlugins) {
+            plugin.useRemoteConnection(this.registry, connection);
+        }
+
+        this.browserPool = new BrowserPool({ ...browserPoolOptions, browserPlugins }) as unknown as BrowserPool;
+        this.pool = this.browserPool as unknown as IBrowserPool;
+
+        // Leave the wrapped pool's own default in place when neither the option nor the provider sets a limit.
+        if (resolvedMax !== undefined) {
+            this.browserPool.maxOpenBrowsers = resolvedMax;
+        }
+    }
+
+    /** Maximum number of remote browsers that may be open at the same time. */
+    get maxOpenBrowsers(): number {
+        return this.browserPool.maxOpenBrowsers;
+    }
+
+    set maxOpenBrowsers(value: number) {
+        this.browserPool.maxOpenBrowsers = value;
+    }
+
+    /**
+     * Opens a new page, waiting first until {@apilink RemoteBrowserPoolOptions.maxOpenBrowsers|maxOpenBrowsers}
+     * allows it (either a new browser slot is free, or an active browser still has page capacity).
+     */
+    async newPage(options?: NewPageOptions): Promise {
+        await this._waitForFreeSlot();
+        return this.pool.newPage(options);
+    }
+
+    /** Delegates to the wrapped pool's `closePage`. */
+    async closePage(page: Page, options?: { error?: Error }): Promise {
+        return this.pool.closePage(page, options);
+    }
+
+    /** Delegates to the wrapped pool's `extractPageState`. */
+    async extractPageState(page: Page): Promise {
+        return this.pool.extractPageState(page);
+    }
+
+    /** Delegates to the wrapped pool's `injectPageState`. */
+    async injectPageState(page: Page, state: PageState): Promise {
+        return this.pool.injectPageState(page, state);
+    }
+
+    /**
+     * Closes all browsers, releases any still-open remote sessions, and tears down the wrapped pool.
+     *
+     * Sessions are released even when the wrapped pool's `destroy()` rejects; that rejection is
+     * re-thrown once the release pass has finished.
+     */
+    async destroy(): Promise {
+        try {
+            await this.browserPool.destroy();
+        } finally {
+            // Backstop: release any sessions whose browser never emitted a close (e.g. dropped on teardown).
+            // Kept in `finally` so a failing pool teardown cannot leak remote sessions.
+            await this.registry.releaseAll();
+        }
+    }
+
+    /** Resolves once the wrapped pool can serve another page without exceeding `maxOpenBrowsers`. */
+    private async _waitForFreeSlot(): Promise {
+        // Re-check after every wake-up: the event (or timeout) only signals that capacity may have changed.
+        while (!this.browserPool.hasFreeBrowserSlot() && !this.browserPool.hasActiveBrowserWithFreeCapacity()) {
+            await this._nextCapacityChange();
+        }
+    }
+
+    /**
+     * Resolves on the next browser-retired / page-closed event, or after `slotPollIntervalMillis`. All
+     * concurrently-waiting `newPage` calls share a single promise (and a single pair of event listeners)
+     * per tick, so a fleet of saturated callers doesn't fan out into N listener pairs on the pool.
+     */
+    private _nextCapacityChange(): Promise {
+        this._capacityChange ??= new Promise((resolve) => {
+            // Whichever trigger fires first tears down the other two and clears the shared promise,
+            // so the next waiter creates a fresh one.
+            const done = () => {
+                clearTimeout(timer);
+                this.browserPool.off(BROWSER_POOL_EVENTS.BROWSER_RETIRED, done);
+                this.browserPool.off(BROWSER_POOL_EVENTS.PAGE_CLOSED, done);
+                this._capacityChange = undefined;
+                resolve();
+            };
+
+            const timer = setTimeout(done, this.slotPollIntervalMillis);
+            // Don't let the fallback timer keep the process alive on its own.
+            timer.unref?.();
+            this.browserPool.once(BROWSER_POOL_EVENTS.BROWSER_RETIRED, done);
+            this.browserPool.once(BROWSER_POOL_EVENTS.PAGE_CLOSED, done);
+        });
+
+        return this._capacityChange;
+    }
+}
diff --git a/packages/browser-pool/src/remote-browser-provider.ts b/packages/browser-pool/src/remote-browser-provider.ts
new file mode 100644
index 000000000000..425d61ca65b1
--- /dev/null
+++ b/packages/browser-pool/src/remote-browser-provider.ts
@@ -0,0 +1,79 @@
+/**
+ * Abstract base class for remote browser service providers.
+ *
+ * Implement this class to encapsulate the lifecycle of a remote browser session
+ * (creation, connection URL resolution, and cleanup). {@apilink RemoteBrowserPool}
+ * calls {@link connect} once per browser launch and {@link release} when the browser
+ * closes, crashes, the pool is destroyed, or the connection fails during launch.
+ *
+ * Pass the provider instance as the `endpoint` of a {@apilink RemoteBrowserPool}, then
+ * hand the pool to a crawler via its `browserPool` option:
+ *
+ * ```typescript
+ * const browserPool = new RemoteBrowserPool({
+ * browserPlugins: [new PlaywrightPlugin(playwright.chromium)],
+ * endpoint: new MyProvider(),
+ * });
+ *
+ * const crawler = new PlaywrightCrawler({ browserPool });
+ * ```
+ *
+ * In the examples below, `token`, `apiKey`, `projectId`, `createSession` and `releaseSession`
+ * stand for values and helpers supplied by your own application code.
+ *
+ * **Example — simple static endpoint (e.g. Browserless):**
+ * ```typescript
+ * class BrowserlessProvider extends RemoteBrowserProvider {
+ * maxOpenBrowsers = 2; // respect the service's concurrent session limit
+ *
+ * async connect() {
+ * return { url: `wss://production-sfo.browserless.io?token=${token}` };
+ * }
+ * }
+ * ```
+ *
+ * **Example — session lifecycle with concurrency limit (e.g. Browserbase):**
+ * ```typescript
+ * class BrowserbaseProvider extends RemoteBrowserProvider<{ id: string }> {
+ * maxOpenBrowsers = 2; // respect the service's concurrent session limit
+ *
+ * async connect({ proxyUrl } = {}) {
+ * const session = await createSession(apiKey, projectId, {
+ * proxies: proxyUrl ? [{ type: 'external', server: proxyUrl }] : undefined,
+ * });
+ * return { url: session.connectUrl, context: { id: session.id } };
+ * }
+ *
+ * async release(context: { id: string }) {
+ * await releaseSession(apiKey, context.id);
+ * }
+ * }
+ * ```
+ */
+export abstract class RemoteBrowserProvider = Record> {
+ /**
+ * Maximum number of browsers that can be open at the same time.
+ * Set this to your remote service's concurrent session limit to avoid 429 errors.
+ * {@apilink RemoteBrowserPool} adopts this value unless its own `maxOpenBrowsers` option is set.
+ */
+ maxOpenBrowsers?: number;
+
+ /**
+ * Called once per browser launch. Return the WebSocket/CDP endpoint URL
+ * and an optional `context` object that will be passed back to {@link release}.
+ * May return the result directly or as a promise.
+ *
+ * @param options.proxyUrl - The proxy URL resolved by Crawlee's proxy configuration
+ * for this browser session. Pass it to your remote service's proxy API if supported.
+ */
+ abstract connect(options?: {
+ proxyUrl?: string;
+ }): Promise<{ url: string; context?: TContext }> | { url: string; context?: TContext };
+
+ /**
+ * Called when the browser closes, crashes, the pool is destroyed, or the
+ * connection fails right after {@link connect} succeeds.
+ * Override this to clean up remote sessions, release API resources, etc.
+ * The default implementation does nothing.
+ *
+ * Errors thrown here are caught and logged as warnings — they never crash the crawler.
+ * Safe to assume this is called at most once per {@link connect} call.
+ *
+ * @param _context The same `context` object returned by {@link connect}.
+ */
+ async release(_context: TContext): Promise {}
+}
diff --git a/packages/browser-pool/src/utils.ts b/packages/browser-pool/src/utils.ts
index ae224fee62e5..ca59455d72c5 100644
--- a/packages/browser-pool/src/utils.ts
+++ b/packages/browser-pool/src/utils.ts
@@ -6,6 +6,25 @@ export type UnwrapPromise = T extends PromiseLike ? UnwrapPromise
export function noop(..._args: unknown[]): void {}
+/**
+ * Produces a log-safe rendition of an endpoint URL.
+ *
+ * Userinfo credentials, the whole query string and the fragment are dropped, because remote browser
+ * services commonly put tokens there (e.g. Browserless `?token=…`) and there is no way to tell which
+ * parameters are sensitive. Protocol, host, port and path survive — enough to diagnose connection
+ * failures.
+ *
+ * @param endpoint The raw endpoint URL, possibly containing secrets.
+ * @returns The stripped URL, or an empty string when `endpoint` cannot be parsed as a URL.
+ */
+export function sanitizeEndpointForLog(endpoint: string): string {
+    let parsed: URL;
+    try {
+        parsed = new URL(endpoint);
+    } catch {
+        return '';
+    }
+
+    // Each of these components can carry a credential; blank all of them.
+    for (const component of ['username', 'password', 'search', 'hash'] as const) {
+        parsed[component] = '';
+    }
+
+    return parsed.toString();
+}
+
/**
* This is required when using optional dependencies.
* Importing a type gives `any`, but `Parameters` gives `unknown[]` instead of `any`
diff --git a/packages/browser-pool/test/remote-browser-pool.test.ts b/packages/browser-pool/test/remote-browser-pool.test.ts
new file mode 100644
index 000000000000..7e5b48df5a09
--- /dev/null
+++ b/packages/browser-pool/test/remote-browser-pool.test.ts
@@ -0,0 +1,230 @@
+import { vi } from 'vitest';
+
+import { serviceLocator } from '@crawlee/core';
+import type { CrawleeLogger } from '@crawlee/core';
+
+import { BROWSER_POOL_EVENTS } from '../src/events.js';
+import type { RemoteConnection } from '../src/remote-browser-pool.js';
+import { RemoteBrowserPool } from '../src/remote-browser-pool.js';
+import { RemoteBrowserProvider } from '../src/remote-browser-provider.js';
+
+/**
+ * Builds a logger whose every method is a vitest spy. `child()` hands back the same mock, so spies
+ * stay observable no matter how many child loggers the code under test derives.
+ */
+function createMockLogger(): CrawleeLogger {
+    const spyOnlyMethods = [
+        'error',
+        'exception',
+        'softFail',
+        'warning',
+        'warningOnce',
+        'info',
+        'debug',
+        'perf',
+        'deprecated',
+        'setOptions',
+        'setLevel',
+        'getLevel',
+    ];
+
+    const mock: any = {};
+    mock.child = vi.fn(() => mock);
+    for (const method of spyOnlyMethods) {
+        mock[method] = vi.fn();
+    }
+    mock.getOptions = vi.fn(() => ({}));
+
+    return mock;
+}
+
+/**
+ * Creates a fake plugin that records the {@link RemoteConnection} handed to it by the pool. Tests use
+ * `getConnection()` to exercise endpoint resolution and release without ever launching a browser.
+ */
+function createCapturingPlugin() {
+    let captured: RemoteConnection | undefined;
+
+    const plugin: any = {
+        useRemoteConnection(conn: RemoteConnection) {
+            captured = conn;
+        },
+    };
+    const getConnection = () => captured!;
+
+    return { plugin, getConnection };
+}
+
+// Install a brand-new mock logger before every test so each one starts with untouched spies.
+beforeEach(() => {
+ serviceLocator.setLogger(createMockLogger());
+});
+
+// Endpoint resolution: drives `resolve()` on the connection the pool injected into the capturing plugin.
+describe('RemoteBrowserPool — endpoint resolution', () => {
+ it('resolves a static string endpoint', async () => {
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint: 'wss://remote:9222' });
+
+ const { url, token } = await getConnection().resolve();
+
+ expect(url).toBe('wss://remote:9222');
+ expect(typeof token).toBe('number');
+ await pool.destroy();
+ });
+
+ it('resolves a function endpoint and forwards proxyUrl', async () => {
+ const endpoint = vi.fn(() => 'wss://dynamic:9222');
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint });
+
+ const { url } = await getConnection().resolve({ proxyUrl: 'http://proxy:8080' });
+
+ expect(url).toBe('wss://dynamic:9222');
+ expect(endpoint).toHaveBeenCalledWith({ proxyUrl: 'http://proxy:8080' });
+ await pool.destroy();
+ });
+
+ it('throws when an endpoint resolves to an empty string', async () => {
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint: () => '' });
+
+ await expect(getConnection().resolve()).rejects.toThrow(/empty string/);
+ await pool.destroy();
+ });
+
+ it('throws when a function endpoint returns an object without a url', async () => {
+ const { plugin, getConnection } = createCapturingPlugin();
+ // `as any` bypasses the type check on purpose — the point is the runtime validation.
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint: () => ({}) as any });
+
+ await expect(getConnection().resolve()).rejects.toThrow(/non-empty 'url'/);
+ await pool.destroy();
+ });
+});
+
+// Release lifecycle: the `release` option must fire once per resolved session and never throw outward.
+describe('RemoteBrowserPool — release lifecycle', () => {
+ it('calls release with the context from a function endpoint, exactly once', async () => {
+ const release = vi.fn();
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({
+ browserPlugins: [plugin],
+ endpoint: () => ({ url: 'wss://remote:9222', context: { id: 'sess-1' } }),
+ release,
+ });
+
+ const { token } = await getConnection().resolve();
+ await getConnection().release(token);
+ await getConnection().release(token); // second call must be a no-op (close()+kill())
+
+ expect(release).toHaveBeenCalledTimes(1);
+ expect(release).toHaveBeenCalledWith({ endpoint: 'wss://remote:9222', context: { id: 'sess-1' } });
+ await pool.destroy();
+ });
+
+ it('releases all still-open sessions on destroy()', async () => {
+ const release = vi.fn();
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint: 'wss://remote:9222', release });
+
+ // Two sessions are resolved but never released explicitly; destroy() has to clean up both.
+ await getConnection().resolve();
+ await getConnection().resolve();
+
+ await pool.destroy();
+
+ expect(release).toHaveBeenCalledTimes(2);
+ });
+
+ it('swallows errors thrown by release()', async () => {
+ const release = vi.fn(() => {
+ throw new Error('release boom');
+ });
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint: 'wss://remote:9222', release });
+
+ const { token } = await getConnection().resolve();
+ await expect(getConnection().release(token)).resolves.toBeUndefined();
+ await pool.destroy();
+ });
+});
+
+// Provider endpoint: a RemoteBrowserProvider instance supplies connect/release and a default browser limit.
+describe('RemoteBrowserPool — RemoteBrowserProvider endpoint', () => {
+ // Minimal provider whose connect/release are spies returning a fixed session.
+ class TestProvider extends RemoteBrowserProvider<{ id: string }> {
+ override maxOpenBrowsers = 3;
+ connect = vi.fn(async () => ({ url: 'wss://provider:9222', context: { id: 'sess-1' } }));
+ override release = vi.fn(async () => {});
+ }
+
+ it('wires connect/release and adopts the provider maxOpenBrowsers', async () => {
+ const provider = new TestProvider();
+ const { plugin, getConnection } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({ browserPlugins: [plugin], endpoint: provider });
+
+ expect(pool.maxOpenBrowsers).toBe(3);
+
+ const { url, token } = await getConnection().resolve({ proxyUrl: 'http://proxy:8080' });
+ expect(url).toBe('wss://provider:9222');
+ expect(provider.connect).toHaveBeenCalledWith({ proxyUrl: 'http://proxy:8080' });
+
+ // The provider's release receives only the `context`, not the `{ endpoint, context }` wrapper.
+ await getConnection().release(token);
+ expect(provider.release).toHaveBeenCalledWith({ id: 'sess-1' });
+ await pool.destroy();
+ });
+
+ it('an explicit maxOpenBrowsers overrides the provider value', async () => {
+ const { plugin } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({
+ browserPlugins: [plugin],
+ endpoint: new TestProvider(),
+ maxOpenBrowsers: 7,
+ });
+
+ expect(pool.maxOpenBrowsers).toBe(7);
+ await pool.destroy();
+ });
+});
+
+// Throttle: `newPage` must wait for capacity; the wrapped pool's capacity checks and `newPage` are stubbed.
+describe('RemoteBrowserPool — maxOpenBrowsers throttle', () => {
+ it('proxies maxOpenBrowsers to the wrapped pool', async () => {
+ const { plugin } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({
+ browserPlugins: [plugin],
+ endpoint: 'wss://remote:9222',
+ maxOpenBrowsers: 2,
+ });
+
+ expect(pool.browserPool.maxOpenBrowsers).toBe(2);
+ pool.maxOpenBrowsers = 5;
+ expect(pool.browserPool.maxOpenBrowsers).toBe(5);
+ await pool.destroy();
+ });
+
+ it('opens immediately when a browser slot is free', async () => {
+ const { plugin } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({
+ browserPlugins: [plugin],
+ endpoint: 'wss://remote:9222',
+ maxOpenBrowsers: 2,
+ });
+
+ pool.browserPool.hasFreeBrowserSlot = vi.fn(() => true);
+ pool.browserPool.hasActiveBrowserWithFreeCapacity = vi.fn(() => false);
+ const newPage = vi.fn(async () => ({ id: 'p' }));
+ (pool.browserPool as any).newPage = newPage;
+
+ await pool.newPage({ id: 'p' });
+ expect(newPage).toHaveBeenCalledOnce();
+ await pool.destroy();
+ });
+
+ it('waits while at capacity, then opens once a browser is retired', async () => {
+ const { plugin } = createCapturingPlugin();
+ const pool = new RemoteBrowserPool({
+ browserPlugins: [plugin],
+ endpoint: 'wss://remote:9222',
+ maxOpenBrowsers: 1,
+ slotPollIntervalMillis: 50,
+ });
+
+ let atCapacity = true;
+ pool.browserPool.hasFreeBrowserSlot = vi.fn(() => !atCapacity);
+ pool.browserPool.hasActiveBrowserWithFreeCapacity = vi.fn(() => false);
+ const newPage = vi.fn(async () => ({ id: 'p' }));
+ (pool.browserPool as any).newPage = newPage;
+
+ const pagePromise = pool.newPage();
+ // 20 ms is shorter than the 50 ms poll interval, so the call must still be parked here.
+ await new Promise((r) => setTimeout(r, 20));
+ expect(newPage).not.toHaveBeenCalled();
+
+ // Free the slot, then signal the change through the event the waiter listens for.
+ atCapacity = false;
+ pool.browserPool.emit(BROWSER_POOL_EVENTS.BROWSER_RETIRED, {} as any);
+
+ await pagePromise;
+ expect(newPage).toHaveBeenCalledOnce();
+ await pool.destroy();
+ });
+});
diff --git a/packages/browser-pool/test/remote-browser.test.ts b/packages/browser-pool/test/remote-browser.test.ts
new file mode 100644
index 000000000000..d4d24a4fac46
--- /dev/null
+++ b/packages/browser-pool/test/remote-browser.test.ts
@@ -0,0 +1,211 @@
+import { vi } from 'vitest';
+
+import { serviceLocator } from '@crawlee/core';
+import type { CrawleeLogger } from '@crawlee/core';
+
+import { PlaywrightPlugin } from '../src/playwright/playwright-plugin.js';
+import { PuppeteerPlugin } from '../src/puppeteer/puppeteer-plugin.js';
+import type { RemoteConnection } from '../src/remote-browser-pool.js';
+
+// ---------------------------------------------------------------------------
+// Mock helpers
+// ---------------------------------------------------------------------------
+
+/** Minimal page stub: `close()` resolves, `url()` returns 'about:blank', `on`/`once` are bare spies. */
+function createMockPage() {
+    const close = vi.fn().mockResolvedValue(undefined);
+    const url = vi.fn(() => 'about:blank');
+    const on = vi.fn();
+    const once = vi.fn();
+    return { close, url, on, once };
+}
+
+/** Browser stub used by both mock libraries; every context accessor hands back the same context stub. */
+function createMockBrowser() {
+    const sharedContext = {
+        newPage: vi.fn().mockResolvedValue(createMockPage()),
+        close: vi.fn().mockResolvedValue(undefined),
+        on: vi.fn(),
+        once: vi.fn(),
+    };
+    return {
+        contexts: vi.fn(() => [sharedContext]),
+        createBrowserContext: vi.fn().mockResolvedValue(sharedContext),
+        createIncognitoBrowserContext: vi.fn().mockResolvedValue(sharedContext),
+        newPage: vi.fn().mockResolvedValue(createMockPage()),
+        pages: vi.fn(() => []),
+        close: vi.fn().mockResolvedValue(undefined),
+        on: vi.fn(),
+        off: vi.fn(),
+        once: vi.fn(),
+        version: vi.fn(() => '120.0.0'),
+        userAgent: vi.fn().mockResolvedValue('mock-ua'),
+        process: vi.fn(() => null),
+    };
+}
+
+/** Mock Playwright library: every launch/connect entry point resolves to `browser`; `name()` is 'chromium'. */
+function createMockPlaywrightLibrary(browser = createMockBrowser()) {
+    const resolvesToBrowser = () => vi.fn().mockResolvedValue(browser);
+    const launch = resolvesToBrowser();
+    const connect = resolvesToBrowser();
+    const connectOverCDP = resolvesToBrowser();
+    const launchPersistentContext = resolvesToBrowser();
+    return { launch, connect, connectOverCDP, name: vi.fn(() => 'chromium'), launchPersistentContext };
+}
+
+/** Mock Puppeteer library: `launch` and `connect` both resolve to `browser`; `product` is 'chrome'. */
+function createMockPuppeteerLibrary(browser = createMockBrowser()) {
+    const launch = vi.fn().mockResolvedValue(browser);
+    const connect = vi.fn().mockResolvedValue(browser);
+    const product = 'chrome';
+    return { launch, connect, product };
+}
+
+function createMockLogger(): CrawleeLogger & { warning: ReturnType<typeof vi.fn>; info: ReturnType<typeof vi.fn> } {
+    const logger: any = { // logger test double: every method is a vi.fn() spy; `any` permits the self-reference below
+        child: vi.fn(() => logger), // child() returns this same object, so child-logger calls hit these spies
+        error: vi.fn(),
+        exception: vi.fn(),
+        softFail: vi.fn(),
+        warning: vi.fn(), // exposed as a spy in the return type so tests can assert on it
+        warningOnce: vi.fn(),
+        info: vi.fn(), // exposed as a spy in the return type so tests can assert on it
+        debug: vi.fn(),
+        perf: vi.fn(),
+        deprecated: vi.fn(),
+        getOptions: vi.fn(() => ({})), // reports an empty options object
+        setOptions: vi.fn(),
+        setLevel: vi.fn(),
+        getLevel: vi.fn(),
+    };
+    return logger;
+}
+
+/** A fake {@link RemoteConnection}: resolve() yields `{ url, token: 42, context }`; resolve/release are spies. */
+function createConnection(url = 'wss://remote:9222', context?: Record<string, unknown>): RemoteConnection & {
+    resolve: ReturnType<typeof vi.fn>;
+    release: ReturnType<typeof vi.fn>;
+} {
+    return {
+        resolve: vi.fn(async (_options?: { proxyUrl?: string }) => ({ url, token: 42, context })),
+        release: vi.fn(async () => {}), // records calls only; tests assert it receives the token from resolve()
+    } as any;
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+let mockLogger: ReturnType<typeof createMockLogger>;
+// Every test gets a fresh mock logger, registered in the service locator before the test body runs.
+beforeEach(() => {
+    mockLogger = createMockLogger();
+    serviceLocator.setLogger(mockLogger);
+});
+
+describe('Remote connection — PlaywrightPlugin', () => {
+ it('useRemoteConnection forces incognito pages on and marks the launch context remote', () => {
+ const plugin = new PlaywrightPlugin(createMockPlaywrightLibrary() as any, { useIncognitoPages: false });
+ plugin.useRemoteConnection(createConnection());
+ // The plugin was built with useIncognitoPages: false, so `true` below is the effect of useRemoteConnection().
+ expect(plugin.useIncognitoPages).toBe(true);
+ expect(plugin.createLaunchContext().isRemote).toBe(true);
+ });
+
+ it('connects via connectOverCDP by default and skips a local launch', async () => {
+ const lib = createMockPlaywrightLibrary();
+ const plugin = new PlaywrightPlugin(lib as any);
+ const connection = createConnection('http://remote:9222');
+ plugin.useRemoteConnection(connection, { connectOptions: { timeout: 5000 } });
+ // No `protocol` option is given, so this exercises the default protocol.
+ const ctx = plugin.createLaunchContext();
+ await plugin.launch(ctx);
+ // The resolved URL and connectOptions reach connectOverCDP; the fake connection's token (42) lands on the context.
+ expect(connection.resolve).toHaveBeenCalledTimes(1);
+ expect(lib.connectOverCDP).toHaveBeenCalledWith('http://remote:9222', { timeout: 5000 });
+ expect(lib.connect).not.toHaveBeenCalled();
+ expect(lib.launch).not.toHaveBeenCalled();
+ expect(ctx._remoteToken).toBe(42);
+ });
+
+ it("connects via connect() when protocol is 'playwright'", async () => {
+ const lib = createMockPlaywrightLibrary();
+ const plugin = new PlaywrightPlugin(lib as any);
+ plugin.useRemoteConnection(createConnection('ws://remote:3000'), { protocol: 'playwright' });
+ // No connectOptions are supplied, hence the empty options object expected below.
+ await plugin.launch(plugin.createLaunchContext());
+
+ expect(lib.connect).toHaveBeenCalledWith('ws://remote:3000', {});
+ expect(lib.connectOverCDP).not.toHaveBeenCalled();
+ });
+
+ it('releases the session and throws BrowserLaunchError when connect fails', async () => {
+ const lib = createMockPlaywrightLibrary();
+ lib.connectOverCDP.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+ const plugin = new PlaywrightPlugin(lib as any);
+ const connection = createConnection();
+ plugin.useRemoteConnection(connection);
+ // resolve() already yielded token 42, so release(42) is expected. NOTE(review): the error class itself is not asserted.
+ await expect(plugin.launch(plugin.createLaunchContext())).rejects.toThrow(/Failed to connect to remote browser/);
+ expect(connection.release).toHaveBeenCalledWith(42);
+ });
+
+ it('throws BrowserLaunchError (without connecting) when endpoint resolution fails', async () => {
+ const lib = createMockPlaywrightLibrary();
+ const plugin = new PlaywrightPlugin(lib as any);
+ const connection = createConnection();
+ connection.resolve.mockRejectedValueOnce(new Error('no session'));
+ plugin.useRemoteConnection(connection);
+ // resolve() itself rejects here, so no connect attempt and no release() call are expected.
+ await expect(plugin.launch(plugin.createLaunchContext())).rejects.toThrow(/resolve the remote browser endpoint/);
+ expect(lib.connectOverCDP).not.toHaveBeenCalled();
+ expect(connection.release).not.toHaveBeenCalled();
+ });
+
+ it('a plain plugin (no remote connection) launches locally', async () => {
+ const lib = createMockPlaywrightLibrary();
+ const plugin = new PlaywrightPlugin(lib as any);
+ // Control case: useRemoteConnection() is never called on this plugin.
+ await plugin.launch(plugin.createLaunchContext());
+
+ expect(lib.launch).toHaveBeenCalledTimes(1);
+ expect(lib.connect).not.toHaveBeenCalled();
+ expect(lib.connectOverCDP).not.toHaveBeenCalled();
+ });
+});
+
+describe('Remote connection — PuppeteerPlugin', () => {
+ it('connects via connect() with the resolved endpoint and skips a local launch', async () => {
+ const lib = createMockPuppeteerLibrary();
+ const plugin = new PuppeteerPlugin(lib as any);
+ const connection = createConnection('ws://remote:9222');
+ plugin.useRemoteConnection(connection, { connectOptions: { protocolTimeout: 1000 } });
+
+ const ctx = plugin.createLaunchContext();
+ await plugin.launch(ctx);
+ // The resolved URL is expected as `browserWSEndpoint` alongside the caller's connectOptions.
+ expect(connection.resolve).toHaveBeenCalledTimes(1);
+ expect(lib.connect).toHaveBeenCalledWith({ protocolTimeout: 1000, browserWSEndpoint: 'ws://remote:9222' });
+ expect(lib.launch).not.toHaveBeenCalled();
+ expect(ctx._remoteToken).toBe(42);
+ });
+
+ it('releases the session and throws BrowserLaunchError when connect fails', async () => {
+ const lib = createMockPuppeteerLibrary();
+ lib.connect.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+ const plugin = new PuppeteerPlugin(lib as any);
+ const connection = createConnection();
+ plugin.useRemoteConnection(connection);
+ // resolve() succeeds with token 42 and only connect() rejects, so release(42) is expected.
+ await expect(plugin.launch(plugin.createLaunchContext())).rejects.toThrow(/Failed to connect to remote browser/);
+ expect(connection.release).toHaveBeenCalledWith(42);
+ });
+
+ it('marks the launch context remote', () => {
+ const plugin = new PuppeteerPlugin(createMockPuppeteerLibrary() as any);
+ plugin.useRemoteConnection(createConnection());
+ // Only isRemote is checked for Puppeteer; useIncognitoPages is not asserted in this test.
+ expect(plugin.createLaunchContext().isRemote).toBe(true);
+ });
+});
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index d687fbfdce24..f21d9ea663dc 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -37,7 +37,7 @@ allowBuilds:
better-sqlite3: true
bufferutil: true
core-js: true
- core-js-pure: set this to true or false
+ core-js-pure: false
esbuild: true
nx: true
protobufjs: true
diff --git a/test/core/browser_launchers/playwright_launcher.test.ts b/test/core/browser_launchers/playwright_launcher.test.ts
index e2008b3458e0..ba29d0408c28 100644
--- a/test/core/browser_launchers/playwright_launcher.test.ts
+++ b/test/core/browser_launchers/playwright_launcher.test.ts
@@ -288,4 +288,5 @@ describe('launchPlaywright()', () => {
recursive: true,
});
});
+
});
diff --git a/test/core/browser_launchers/puppeteer_launcher.test.ts b/test/core/browser_launchers/puppeteer_launcher.test.ts
index 0963cd62de6b..4b36679a0ffe 100644
--- a/test/core/browser_launchers/puppeteer_launcher.test.ts
+++ b/test/core/browser_launchers/puppeteer_launcher.test.ts
@@ -308,4 +308,5 @@ describe('launchPuppeteer()', () => {
recursive: true,
});
});
+
});
diff --git a/test/core/crawlers/browser_crawler.test.ts b/test/core/crawlers/browser_crawler.test.ts
index 388b45781111..c0ca30eeba07 100644
--- a/test/core/crawlers/browser_crawler.test.ts
+++ b/test/core/crawlers/browser_crawler.test.ts
@@ -6,6 +6,7 @@ import {
BrowserPool as BrowserPoolClass,
OperatingSystemsName,
PuppeteerPlugin,
+ RemoteBrowserPool,
} from '@crawlee/browser-pool';
import { bindMethodsToServiceLocator, BLOCKED_STATUS_CODES, ServiceLocator, SessionPool } from '@crawlee/core';
import type { PuppeteerGoToOptions } from '@crawlee/puppeteer';
@@ -176,6 +177,46 @@ describe('BrowserCrawler', () => {
}
});
+ test.concurrent('builds and owns a RemoteBrowserPool from the remoteBrowser option', async () => {
+ const localStorageEmulator = new MemoryStorageEmulator();
+ await localStorageEmulator.init();
+
+ try {
+ const crawler = new BrowserCrawlerTest({
+ remoteBrowser: { endpoint: 'ws://remote:9222', maxOpenBrowsers: 2 },
+ browserPoolOptions: { browserPlugins: [new PuppeteerPlugin(puppeteer)] },
+ requestHandler: async () => {},
+ });
+ // Only construction is exercised; crawler.run() is never called in this test.
+ expect(crawler.browserPool).toBeInstanceOf(RemoteBrowserPool);
+ expect((crawler.browserPool as RemoteBrowserPool).maxOpenBrowsers).toBe(2);
+ // The test tears the pool down itself. NOTE(review): this is skipped if an expect above throws — consider `finally`.
+ await (crawler.browserPool as RemoteBrowserPool).destroy();
+ } finally {
+ await localStorageEmulator.destroy();
+ }
+ });
+
+ test.concurrent('throws when both browserPool and remoteBrowser are set', async () => {
+ const localStorageEmulator = new MemoryStorageEmulator();
+ await localStorageEmulator.init();
+ const externalPool = new BrowserPoolClass({ browserPlugins: [new PuppeteerPlugin(puppeteer)] });
+ // The constructor call is wrapped in a closure so expect() can observe the synchronous throw.
+ try {
+ expect(
+ () =>
+ new BrowserCrawlerTest({
+ browserPool: externalPool,
+ remoteBrowser: { endpoint: 'ws://remote:9222' },
+ requestHandler: async () => {},
+ }),
+ ).toThrow(/at most one of 'browserPool' and 'remoteBrowser'/);
+ } finally {
+ await externalPool.destroy();
+ await localStorageEmulator.destroy();
+ }
+ });
+
test.concurrent('should retire session after TimeoutError', async () => {
const localStorageEmulator = new MemoryStorageEmulator();
await localStorageEmulator.init();
diff --git a/test/integration/helpers.ts b/test/integration/helpers.ts
new file mode 100644
index 000000000000..c680a62c5a75
--- /dev/null
+++ b/test/integration/helpers.ts
@@ -0,0 +1,28 @@
+/**
+ * Helpers for remote-browser integration tests.
+ *
+ * These tests require a running Browserless instance and a deterministic HTTP
+ * target (httpbin). In CI both are provided as GitHub Actions service
+ * containers on a shared network. Locally, start them via
+ * `pnpm test:integration:services:up`.
+ *
+ * Network model: HTTPBIN_URL is consumed by the REMOTE browser (not the test
+ * runner). The browser lives in the Browserless container, so the URL must
+ * resolve inside that container's Docker network — typically `http://httpbin`
+ * via service name/alias.
+ *
+ * Env vars:
+ * BROWSERLESS_URL default: http://localhost:3000 (host-side; how the test
+ * runner reaches CDP)
+ * HTTPBIN_URL default: http://httpbin (browser-side; how the
+ * remote browser reaches
+ * httpbin via Docker DNS)
+ */
+
+export const BROWSERLESS_URL = process.env.BROWSERLESS_URL ?? 'http://localhost:3000'; // used by the test runner
+export const HTTPBIN_URL = process.env.HTTPBIN_URL ?? 'http://httpbin'; // used by the remote browser
+
+/** Build a URL on the httpbin service from a path (e.g. '/cookies'); tolerates a trailing slash on HTTPBIN_URL. */
+export function httpbin(path: string): string {
+    return `${HTTPBIN_URL.replace(/\/+$/, '')}${path.startsWith('/') ? path : `/${path}`}`;
+}
diff --git a/test/integration/remote-browser-incognito.test.ts b/test/integration/remote-browser-incognito.test.ts
new file mode 100644
index 000000000000..4c176870df99
--- /dev/null
+++ b/test/integration/remote-browser-incognito.test.ts
@@ -0,0 +1,64 @@
+/**
+ * Integration test: PlaywrightCrawler against a remote Browserless CDP endpoint
+ * forces useIncognitoPages: true, so two pages on the same remote browser do
+ * NOT share cookies.
+ *
+ * Mirrors temp-examples/examples/cookie-sharing-pages-same-remote-browser.ts:
+ * - retireBrowserAfterPageCount: 10 → both requests stay on the same browser
+ * - saveResponseCookies: false → Session cannot carry cookies across requests
+ * - Request 1 → /cookies/set?TOKEN=… (httpbin Set-Cookie)
+ * - Request 2 → /cookies (httpbin echoes received cookies in body)
+ *
+ * With the wrapper removed, request 2's body should report no cookies.
+ */
+import { PlaywrightCrawler } from 'crawlee';
+import { expect, test } from 'vitest';
+
+import { BROWSERLESS_URL, httpbin } from './helpers.js';
+
+// Gate on CRAWLEE_DIFFICULT_TESTS so plain `pnpm test` skips integration tests
+// (no Docker required); `pnpm test:integration` and `pnpm test:full` set the flag.
+test.skipIf(!process.env.CRAWLEE_DIFFICULT_TESTS)(
+ 'remote Playwright CDP: pages on the same browser do not share cookies',
+ async () => {
+ const observations: { controllerId: string; body: { cookies: Record } }[] = [];
+ const controllerIdByPage = new WeakMap