Automattic · WRasada · Jun 4, 2026
diff --git a/__fixtures__/dev-env-e2e/mydumper-detection.expected.sql b/__fixtures__/dev-env-e2e/mydumper-detection.expected.sql
@@ -1,5 +1,5 @@
 
--- metadata.header -1
+-- metadata.header 00000000000000000198
 # Started dump at: 2024-07-26 03:00:36
 [config]
 quote_character = BACKTICK
@@ -8,7 +8,7 @@ quote_character = BACKTICK
 SQL_MODE='NO_AUTO_VALUE_ON_ZERO,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION' /*!40101
 
 
--- some_db-schema-create.sql -1
+-- some_db-schema-create.sql 00000000000000000358
 /*!40101 SET NAMES utf8mb4*/;
 /*!40014 SET FOREIGN_KEY_CHECKS=0*/;
 /*!40101 SET SQL_MODE='NO_AUTO_VALUE_ON_ZERO,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION'*/;

diff --git a/__tests__/lib/database.js b/__tests__/lib/database.js
@@ -0,0 +1,155 @@
+import fs from 'fs';
+import { Readable } from 'node:stream';
+import { pipeline } from 'node:stream/promises';
+import os from 'os';
+import path from 'path';
+
+import { MyDumperSectionSizeTransform, patchMyDumperSectionSizes } from '../../src/lib/database';
+
+/**
+ * Builds a mydumper-style stream from `{ name, declaredSize, content }` sections: each one
+ * becomes a "-- <name> <declaredSize>" header line followed by its content verbatim. Every
+ * section except the last is followed by one extra separator newline before the next header.
+ */
+const buildStream = sections =>
+	sections
+		.map( ( { name, declaredSize, content }, index ) => {
+			const separator = index === sections.length - 1 ? '' : '\n';
+			return `-- ${ name } ${ declaredSize }\n${ content }${ separator }`;
+		} )
+		.join( '' );
+
+const runTransform = async ( input, { chunkSize = 8 } = {} ) => {
+	const chunks = [];
+	for ( let offset = 0; offset < input.length; offset += chunkSize ) {
+		chunks.push( Buffer.from( input.slice( offset, offset + chunkSize ), 'latin1' ) );
+	}
+
+	const transform = new MyDumperSectionSizeTransform();
+	const outputFile = path.join(
+		fs.mkdtempSync( path.join( os.tmpdir(), 'mydumper-transform-test-' ) ),
+		'out.sql'
+	);
+	await pipeline( Readable.from( chunks ), transform, fs.createWriteStream( outputFile ) );
+	await patchMyDumperSectionSizes( outputFile, transform );
+
+	return { transform, outputFile, output: fs.readFileSync( outputFile, 'latin1' ) };
+};
+
+const parseHeaders = output => {
+	const headers = [];
+	const regex = /^-- ([^ ]+) (\d+)$/gm;
+	let match;
+	while ( ( match = regex.exec( output ) ) !== null ) {
+		headers.push( {
+			name: match[ 1 ],
+			size: parseInt( match[ 2 ], 10 ),
+			start: match.index,
+			end: match.index + match[ 0 ].length,
+		} );
+	}
+	return headers;
+};
+
+describe( 'lib/database', () => {
+	describe( 'MyDumperSectionSizeTransform', () => {
+		it( 'recomputes section sizes from actual content (stale sizes after search-replace)', async () => {
+			// Declared sizes are deliberately stale, mimicking headers left behind by a search-replace.
+			const metadata = '# Started dump\n[config]\nquote-character = BACKTICK\n';
+			const schema = 'CREATE TABLE `wp_options` (`id` bigint);\n';
+			const data = "INSERT INTO `wp_options` VALUES (1,'new.domain');\n";
+			const input = buildStream( [
+				{ name: 'metadata.header', declaredSize: 9999, content: metadata },
+				{ name: 'db.wp_options-schema.sql', declaredSize: 1, content: schema },
+				{ name: 'db.wp_options.00000.sql', declaredSize: 12345, content: data },
+			] );
+
+			const { output } = await runTransform( input );
+			const headers = parseHeaders( output );
+
+			expect( headers ).toHaveLength( 3 );
+			expect( headers[ 0 ].size ).toBe( metadata.length );
+			expect( headers[ 1 ].size ).toBe( schema.length );
+			expect( headers[ 2 ].size ).toBe( data.length );
+		} );
+
+		it( 'preserves content bytes exactly and keeps the size convention parseable', async () => {
+			const content1 = 'line one\nline two\n';
+			const content2 = "INSERT INTO `t` VALUES ('x');\n";
+			const input = buildStream( [
+				{ name: 'metadata.header', declaredSize: 0, content: content1 },
+				{ name: 'db.t.00000.sql', declaredSize: 0, content: content2 },
+			] );
+
+			const { output } = await runTransform( input );
+			const headers = parseHeaders( output );
+
+			// Slice each section back out of the output using the size from its own header: the
+			// size covers content bytes only, and one separator newline precedes the next header.
+			const section1 = output.slice(
+				headers[ 0 ].end + 1,
+				headers[ 0 ].end + 1 + headers[ 0 ].size
+			);
+			expect( section1 ).toBe( content1 );
+			expect( output[ headers[ 0 ].end + 1 + headers[ 0 ].size ] ).toBe( '\n' );
+
+			const section2 = output.slice(
+				headers[ 1 ].end + 1,
+				headers[ 1 ].end + 1 + headers[ 1 ].size
+			);
+			expect( section2 ).toBe( content2 );
+		} );
+
+		it.each( [ 1, 3, 7, 64 ] )(
+			'is chunk-boundary safe (chunk size %i splits headers mid-line)',
+			async chunkSize => {
+				const content = 'some content here\n';
+				const input = buildStream( [
+					{ name: 'metadata.header', declaredSize: 5, content },
+					{ name: 'db.table-schema.sql', declaredSize: 5, content },
+				] );
+
+				const { output } = await runTransform( input, { chunkSize } );
+				const headers = parseHeaders( output );
+
+				expect( headers ).toHaveLength( 2 );
+				expect( headers[ 0 ].size ).toBe( content.length );
+				expect( headers[ 1 ].size ).toBe( content.length );
+			}
+		);
+
+		it( 'documents the known limitation: content lines shaped "-- <token> <digits>" are treated as headers', async () => {
+			// mydumper never emits such content lines (string newlines are escaped, so content
+			// cannot begin a line with "-- "), but a hand-edited dump could contain one. This
+			// test documents the behavior so a future change here is deliberate, not accidental.
+			const content = 'real content\n-- handwritten_note 42\nmore content\n';
+			const input = buildStream( [ { name: 'metadata.header', declaredSize: 1, content } ] );
+
+			const { transform } = await runTransform( input );
+
+			// The phantom line is counted as a second section header, so two fixups are recorded.
+			expect( transform.fixups ).toHaveLength( 2 );
+		} );
+
+		it( 'does not treat header-looking content lines without a numeric size as headers', async () => {
+			const content = '-- this is just a comment\n-- not a header either\n';
+			const input = buildStream( [ { name: 'metadata.header', declaredSize: 1, content } ] );
+
+			const { output, transform } = await runTransform( input );
+
+			expect( transform.fixups ).toHaveLength( 1 );
+			expect( parseHeaders( output ) ).toHaveLength( 1 );
+			expect( output ).toContain( content );
+		} );
+
+		it( 'handles a final section without a trailing newline', async () => {
+			const content = 'no trailing newline';
+			const input = `-- metadata.header 5\n${ content }`;
+
+			const { output, transform } = await runTransform( input );
+
+			expect( transform.fixups[ 0 ].size ).toBe( content.length );
+			expect( parseHeaders( output )[ 0 ].size ).toBe( content.length );
+		} );
+	} );
+} );
diff --git a/__tests__/lib/search-and-replace.js b/__tests__/lib/search-and-replace.js
@@ -43,6 +43,10 @@ describe( 'lib/search-and-replace', () => {
 			new Error( 'No search and replace parameters provided.' )
 		);
 	} );
+	it( 'should throw for compressed input files', async () => {
+		const promise = searchAndReplace( '/tmp/some-dump.sql.GZ', 'a,b', {}, binary );
+		await expect( promise ).rejects.toThrow( 'Compressed files are not supported' );
+	} );
 	it( 'will accept and use a string of replacement pairs (when one replacement provided)', async () => {
 		// Mock the confirmation prompt so it doesn't actually prompt, and manipulate the resolved value
 		const promptMock = await jest.spyOn( prompt, 'confirm' ).mockResolvedValue( true );
@@ -86,6 +90,66 @@ describe( 'lib/search-and-replace', () => {
 		fs.unlinkSync( outputFileName );
 	} );
 
+	it( 'recomputes mydumper section header sizes after replacement changes content length', async () => {
+		// Minimal mydumper-format stream dump. Sizes follow the mydumper convention:
+		// content bytes including the content's own trailing newline, with a single
+		// separator newline before the next header; final section runs to EOF.
+		const metadata = '# Started dump\n[config]\nquote-character = BACKTICK\n';
+		const schemaCreate = 'CREATE DATABASE `testdb`;\n';
+		const data = "INSERT INTO `wp_options` VALUES ('ohai world, ohai');\n";
+		const myDumperFile = path.join(
+			fs.mkdtempSync( path.join( require( 'os' ).tmpdir(), 'mydumper-sr-test-' ) ),
+			'dump.sql'
+		);
+		fs.writeFileSync(
+			myDumperFile,
+			`-- metadata.header ${ metadata.length }\n${ metadata }\n` +
+				`-- testdb-schema-create.sql ${ schemaCreate.length }\n${ schemaCreate }\n` +
+				`-- testdb.wp_options.00000.sql ${ data.length }\n${ data }`
+		);
+
+		const { outputFileName } = await searchAndReplace(
+			myDumperFile,
+			'ohai,ohHeyLongerValue',
+			{ output: true },
+			binary
+		);
+
+		const result = fs.readFileSync( outputFileName, { encoding: 'utf-8' } );
+
+		// The replacement was applied and no occurrence of the search term is left
+		expect( result ).toContain( 'ohHeyLongerValue' );
+		expect( result ).not.toContain( 'ohai' );
+
+		// Every declared size must fit within the content that follows its header
+		const headerRegex = /^-- ([^ ]+) (\d+)$/gm;
+		const headers = [];
+		let match;
+		while ( ( match = headerRegex.exec( result ) ) !== null ) {
+			headers.push( {
+				size: parseInt( match[ 2 ], 10 ),
+				contentStart: match.index + match[ 0 ].length + 1,
+			} );
+		}
+		expect( headers ).toHaveLength( 3 );
+
+		headers.forEach( ( { size, contentStart } ) => {
+			expect( result.slice( contentStart, contentStart + size ).length ).toBe( size );
+		} );
+
+		// A single separator newline must follow each section's content before the next header
+		headers.slice( 0, -1 ).forEach( ( { size, contentStart } ) => {
+			expect( result[ contentStart + size ] ).toBe( '\n' );
+			expect( result.slice( contentStart + size + 1, contentStart + size + 4 ) ).toBe( '-- ' );
+		} );
+
+		// The final section must run exactly to the end of the output
+		const lastHeader = headers[ headers.length - 1 ];
+		expect( lastHeader.contentStart + lastHeader.size ).toBe( result.length );
+
+		fs.unlinkSync( outputFileName );
+	} );
+
 	it( 'will remove whitespace from the beginning and end of pairs', async () => {
 		jest.spyOn( searchReplaceLib, 'replace' );
 		const replaceSpy = searchReplaceLib.replace;

diff --git a/src/commands/dev-env-sync-sql.ts b/src/commands/dev-env-sync-sql.ts
@@ -17,7 +17,12 @@ import API from '../lib/api';
 import { BackupStorageAvailability } from '../lib/backup-storage-availability/backup-storage-availability';
 import * as exit from '../lib/cli/exit';
 import { unzipFile } from '../lib/client-file-uploader';
-import { fixMyDumperTransform, getSqlDumpDetails, SqlDumpType } from '../lib/database';
+import {
+	MyDumperSectionSizeTransform,
+	getSqlDumpDetails,
+	patchMyDumperSectionSizes,
+	SqlDumpType,
+} from '../lib/database';
 import { LiveBackupCopyCLIOptions } from '../lib/live-backup-copy';
 import { makeTempDir } from '../lib/utils';
 import { getReadInterface } from '../lib/validations/line-by-line';
@@ -209,12 +214,20 @@ export class DevEnvSyncSQLCommand {
 
 		const outputFile = `${ this.tmpDir }/sql-export-sr.sql`;
 		const transforms: NodeJS.ReadWriteStream[] = [];
+		let myDumperTransform: MyDumperSectionSizeTransform | undefined;
 		if ( this.getSqlDumpType() === SqlDumpType.MYDUMPER ) {
-			transforms.push( fixMyDumperTransform() );
+			myDumperTransform = new MyDumperSectionSizeTransform();
+			transforms.push( myDumperTransform );
 		}
 
 		await pipeline( replacedStream, ...transforms, fs.createWriteStream( outputFile ) );
 
+		if ( myDumperTransform ) {
+			// Replace the size placeholders in the section headers with the recomputed
+			// sizes; myloader needs them to parse the stream correctly.
+			await patchMyDumperSectionSizes( outputFile, myDumperTransform );
+		}
+
 		fs.renameSync( outputFile, this.sqlFile );
 	}