diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.js index 32f754de9b..c674101d3c 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.js @@ -24,6 +24,7 @@ const INLINE_FALLBACK_TYPES = new Set([ 'endnoteReference', 'fieldAnnotation', 'structuredContent', + 'image', 'mathInline', 'passthroughInline', 'page-number', diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.test.js index 04bb399e29..a2e058ef4a 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.test.js @@ -8,6 +8,7 @@ describe('isInlineNode', () => { expect(isInlineNode({ type: 'bookmarkStart', attrs: { id: '1' } })).toBe(true); expect(isInlineNode({ type: 'bookmarkEnd', attrs: { id: '1' } })).toBe(true); expect(isInlineNode({ type: 'tab' })).toBe(true); + expect(isInlineNode({ type: 'image', attrs: { src: 'media/image1.png' } })).toBe(true); expect(isInlineNode({ type: 'footnoteReference', attrs: { id: '1' } })).toBe(true); }); diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.js index ee57751e11..4b34895035 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.js +++ 
/** XML element names that place an SDT in a run-level (inline) import context. */
const INLINE_CONTEXT_XML_NAMES = new Set(['w:p', 'w:r', 'w:hyperlink', 'w:smartTag']);

/**
 * XML element names whose direct children must be block-level content.
 * NOTE(review): assumes the table / text-box / story handlers push these
 * elements onto `params.path`; if they do not, this set is simply never hit.
 */
const BLOCK_CONTAINER_XML_NAMES = new Set([
  'w:body',
  'w:tc',
  'w:txbxContent',
  'w:hdr',
  'w:ftr',
  'w:footnote',
  'w:endnote',
  'w:comment',
]);

/**
 * Report whether `w:sdtContent` directly holds an element that is block-level
 * by its XML name alone: a paragraph, a table, or a preprocessed block field.
 *
 * @param {{ elements?: Array<{ name?: string }> } | null | undefined} sdtContent
 * @returns {boolean}
 */
function hasDirectBlockSignal(sdtContent) {
  return Boolean(
    sdtContent?.elements?.some(
      (el) => el?.name === 'w:p' || el?.name === 'w:tbl' || BLOCK_FIELD_XML_NAMES.has(el?.name),
    ),
  );
}

/**
 * Decide from the import path whether an inline `structuredContent` node may be
 * emitted at this position.
 *
 * The path is scanned from the nearest ancestor outwards and the first
 * context-defining element wins. Matching *any* ancestor (the previous
 * behaviour) let a distant `w:p` or `w:sdtContent` override a nearer block-only
 * container such as a table cell, which permits an inline node where only
 * blocks are legal.
 *
 * `w:sdtContent` counts as inline-capable: the enclosing SDT either is inline
 * itself or wraps stray inline children into paragraphs.
 *
 * @param {Array<{ name?: string } | null | undefined>} [path] Ancestor XML nodes, outermost first.
 * @returns {boolean} `false` when no context-defining ancestor is found.
 */
function canEmitInlineStructuredContent(path = []) {
  for (let index = path.length - 1; index >= 0; index -= 1) {
    const name = path[index]?.name;
    if (name === 'w:sdtContent' || INLINE_CONTEXT_XML_NAMES.has(name)) return true;
    if (BLOCK_CONTAINER_XML_NAMES.has(name)) return false;
  }
  return false;
}

/**
 * Report whether any translated node is block-level, i.e. has a `type` and is
 * not classified inline by `isInlineNode`. Nodes without a `type` are ignored.
 *
 * @param {Array<object>} [content] Translated ProseMirror JSON nodes.
 * @param {object} [schema] Editor schema, forwarded to `isInlineNode`.
 * @returns {boolean}
 */
function hasTranslatedBlockContent(content = [], schema) {
  return content.some((node) => node?.type && !isInlineNode(node, schema));
}

/**
 * Normalize the children of a block SDT: each maximal run of consecutive inline
 * nodes is wrapped in a synthetic paragraph, while block nodes pass through in
 * their original order. Falsy entries are dropped.
 *
 * @param {Array<object>} [content] Translated ProseMirror JSON nodes.
 * @param {object} [schema] Editor schema, forwarded to `isInlineNode`.
 * @returns {Array<object>} A new array; the input array is not mutated.
 */
function wrapInlineRunsAsParagraphs(content = [], schema) {
  const normalized = [];
  let pendingInline = [];

  // Emit the buffered inline run (if any) as one paragraph and reset the buffer.
  const flushInline = () => {
    if (!pendingInline.length) return;
    normalized.push({
      type: 'paragraph',
      attrs: null,
      content: pendingInline,
      marks: [],
    });
    pendingInline = [];
  };

  for (const node of content) {
    if (!node) continue;

    if (isInlineNode(node, schema)) {
      pendingInline.push(node);
      continue;
    }

    // A block node ends the current inline run.
    flushInline();
    normalized.push(node);
  }

  flushInline();
  return normalized;
}
BIBLIOGRAPHY) has - // no direct w:p after preprocessing — its child is an sd:* block node. It is - // block content and must not be emitted as an inline structuredContent. - const blockField = sdtContent.elements?.find((el) => BLOCK_FIELD_XML_NAMES.has(el?.name)); const { marks } = parseAnnotationMarks(sdtContent); const translatedContent = nodeListHandler.handler({ ...params, @@ -126,12 +170,18 @@ export function handleStructuredContentNode(params) { path: [...(params.path || []), sdtContent], }); - const isBlockNode = paragraph || table || blockField; + const schema = params.editor?.schema; + const content = Array.isArray(translatedContent) ? translatedContent : []; + const isBlockNode = + hasTranslatedBlockContent(content, schema) || + hasDirectBlockSignal(sdtContent) || + !canEmitInlineStructuredContent(params.path); const sdtContentType = isBlockNode ? 'structuredContentBlock' : 'structuredContent'; + const normalizedContent = isBlockNode ? wrapInlineRunsAsParagraphs(content, schema) : content; let result = { type: sdtContentType, - content: translatedContent, + content: normalizedContent, marks, attrs: { id: id?.attributes?.['w:val'] || null, diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.test.js index 92e14f098d..68ea753d9b 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.test.js @@ -1,6 +1,8 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { handleStructuredContentNode } from './handle-structured-content-node'; import { parseAnnotationMarks } from 
// Regression suite: drives the real node-list importer (not the mocked handler)
// with hand-built OOXML JSON so nested content controls are classified and
// validated against the editor schema.
describe('handleStructuredContentNode nested SDT import regression', () => {
  let editor;

  // ---- OOXML JSON builders ----

  /** A `w:r` holding a single `w:t` with the given text. */
  function textRun(text) {
    const textElement = { name: 'w:t', elements: [{ type: 'text', text }] };
    return { name: 'w:r', elements: [textElement] };
  }

  /** A `w:p` holding one text run. */
  function paragraph(text) {
    return { name: 'w:p', elements: [textRun(text)] };
  }

  /** A `w:sdtPr` with id, tag, alias, lock and a control-type marker element. */
  function sdtPr({ id, tag, alias, lockMode = 'unlocked', controlType = 'w:richText' }) {
    const withVal = (name, value) => ({ name, attributes: { 'w:val': value } });
    return {
      name: 'w:sdtPr',
      elements: [
        withVal('w:id', id),
        withVal('w:tag', tag),
        withVal('w:alias', alias),
        withVal('w:lock', lockMode),
        { name: controlType },
      ],
    };
  }

  /** A `w:sdt` made of its properties and a `w:sdtContent` wrapping the given children. */
  function sdt(props, contentElements) {
    const sdtContent = { name: 'w:sdtContent', elements: contentElements };
    return { name: 'w:sdt', elements: [sdtPr(props), sdtContent] };
  }

  /** A single-cell `w:tbl` whose cell holds one paragraph with the given text. */
  function table(text) {
    const tableProps = {
      name: 'w:tblPr',
      elements: [{ name: 'w:tblW', attributes: { 'w:w': '2400', 'w:type': 'dxa' } }],
    };
    const grid = {
      name: 'w:tblGrid',
      elements: [{ name: 'w:gridCol', attributes: { 'w:w': '2400' } }],
    };
    const cell = {
      name: 'w:tc',
      elements: [
        {
          name: 'w:tcPr',
          elements: [{ name: 'w:tcW', attributes: { 'w:w': '2400', 'w:type': 'dxa' } }],
        },
        paragraph(text),
      ],
    };
    const row = { name: 'w:tr', elements: [cell] };
    return { name: 'w:tbl', elements: [tableProps, grid, row] };
  }

  // ---- pipeline helpers ----

  /** Run the default importer over top-level XML nodes with an empty path. */
  function importNodes(nodes) {
    const nodeListHandler = defaultNodeListHandler();
    const params = { nodes, nodeListHandler, docx: {}, editor, path: [] };
    return nodeListHandler.handler(params);
  }

  /** Assert that the content forms a schema-valid document; returns the built doc. */
  function expectSchemaValid(content) {
    let pmDoc;
    const build = () => {
      pmDoc = editor.schema.nodeFromJSON({ type: 'doc', content });
      pmDoc.check();
    };
    expect(build).not.toThrow();
    return pmDoc;
  }

  /** Pre-order depth-first search over PM JSON; returns the first match or null. */
  function findFirstJson(node, predicate) {
    const stack = [node];
    while (stack.length) {
      const current = stack.pop();
      if (!current) continue;
      if (predicate(current)) return current;
      const children = current.content || [];
      // Push in reverse so the leftmost child is visited first.
      for (let i = children.length - 1; i >= 0; i -= 1) stack.push(children[i]);
    }
    return null;
  }

  beforeEach(() => {
    const created = initTestEditor({
      isHeadless: true,
      loadFromSchema: true,
      content: { type: 'doc', content: [{ type: 'paragraph' }] },
    });
    editor = created.editor;
    parseAnnotationMarks.mockReturnValue({ marks: [] });
  });

  afterEach(() => {
    editor?.destroy();
    editor = null;
    vi.restoreAllMocks();
  });

  it('imports nested block SDT when outer sdtContent directly contains w:sdt wrapping a paragraph', () => {
    const innerProps = { id: 'inner-block', tag: 'inner-tag', alias: 'Inner Alias', lockMode: 'contentLocked' };
    const outerProps = { id: 'outer-block', tag: 'outer-tag', alias: 'Outer Alias', lockMode: 'sdtLocked' };
    const inner = sdt(innerProps, [paragraph('Nested paragraph')]);
    const outer = sdt(outerProps, [inner]);

    const result = importNodes([outer]);

    expect(result).toHaveLength(1);
    const [outerNode] = result;
    expect(outerNode.type).toBe('structuredContentBlock');
    expect(outerNode.attrs).toMatchObject({
      id: 'outer-block',
      tag: 'outer-tag',
      alias: 'Outer Alias',
      lockMode: 'sdtLocked',
      controlType: 'richText',
    });

    const nested = outerNode.content?.[0];
    expect(nested?.type).toBe('structuredContentBlock');
    expect(nested.attrs).toMatchObject({
      id: 'inner-block',
      tag: 'inner-tag',
      alias: 'Inner Alias',
      lockMode: 'contentLocked',
      controlType: 'richText',
    });
    const rawAlias = nested.attrs.sdtPr?.elements?.find((el) => el.name === 'w:alias');
    expect(rawAlias?.attributes?.['w:val']).toBe('Inner Alias');

    expectSchemaValid(result);
  });

  it('wraps nested inline SDT safely when an outer block SDT also contains paragraph and table content', () => {
    const inlineProps = { id: 'inner-inline', tag: 'inline-tag', alias: 'Inline Alias', lockMode: 'sdtContentLocked' };
    const outerProps = { id: 'outer-mixed', tag: 'outer-mixed-tag', alias: 'Outer Mixed', lockMode: 'sdtLocked' };
    const inlineNested = sdt(inlineProps, [textRun('Inline value')]);
    const outer = sdt(outerProps, [inlineNested, paragraph('Outer paragraph'), table('Cell text')]);

    const result = importNodes([outer]);

    expect(result).toHaveLength(1);
    const [outerNode] = result;
    expect(outerNode.type).toBe('structuredContentBlock');
    expect(outerNode.content?.map((node) => node.type)).toEqual(['paragraph', 'paragraph', 'table']);

    const isInnerInline = (node) => node.type === 'structuredContent' && node.attrs?.id === 'inner-inline';
    const nested = findFirstJson(outerNode, isInnerInline);
    expect(nested).toBeTruthy();
    expect(nested.attrs).toMatchObject({
      id: 'inner-inline',
      tag: 'inline-tag',
      alias: 'Inline Alias',
      lockMode: 'sdtContentLocked',
      controlType: 'richText',
    });
    const rawLock = nested.attrs.sdtPr?.elements?.find((el) => el.name === 'w:lock');
    expect(rawLock?.attributes?.['w:val']).toBe('sdtContentLocked');

    expectSchemaValid(result);
  });
});
+`super-converter/v3/handlers/w/sdt/`. Exercised by +`tests/editor/sdt-nested-classification.test.js`. + +The claim under test: block vs run/inline SDT classification is driven by the +translated ProseMirror content shape plus import context, not only by the direct +XML child names of `w:sdtContent`. + +## Provenance and conformance + +Each fixture's surrounding package (content types, rels, styles, theme, fonts, and +image media) is taken verbatim from a Word-authored base already in this folder. +Only `word/document.xml` is hand-authored to encode the exact OOXML shape, so the +package stays valid while the structure is precise. All fixtures are therefore +**schema-only** (hand-authored structure, not produced or validated by Word). + +The `conformance` column distinguishes shapes that are valid ECMA-376 from one that +is deliberately malformed to exercise the PR's defensive normalization: + +| Fixture | Conformance | Base package | Shape under `w:body` | +|---|---|---|---| +| `sdt-nested-block.docx` | conformant | `blank-doc.docx` | block `w:sdt` whose `w:sdtContent` directly contains a nested block `w:sdt` (no direct `w:p`) wrapping a paragraph. Legal: `EG_ContentBlockContent` permits `sdt`. | +| `sdt-nested-inline.docx` | conformant | `blank-doc.docx` | `w:p` containing an inline `w:sdt` that contains a nested inline `w:sdt` of runs, between two text runs. Legal: `CT_SdtContentRun` is `EG_PContent`. | +| `sdt-mixed-block.docx` | **defensive (malformed)** | `blank-doc.docx` | block `w:sdt` whose `w:sdtContent` holds a bare inline `w:sdt`, a `w:p`, and a `w:tbl`. The bare inline `w:sdt` is **non-conformant**: a `w:sdt` directly under block content is positionally `CT_SdtBlock`, whose content may not be a bare `w:r` (`EG_ContentBlockContent` allows only `customXml/sdt/p/tbl/EG_RunLevelElts`, and `EG_RunLevelElts` excludes `w:r`). Included on purpose to drive `wrapInlineRunsAsParagraphs`, which the PR uses to normalize bare inline content inside a block SDT. 
/*
 * Regenerate the SDT classification fixtures for the nested content-control
 * classifier (PR #3616). Exercised by tests/editor/sdt-nested-classification.test.js.
 * Provenance and per-fixture conformance are documented in sdt-fixtures.README.md.
 *
 * Each fixture derives from a Word-authored base in this folder (blank-doc.docx /
 * anchor_images.docx) and replaces only word/document.xml with a hand-authored body
 * that encodes a precise SDT shape; every other package part is inherited from the
 * base, so the package stays valid. After writing, each built file is re-read and its
 * intended shape is asserted.
 *
 * All paths resolve from this file's location. Requires the `zip` and `unzip`
 * command-line tools on PATH.
 *   node packages/super-editor/src/editors/v1/tests/data/sdt-fixtures.generate.cjs
 *   SDT_FIXTURE_OUT=/tmp/sdt-verify node .../sdt-fixtures.generate.cjs   # dry run
 *
 * NOTE(review): the XML string literals below are reproduced exactly as they appear
 * in the reviewed text, where the markup looks stripped. Confirm them against the
 * repository copy before regenerating fixtures.
 */
const fs = require('fs');
const os = require('os');
const path = require('path');
const { execFileSync } = require('child_process');

const DATA = __dirname;
// Absolute on purpose: `zip` runs with cwd = STAGE, so a relative SDT_FIXTURE_OUT
// would otherwise point at a different file than the one copyFileSync just wrote.
const OUT = path.resolve(process.env.SDT_FIXTURE_OUT || __dirname);
const STAGE = fs.mkdtempSync(path.join(os.tmpdir(), 'sdt-fixture-'));

// Exact opening tag (full namespaces) copied verbatim from blank-doc.docx.
const HEADER =
  '\n' +
  '';

const SECTPR =
  '';

// Clean inline drawing referencing anchor_images.docx's small image1.png (rId4).
const INLINE_DRAWING =
  '';

const fixtures = [
  {
    name: 'sdt-nested-block.docx',
    base: 'blank-doc.docx',
    // Conformant: outer block w:sdt whose sdtContent's only direct child is a nested
    // block w:sdt (no direct w:p). The inner SDT wraps a paragraph.
    body:
      '' +
      '' +
      'Nested block content' +
      '' +
      '',
    mustContain: ['OuterBlock', 'InnerBlock', 'Nested block content', ''],
  },
  {
    name: 'sdt-nested-inline.docx',
    base: 'blank-doc.docx',
    // Conformant: inline w:sdt (with a nested inline w:sdt) inside a paragraph, between
    // two text runs. Proves the path/context gate keeps valid inline SDTs inline.
    body:
      '' +
      'Before ' +
      '' +
      'outer ' +
      '' +
      'inner' +
      '' +
      '' +
      ' after' +
      '',
    mustContain: ['OuterInline', 'InnerInline', 'Before ', 'inner', ' after', ''],
  },
  {
    name: 'sdt-mixed-block.docx',
    base: 'blank-doc.docx',
    // DEFENSIVE / MALFORMED: a block w:sdt whose sdtContent mixes a bare inline w:sdt,
    // a w:p, and a w:tbl. The bare inline w:sdt is non-conformant in block content
    // (EG_ContentBlockContent excludes bare w:r). Drives wrapInlineRunsAsParagraphs.
    body:
      '' +
      '' +
      'inline sdt' +
      '' +
      'A paragraph' +
      '' +
      'Cell' +
      '',
    mustContain: ['MixedBlock', 'InlineInMixed', 'inline sdt', 'A paragraph', '', 'Cell'],
  },
  {
    name: 'sdt-inline-picture.docx',
    base: 'anchor_images.docx',
    // Conformant (ECMA-376 17.5.2.24): inline picture content control. The sdtPr carries
    // the picture marker; content is a run with an inline drawing referencing the base
    // image (rId4).
    body:
      '' +
      INLINE_DRAWING +
      '',
    mustContain: ['PictureControl', '', 'r:embed="rId4"', ''],
  },
];

let allPass = true;
try {
  fs.mkdirSync(path.join(STAGE, 'word'), { recursive: true });
  fs.mkdirSync(OUT, { recursive: true });

  for (const f of fixtures) {
    const documentXml = HEADER + '' + f.body + SECTPR + '';
    fs.writeFileSync(path.join(STAGE, 'word', 'document.xml'), documentXml);

    const outPath = path.join(OUT, f.name);
    fs.copyFileSync(path.join(DATA, f.base), outPath);
    // Replace only word/document.xml; every other part comes from the Word-authored base.
    // Argument arrays (no shell) keep paths with spaces, quotes or `$` intact.
    execFileSync('zip', ['-X', '-q', outPath, 'word/document.xml'], { cwd: STAGE });

    // Re-read the part from the built package and check every shape marker is present.
    const rebuilt = execFileSync('unzip', ['-p', outPath, 'word/document.xml'], { encoding: 'utf8' });
    const missing = f.mustContain.filter((s) => !rebuilt.includes(s));
    const ok = missing.length === 0;
    allPass = allPass && ok;
    console.log(`${ok ? 'PASS' : 'FAIL'} ${f.name} (${(fs.statSync(outPath).size / 1024).toFixed(1)} KB, base ${f.base})`);
    if (!ok) console.log(` missing shape markers: ${JSON.stringify(missing)}`);
  }
} finally {
  // Always remove the staging directory, including when zip/unzip or a copy fails.
  fs.rmSync(STAGE, { recursive: true, force: true });
}

console.log(allPass ? `\nAll fixtures generated + shape-verified -> ${OUT}` : '\nSHAPE VERIFICATION FAILED.');
process.exit(allPass ? 0 : 1);
/**
 * Fixture-backed integration coverage for PR #3616 (nested content controls).
 *
 * The architectural claim under test: block/run SDT classification is driven by the
 * TRANSLATED ProseMirror content shape plus import context (path), NOT only by the
 * direct XML child names of w:sdtContent. Each fixture is a real .docx (see
 * tests/data/sdt-*.docx, provenance in tests/data/sdt-fixtures.README.md) so the full
 * import + export pipeline runs, not a mocked translator.
 *
 * For every fixture we assert three things, because "opens without crashing" would
 * miss quiet data loss:
 *   1. XML shape  - the intended OOXML actually exists in word/document.xml.
 *   2. Import     - the imported PM node types are correct and no content is lost.
 *   3. Round-trip - export still produces the expected w:sdt wrapper / w:sdtPr shape.
 *
 * Scope: this validates the block/run classifier only. Row-level SDTs
 * (w:tbl > w:sdt > w:tr, tracked by SD-3118 / IT-1040) are a separate table-walk
 * concern and are intentionally NOT covered here.
 */

const NESTED_BLOCK = 'sdt-nested-block.docx';
const NESTED_INLINE = 'sdt-nested-inline.docx';
const MIXED_BLOCK = 'sdt-mixed-block.docx';
const INLINE_PICTURE = 'sdt-inline-picture.docx';

// ---- ProseMirror JSON traversal helpers ----

/** Collect `node` and every descendant that has a `type`, in pre-order. */
const pmAll = (node, acc = []) => {
  if (!node || typeof node !== 'object') return acc;
  if (node.type) acc.push(node);
  (node.content || []).forEach((c) => pmAll(c, acc));
  return acc;
};
/** All nodes of the given type under (and including) `node`. */
const pmCollect = (node, type) => pmAll(node).filter((n) => n.type === type);
/** First node of the given type in pre-order, or null. */
const pmFirst = (node, type) => pmCollect(node, type)[0] || null;
/** First node whose `attrs.alias` equals `alias`, or null. */
const pmByAlias = (node, alias) => pmAll(node).find((n) => n.attrs?.alias === alias) || null;
/** Concatenated text of all text nodes under `node`. */
const pmText = (node) => {
  if (!node || typeof node !== 'object') return '';
  if (node.type === 'text') return node.text || '';
  return (node.content || []).map(pmText).join('');
};
/** Types of the direct children of `node`. */
const pmChildTypes = (node) => (node?.content || []).map((c) => c.type);
const BLOCK_TYPES = ['paragraph', 'table', 'structuredContentBlock'];
// Schema invariant: an inline structuredContent must never directly contain a block node.
const assertNoBlockInsideInline = (doc) => {
  pmCollect(doc, 'structuredContent').forEach((sc) => {
    expect(pmChildTypes(sc).some((t) => BLOCK_TYPES.includes(t))).toBe(false);
  });
};

// ---- OOXML JSON traversal helpers (parseXmlToJson / exportSchemaToJson shape) ----

/** Collect `node` and every descendant that has a `name`, in pre-order. */
const xmlAll = (node, acc = []) => {
  if (!node || typeof node !== 'object') return acc;
  if (node.name) acc.push(node);
  (node.elements || []).forEach((c) => xmlAll(c, acc));
  return acc;
};
/** All elements with the given name under (and including) `node`. */
const xmlCollect = (node, name) => xmlAll(node).filter((n) => n.name === name);
/** First element with the given name in pre-order, or null. */
const xmlFirst = (node, name) => xmlCollect(node, name)[0] || null;
/** Direct children of `node` with the given name. */
const xmlDirectChildren = (node, name) => (node?.elements || []).filter((e) => e.name === name);
/** Concatenated text of all text nodes under `node`. */
const xmlText = (node) => {
  if (!node || typeof node !== 'object') return '';
  if (node.type === 'text') return node.text || '';
  return (node.elements || []).map(xmlText).join('');
};
/**
 * Alias of a `w:sdt`, read from its OWN `w:sdtPr` (direct child). A deep search
 * would fall through to a nested SDT's properties when the outer one has none
 * and report the wrong alias.
 */
const sdtAlias = (sdt) => {
  const pr = xmlDirectChildren(sdt, 'w:sdtPr')[0];
  return xmlDirectChildren(pr, 'w:alias')[0]?.attributes?.['w:val'];
};
/** First `w:sdt` under `root` whose alias equals `alias`, or null. */
const xmlSdtByAlias = (root, alias) => xmlCollect(root, 'w:sdt').find((sdt) => sdtAlias(sdt) === alias) || null;
/** First `w:p` under `root` that contains a `w:sdt` at any depth, or null. */
const xmlParaContainingSdt = (root) => xmlCollect(root, 'w:p').find((p) => xmlCollect(p, 'w:sdt').length > 0) || null;

// ---- pipeline helpers ----

/** Import a fixture through a real editor and return its document JSON. */
const importDoc = async (name) => {
  const { docx, media, mediaFiles, fonts } = await loadTestDataForEditorTests(name);
  const { editor } = initTestEditor({ content: docx, media, mediaFiles, fonts });
  try {
    return editor.getJSON();
  } finally {
    // Release the editor once its JSON is captured so instances do not pile up across tests.
    editor.destroy();
  }
};
/** Parsed word/document.xml of a fixture. */
const documentXmlOf = async (name) => (await getTestDataByFileName(name))['word/document.xml'];

describe('SDT classification (PR #3616) - nested block content controls', () => {
  it('XML shape: outer w:sdtContent has a direct nested w:sdt and no direct w:p', async () => {
    const docXml = await documentXmlOf(NESTED_BLOCK);
    const outer = xmlSdtByAlias(docXml, 'OuterBlock');
    expect(outer).toBeTruthy();
    const outerContent = xmlFirst(outer, 'w:sdtContent');
    expect(xmlDirectChildren(outerContent, 'w:sdt').length).toBe(1);
    expect(xmlDirectChildren(outerContent, 'w:p').length).toBe(0); // the case the old check missed
    expect(xmlText(xmlSdtByAlias(docXml, 'InnerBlock'))).toContain('Nested block content');
  });

  it('Import: both SDTs classify as block via translated content, content preserved', async () => {
    const doc = await importDoc(NESTED_BLOCK);
    const outer = pmByAlias(doc, 'OuterBlock');
    const inner = pmByAlias(doc, 'InnerBlock');
    expect(outer?.type).toBe('structuredContentBlock');
    expect(inner?.type).toBe('structuredContentBlock');
    expect(pmText(outer)).toContain('Nested block content');
    assertNoBlockInsideInline(doc);
  });

  it('Round-trip: nested w:sdt wrappers and sdtPr survive export', async () => {
    const exported = await getExportedResult(NESTED_BLOCK);
    const outer = xmlSdtByAlias(exported, 'OuterBlock');
    expect(outer).toBeTruthy();
    expect(xmlFirst(outer, 'w:sdtPr')).toBeTruthy();
    expect(xmlSdtByAlias(exported, 'InnerBlock')).toBeTruthy();
    expect(xmlText(exported)).toContain('Nested block content');
  });
});

describe('SDT classification (PR #3616) - nested inline content controls', () => {
  it('XML shape: nested inline w:sdt lives inside a w:p', async () => {
    const docXml = await documentXmlOf(NESTED_INLINE);
    expect(xmlParaContainingSdt(docXml)).toBeTruthy();
    expect(xmlSdtByAlias(docXml, 'OuterInline')).toBeTruthy();
    expect(xmlSdtByAlias(docXml, 'InnerInline')).toBeTruthy();
  });

  it('Import: inline SDTs stay inline (path gate does not force block)', async () => {
    const doc = await importDoc(NESTED_INLINE);
    expect(pmByAlias(doc, 'OuterInline')?.type).toBe('structuredContent');
    expect(pmByAlias(doc, 'InnerInline')?.type).toBe('structuredContent');
    expect(pmCollect(doc, 'structuredContentBlock').length).toBe(0); // nothing promoted to block
    const text = pmText(doc);
    expect(text).toContain('Before');
    expect(text).toContain('outer');
    expect(text).toContain('inner');
    expect(text).toContain('after');
    assertNoBlockInsideInline(doc);
  });

  it('Round-trip: inline w:sdt stays inside the paragraph on export', async () => {
    const exported = await getExportedResult(NESTED_INLINE);
    expect(xmlParaContainingSdt(exported)).toBeTruthy();
    expect(xmlSdtByAlias(exported, 'OuterInline')).toBeTruthy();
    expect(xmlSdtByAlias(exported, 'InnerInline')).toBeTruthy();
    expect(xmlText(exported)).toContain('inner');
  });
});

describe('SDT classification (PR #3616) - mixed block content control', () => {
  it('XML shape: block w:sdtContent mixes a bare inline w:sdt, a w:p, and a w:tbl', async () => {
    const docXml = await documentXmlOf(MIXED_BLOCK);
    const outerContent = xmlFirst(xmlSdtByAlias(docXml, 'MixedBlock'), 'w:sdtContent');
    expect(xmlDirectChildren(outerContent, 'w:sdt').length).toBe(1);
    expect(xmlDirectChildren(outerContent, 'w:p').length).toBe(1);
    expect(xmlDirectChildren(outerContent, 'w:tbl').length).toBe(1);
  });

  it('Import: bare inline SDT is wrapped into a paragraph; paragraph and table preserved', async () => {
    const doc = await importDoc(MIXED_BLOCK);
    const outer = pmByAlias(doc, 'MixedBlock');
    expect(outer?.type).toBe('structuredContentBlock');

    const childTypes = pmChildTypes(outer);
    expect(childTypes).not.toContain('structuredContent'); // inline child must be wrapped, never a direct block child
    expect(childTypes.filter((t) => t === 'paragraph').length).toBeGreaterThanOrEqual(2);
    expect(childTypes).toContain('table');

    const wrappedInline = pmByAlias(outer, 'InlineInMixed');
    expect(wrappedInline?.type).toBe('structuredContent');
    expect(pmText(wrappedInline)).toContain('inline sdt');
    expect(pmText(outer)).toContain('A paragraph');
    expect(pmText(outer)).toContain('Cell');
    assertNoBlockInsideInline(doc);
  });

  it('Round-trip: paragraph, table, and inline SDT content all survive export', async () => {
    const exported = await getExportedResult(MIXED_BLOCK);
    expect(xmlSdtByAlias(exported, 'MixedBlock')).toBeTruthy();
    expect(xmlFirst(exported, 'w:tbl')).toBeTruthy();
    const text = xmlText(exported);
    expect(text).toContain('A paragraph');
    expect(text).toContain('Cell');
    expect(text).toContain('inline sdt');
  });
});

describe('SDT classification (PR #3616) - inline picture content control', () => {
  it('XML shape: inline w:sdt has a w:picture marker and a w:drawing in its content', async () => {
    const docXml = await documentXmlOf(INLINE_PICTURE);
    const pic = xmlSdtByAlias(docXml, 'PictureControl');
    expect(xmlDirectChildren(xmlFirst(pic, 'w:sdtPr'), 'w:picture').length).toBe(1);
    expect(xmlFirst(pic, 'w:drawing')).toBeTruthy();
  });

  it('Import: image classifies inline; picture marker is not modeled but sdtPr is kept', async () => {
    const doc = await importDoc(INLINE_PICTURE);
    const pic = pmByAlias(doc, 'PictureControl');
    expect(pic?.type).toBe('structuredContent'); // inline, not block
    expect(pmFirst(pic, 'image')).toBeTruthy(); // image preserved as inline content
    expect(pic?.attrs?.controlType).not.toBe('picture'); // picture marker not modeled semantically
    expect(pic?.attrs?.controlType ?? null).toBeNull();
    expect(pic?.attrs?.sdtPr).toBeTruthy(); // raw sdtPr retained for round-trip
    expect(pmCollect(doc, 'structuredContentBlock').length).toBe(0);
    assertNoBlockInsideInline(doc);
  });

  it('Round-trip: w:picture marker round-trips raw via sdtPr and the drawing survives', async () => {
    const exported = await getExportedResult(INLINE_PICTURE);
    const pic = xmlSdtByAlias(exported, 'PictureControl');
    expect(pic).toBeTruthy();
    expect(xmlDirectChildren(xmlFirst(pic, 'w:sdtPr'), 'w:picture').length).toBe(1);
    expect(xmlFirst(pic, 'w:drawing')).toBeTruthy();
  });
});