@@ -33,11 +33,7 @@ import { createDbClient, type DbClient } from "../../packages/shared/src/db/inde
3333import { nodeAttrs } from "./ast.ts" ;
3434import { parseSchemaSet } from "./parse-schema.ts" ;
3535import { resolveQNameAttr } from "./qname.ts" ;
36- import type {
37- Declaration ,
38- ParsedSchemaSet ,
39- PreserveOrderNode ,
40- } from "./types.ts" ;
36+ import type { Declaration , ParsedSchemaSet , PreserveOrderNode } from "./types.ts" ;
4137import { vocabularyForNamespace } from "./vocabulary.ts" ;
4238
4339// biome-ignore lint/suspicious/noExplicitAny: postgres library typing is intricate; helpers stay generic.
@@ -62,6 +58,12 @@ export interface IngestStats {
6258 profileMembershipsInserted : number ;
6359 inheritanceEdgesInserted : number ;
6460 inheritanceUnresolved : number ;
61+ compositorsInserted : number ;
62+ childEdgesInserted : number ;
63+ childEdgesUnresolved : number ;
64+ groupRefsInserted : number ;
65+ groupRefsUnresolved : number ;
66+ localElementsCreated : number ;
6567}
6668
6769export async function ingestSchemaSet ( opts : IngestSchemaSetOptions ) : Promise < IngestStats > {
@@ -78,6 +80,12 @@ export async function ingestSchemaSet(opts: IngestSchemaSetOptions): Promise<Ing
7880 profileMembershipsInserted : 0 ,
7981 inheritanceEdgesInserted : 0 ,
8082 inheritanceUnresolved : 0 ,
83+ compositorsInserted : 0 ,
84+ childEdgesInserted : 0 ,
85+ childEdgesUnresolved : 0 ,
86+ groupRefsInserted : 0 ,
87+ groupRefsUnresolved : 0 ,
88+ localElementsCreated : 0 ,
8189 } ;
8290
8391 await opts . db . sql . begin ( async ( sql : Sql ) => {
@@ -182,11 +190,231 @@ export async function ingestSchemaSet(opts: IngestSchemaSetOptions): Promise<Ing
182190 if ( inserted ) stats . inheritanceEdgesInserted ++ ;
183191 }
184192 }
193+
194+ // Pass 3: content models. Walk every complexType and group declaration,
195+ // emit xsd_compositors / xsd_child_edges / xsd_group_edges. Local element
196+ // declarations are deduped under (owner-vocab, name, element); cross-CT
197+ // reuse of a local name collapses to one symbol.
198+ for ( const decls of parseResult . declarationsByQName . values ( ) ) {
199+ for ( const decl of decls ) {
200+ if ( decl . kind !== "complexType" && decl . kind !== "group" ) continue ;
201+
202+ const ownerSymbolId = symbolIds . get (
203+ symbolKey ( decl . vocabularyId , decl . localName , decl . kind ) ,
204+ ) ;
205+ if ( ownerSymbolId == null ) continue ;
206+ const prefixMap = parseResult . namespaceByPrefix . get ( decl . documentPath ) ;
207+ if ( ! prefixMap ) continue ;
208+
209+ const ctx : WalkCtx = {
210+ sql,
211+ profileId,
212+ ownerSymbolId,
213+ ownerDecl : decl ,
214+ prefixMap,
215+ symbolIds,
216+ stats,
217+ } ;
218+
219+ const particleParents = findContentModelParents ( decl ) ;
220+ let topOrder = 0 ;
221+ for ( const parent of particleParents ) {
222+ for ( const child of nodeChildrenLocal ( parent ) ) {
223+ const tag = stripPrefixLocal ( nodeTagLocal ( child ) ) ;
224+ if ( tag === "sequence" || tag === "choice" || tag === "all" ) {
225+ await walkCompositor ( child , tag , null , topOrder , ctx ) ;
226+ topOrder ++ ;
227+ } else if ( tag === "group" ) {
228+ await handleGroupRef ( child , topOrder , ctx ) ;
229+ topOrder ++ ;
230+ }
231+ }
232+ }
233+ }
234+ }
185235 } ) ;
186236
187237 return stats ;
188238}
189239
/**
 * Per-declaration state threaded through the content-model walk
 * (walkCompositor / handleElement / handleGroupRef). One context is built
 * for each complexType or group declaration being ingested.
 */
interface WalkCtx {
  /** Transaction-scoped SQL handle that every insert of the walk goes through. */
  sql: Sql;
  /** Profile id stamped on each compositor, child edge and group edge row. */
  profileId: number;
  /** Symbol id of the declaration whose content model is being walked. */
  ownerSymbolId: number;
  /** The owning declaration; supplies the namespace and vocabulary used for lookups. */
  ownerDecl: Declaration;
  /** Prefix-to-namespace bindings of the owner's document, used to resolve `ref` QNames. */
  prefixMap: Map<string, string>;
  /** Cache of known symbol ids keyed by `symbolKey(...)`; the walk adds local elements to it. */
  symbolIds: Map<string, number>;
  /** Shared counters, mutated in place as rows are inserted or references fail to resolve. */
  stats: IngestStats;
}
249+
/**
 * Returns the node(s) whose direct children are the particles
 * (sequence / choice / all / group) of a declaration's content model.
 *
 * - group definition: the group node itself.
 * - complexType with xsd:complexContent: every xsd:extension / xsd:restriction
 *   found directly beneath a complexContent child.
 * - complexType with xsd:simpleContent: nothing, because simple content
 *   carries no element particles.
 * - any other complexType: the complexType node itself, since the particles
 *   sit directly under it.
 * - any other declaration kind: nothing.
 *
 * @param decl Declaration to inspect; only `kind` and `node` are read.
 * @returns Parent nodes to scan for particles, in document order (possibly empty).
 */
function findContentModelParents(decl: Declaration): PreserveOrderNode[] {
  if (decl.kind === "group") return [decl.node];
  if (decl.kind !== "complexType") return [];

  const derivations: PreserveOrderNode[] = [];
  let hasContentWrapper = false;

  for (const child of nodeChildrenLocal(decl.node)) {
    const tag = stripPrefixLocal(nodeTagLocal(child));

    if (tag === "simpleContent") {
      // Previously this case fell through to `[decl.node]`, contradicting the
      // documented "not yielded" contract. There are no particles to find.
      hasContentWrapper = true;
      continue;
    }
    if (tag !== "complexContent") continue;

    hasContentWrapper = true;
    for (const inner of nodeChildrenLocal(child)) {
      const innerTag = stripPrefixLocal(nodeTagLocal(inner));
      if (innerTag === "extension" || innerTag === "restriction") {
        derivations.push(inner);
      }
    }
  }

  // With a content wrapper the particles (if any) live under the derivation
  // nodes; without one they are direct children of the complexType.
  return hasContentWrapper ? derivations : [decl.node];
}
280+
/**
 * Persists one compositor (sequence / choice / all) and recursively walks
 * its particles.
 *
 * A compositor with no parent compositor is attached to the owner symbol;
 * a nested one is attached to its parent compositor only. Child particles
 * are numbered from 0 in document order; tags other than element,
 * sequence, choice, all and group (e.g. xsd:any) are skipped and do not
 * consume an order slot.
 *
 * @param node               The compositor node.
 * @param kind               Which compositor this is.
 * @param parentCompositorId Id of the enclosing compositor, or null at top level.
 * @param orderIndex         Position of this compositor within its parent.
 * @param ctx                Walk context (sql handle, owner, caches, stats).
 */
async function walkCompositor(
  node: PreserveOrderNode,
  kind: "sequence" | "choice" | "all",
  parentCompositorId: number | null,
  orderIndex: number,
  ctx: WalkCtx,
): Promise<void> {
  const attrs = nodeAttrs(node);
  const isTopLevel = parentCompositorId === null;

  const compositorId = await insertCompositor(
    ctx.sql,
    isTopLevel ? ctx.ownerSymbolId : null,
    parentCompositorId,
    ctx.profileId,
    kind,
    parseMinOccurs(attrs.minOccurs),
    parseMaxOccurs(attrs.maxOccurs),
    orderIndex,
  );
  ctx.stats.compositorsInserted++;

  let position = 0;
  for (const particle of nodeChildrenLocal(node)) {
    const tag = stripPrefixLocal(nodeTagLocal(particle));
    switch (tag) {
      case "element":
        await handleElement(particle, compositorId, position, ctx);
        break;
      case "sequence":
      case "choice":
      case "all":
        await walkCompositor(particle, tag, compositorId, position, ctx);
        break;
      case "group":
        await handleGroupRef(particle, position, ctx, compositorId);
        break;
      default:
        // xsd:any and everything else: skipped for now, no order slot used.
        continue;
    }
    position++;
  }
}
317+
/**
 * Records one element particle as a child edge of the owner symbol.
 *
 * - `ref="..."`: the QName is resolved against the owner's prefix map and
 *   namespace and looked up among known element symbols. If either step
 *   fails, `childEdgesUnresolved` is bumped and no edge is written.
 * - `name="..."` (local declaration): the element symbol is looked up under
 *   the owner's vocabulary and upserted on a cache miss, so the same local
 *   name used in several owners maps to a single symbol.
 * - neither attribute: the particle is ignored.
 *
 * @param node         The xsd:element node.
 * @param compositorId Compositor that directly contains the element.
 * @param orderIndex   Position of the element inside that compositor.
 * @param ctx          Walk context (sql handle, owner, caches, stats).
 */
async function handleElement(
  node: PreserveOrderNode,
  compositorId: number,
  orderIndex: number,
  ctx: WalkCtx,
): Promise<void> {
  const attrs = nodeAttrs(node);
  const { stats, symbolIds, ownerDecl } = ctx;
  let targetId: number | null = null;

  if (attrs.ref) {
    const lookup = resolveQNameAttr(attrs.ref, ctx.prefixMap, ownerDecl.namespace);
    if (!lookup.resolved || !lookup.qname.vocabularyId) {
      stats.childEdgesUnresolved++;
      return;
    }
    const refKey = symbolKey(lookup.qname.vocabularyId, lookup.qname.localName, "element");
    const known = symbolIds.get(refKey);
    if (known == null) {
      stats.childEdgesUnresolved++;
      return;
    }
    targetId = known;
  } else if (attrs.name) {
    const localKey = symbolKey(ownerDecl.vocabularyId, attrs.name, "element");
    let cached = symbolIds.get(localKey);
    if (cached == null) {
      // First sighting of this local element name in the vocabulary.
      const upserted = await upsertSymbol(ctx.sql, ownerDecl.vocabularyId, attrs.name, "element");
      symbolIds.set(localKey, upserted.id);
      if (upserted.inserted) {
        stats.symbolsInserted++;
        stats.localElementsCreated++;
      } else {
        stats.symbolsExisting++;
      }
      cached = upserted.id;
    }
    targetId = cached;
  }

  if (targetId == null) return;

  await insertChildEdge(
    ctx.sql,
    ctx.ownerSymbolId,
    compositorId,
    targetId,
    ctx.profileId,
    parseMinOccurs(attrs.minOccurs),
    parseMaxOccurs(attrs.maxOccurs),
    orderIndex,
  );
  stats.childEdgesInserted++;
}
372+
/**
 * Records an `<xsd:group ref="..."/>` particle as a group edge from the
 * owner symbol to the referenced group symbol.
 *
 * Nodes without a `ref` attribute are ignored. A reference whose QName
 * cannot be resolved, or whose group symbol is unknown, increments
 * `groupRefsUnresolved` and writes nothing.
 *
 * @param node          The xsd:group reference node.
 * @param orderIndex    Position of the reference among its siblings.
 * @param ctx           Walk context (sql handle, owner, caches, stats).
 * @param _compositorId Enclosing compositor, accepted for call-site symmetry
 *                      but unused: group edges hang off the parent symbol,
 *                      not a compositor.
 */
async function handleGroupRef(
  node: PreserveOrderNode,
  orderIndex: number,
  ctx: WalkCtx,
  _compositorId: number | null = null,
): Promise<void> {
  void _compositorId; // group_edges aren't compositor-scoped in our schema.

  const attrs = nodeAttrs(node);
  if (!attrs.ref) return;

  const target = resolveQNameAttr(attrs.ref, ctx.prefixMap, ctx.ownerDecl.namespace);
  const groupSymbolId =
    !target.resolved || !target.qname.vocabularyId
      ? undefined
      : ctx.symbolIds.get(symbolKey(target.qname.vocabularyId, target.qname.localName, "group"));

  // Covers both "QName did not resolve" and "no such group symbol".
  if (groupSymbolId == null) {
    ctx.stats.groupRefsUnresolved++;
    return;
  }

  await insertGroupEdge(
    ctx.sql,
    ctx.ownerSymbolId,
    groupSymbolId,
    ctx.profileId,
    "group",
    orderIndex,
  );
  ctx.stats.groupRefsInserted++;
}
404+
/**
 * Parses an XSD `minOccurs` attribute value.
 *
 * `minOccurs` is an xs:nonNegativeInteger that defaults to 1. The value is
 * validated strictly: surrounding whitespace is ignored, an optional sign is
 * accepted, and anything that is not a whole non-negative number (negative
 * values, decimals, trailing garbage such as "2abc", empty strings) falls
 * back to the default instead of being partially parsed.
 *
 * @param raw Attribute text, or undefined when the attribute is absent.
 * @returns The occurrence count, or 1 when absent or invalid.
 */
function parseMinOccurs(raw: string | undefined): number {
  if (raw === undefined) return 1;

  const text = raw.trim();
  if (!/^[+-]?\d+$/.test(text)) return 1;

  const count = Number.parseInt(text, 10);
  if (!Number.isSafeInteger(count) || count < 0) return 1;

  // "-0" is a legal spelling of zero; Math.abs normalises it to +0.
  return Math.abs(count);
}
410+
/**
 * Parses an XSD `maxOccurs` attribute value.
 *
 * `maxOccurs` is either an xs:nonNegativeInteger or the token "unbounded",
 * and defaults to 1. Surrounding whitespace is ignored. Numeric values are
 * validated strictly: negative values, decimals, trailing garbage such as
 * "2abc" and empty strings fall back to the default instead of being
 * partially parsed.
 *
 * @param raw Attribute text, or undefined when the attribute is absent.
 * @returns The upper bound, `null` for "unbounded", or 1 when absent or invalid.
 */
function parseMaxOccurs(raw: string | undefined): number | null {
  if (raw === undefined) return 1;

  const text = raw.trim();
  if (text === "unbounded") return null;
  if (!/^[+-]?\d+$/.test(text)) return 1;

  const count = Number.parseInt(text, 10);
  if (!Number.isSafeInteger(count) || count < 0) return 1;

  // "-0" is a legal spelling of zero; Math.abs normalises it to +0.
  return Math.abs(count);
}
417+
190418// --- DB helpers ----------------------------------------------------------
191419
192420async function ensureProfile ( sql : Sql , name : string ) : Promise < number > {
@@ -200,7 +428,10 @@ async function ensureProfile(sql: Sql, name: string): Promise<number> {
200428
/**
 * Looks up the id of the `reference_sources` row whose `name` matches.
 *
 * Throws an Error when no such row exists; the message tells the operator
 * to run `db:sync-sources` first.
 *
 * NOTE(review): this span is a diff view. The `-` line and the `+` lines
 * below are the old and new formatting of the same throw statement, so only
 * one of them exists in the real file.
 */
201429async function lookupSourceId ( sql : Sql , name : string ) : Promise < number > {
202430 const [ row ] = await sql `SELECT id FROM reference_sources WHERE name = ${ name } LIMIT 1` ;
203- if ( ! row ) throw new Error ( `reference_sources row not found for name='${ name } '. Run db:sync-sources first.` ) ;
431+ if ( ! row )
432+ throw new Error (
433+ `reference_sources row not found for name='${ name } '. Run db:sync-sources first.` ,
434+ ) ;
204435 return row . id ;
205436}
206437
@@ -260,6 +491,60 @@ async function insertInheritance(
260491 return rows . length > 0 ;
261492}
262493
/**
 * Inserts one row into `xsd_compositors` and returns its generated id.
 *
 * NOTE(review): this is a plain INSERT with no conflict handling, so
 * re-ingesting the same schema set presumably adds duplicate rows. Confirm
 * whether callers clear the profile's content model first.
 *
 * @param sql                Transaction-scoped SQL handle.
 * @param parentSymbolId     Owning symbol for a top-level compositor, else null.
 * @param parentCompositorId Enclosing compositor for a nested one, else null.
 * @param profileId          Profile the row belongs to.
 * @param kind               sequence, choice or all.
 * @param minOccurs          Lower occurrence bound.
 * @param maxOccurs          Upper occurrence bound; null is stored for "unbounded".
 * @param orderIndex         Position within the parent.
 * @returns The id of the inserted row.
 */
async function insertCompositor(
  sql: Sql,
  parentSymbolId: number | null,
  parentCompositorId: number | null,
  profileId: number,
  kind: "sequence" | "choice" | "all",
  minOccurs: number,
  maxOccurs: number | null,
  orderIndex: number,
): Promise<number> {
  const inserted = await sql`
    INSERT INTO xsd_compositors
      (parent_symbol_id, parent_compositor_id, profile_id, kind, min_occurs, max_occurs, order_index)
    VALUES
      (${parentSymbolId}, ${parentCompositorId}, ${profileId}, ${kind}, ${minOccurs}, ${maxOccurs}, ${orderIndex})
    RETURNING id
  `;
  return inserted[0].id;
}
513+
/**
 * Inserts one row into `xsd_child_edges`, linking a parent symbol to a
 * child element symbol through the compositor that contains it.
 *
 * NOTE(review): this is a plain INSERT with no conflict handling. Confirm
 * that re-running ingestion cannot duplicate edges.
 *
 * @param sql            Transaction-scoped SQL handle.
 * @param parentSymbolId Symbol that owns the content model.
 * @param compositorId   Compositor directly containing the element.
 * @param childSymbolId  Symbol of the child element.
 * @param profileId      Profile the row belongs to.
 * @param minOccurs      Lower occurrence bound.
 * @param maxOccurs      Upper occurrence bound; null is stored for "unbounded".
 * @param orderIndex     Position of the element within its compositor.
 */
async function insertChildEdge(
  sql: Sql,
  parentSymbolId: number,
  compositorId: number,
  childSymbolId: number,
  profileId: number,
  minOccurs: number,
  maxOccurs: number | null,
  orderIndex: number,
): Promise<void> {
  const statement = sql`
    INSERT INTO xsd_child_edges
      (parent_symbol_id, compositor_id, child_symbol_id, profile_id, min_occurs, max_occurs, order_index)
    VALUES
      (${parentSymbolId}, ${compositorId}, ${childSymbolId}, ${profileId}, ${minOccurs}, ${maxOccurs}, ${orderIndex})
  `;
  await statement;
}
531+
/**
 * Inserts one row into `xsd_group_edges`, recording that a parent symbol
 * references a group or attributeGroup symbol.
 *
 * NOTE(review): this is a plain INSERT with no conflict handling. Confirm
 * that re-running ingestion cannot duplicate edges.
 *
 * @param sql            Transaction-scoped SQL handle.
 * @param parentSymbolId Symbol that contains the reference.
 * @param groupSymbolId  Symbol of the referenced group.
 * @param profileId      Profile the row belongs to.
 * @param refKind        Whether the reference is to a group or an attributeGroup.
 * @param orderIndex     Position of the reference among its siblings.
 */
async function insertGroupEdge(
  sql: Sql,
  parentSymbolId: number,
  groupSymbolId: number,
  profileId: number,
  refKind: "group" | "attributeGroup",
  orderIndex: number,
): Promise<void> {
  const statement = sql`
    INSERT INTO xsd_group_edges
      (parent_symbol_id, group_symbol_id, profile_id, ref_kind, order_index)
    VALUES
      (${parentSymbolId}, ${groupSymbolId}, ${profileId}, ${refKind}, ${orderIndex})
  `;
  await statement;
}
547+
263548// --- Inheritance discovery from AST -------------------------------------
264549
265550interface InheritanceFinding {
@@ -362,6 +647,12 @@ async function main() {
362647 console . log ( `profile memberships: ${ stats . profileMembershipsInserted } ` ) ;
363648 console . log ( `inheritance edges: ${ stats . inheritanceEdgesInserted } ` ) ;
364649 console . log ( `inheritance unres.: ${ stats . inheritanceUnresolved } ` ) ;
650+ console . log ( `compositors: ${ stats . compositorsInserted } ` ) ;
651+ console . log ( `child edges: ${ stats . childEdgesInserted } ` ) ;
652+ console . log ( `child edges unres.: ${ stats . childEdgesUnresolved } ` ) ;
653+ console . log ( `group refs: ${ stats . groupRefsInserted } ` ) ;
654+ console . log ( `group refs unres.: ${ stats . groupRefsUnresolved } ` ) ;
655+ console . log ( `local elements: ${ stats . localElementsCreated } ` ) ;
365656 console . log ( `elapsed: ${ ms } ms` ) ;
366657 } finally {
367658 await db . close ( ) ;
0 commit comments