Skip to content

Commit 33072f5

Browse files
committed
feat(xsd): attributes, attributeGroup refs, and enums (Phase 3e)
Pass 4 of ingestSchemaSet walks every complexType, attributeGroup, and simpleType declaration and writes:

- xsd_attr_edges: one row per direct or extension/restriction attribute. attr_use is an enum locked to required / optional / prohibited; the default is 'optional'. type_ref stores the Clark-style {namespace}localName so Phase 4 lookups can join across vocabularies, with the raw qname as a fallback when unresolvable.
- xsd_group_edges: additional rows with ref_kind='attributeGroup' for every <xsd:attributeGroup ref="..."/> on a complexType or another attributeGroup body.
- xsd_enums: one row per <xsd:enumeration value="..."/> beneath a simpleType restriction; order_index is preserved.

Idempotency: same delete-and-rewrite-per-profile pattern as Pass 3. xsd_group_edges is already cleared by Pass 3, so the attributeGroup ref inserts here run on a fresh slate.

Attribute parents handled:
- complexType direct (no wrapper)
- complexContent / extension|restriction
- simpleContent / extension|restriction
- attributeGroup body (top-level)

WML closure ingest stats:
- 1114 attr edges (2 unresolved: xml:space / xml:lang)
- 17 attributeGroup refs (0 unresolved)
- 2189 enum values
- elapsed ~3s
- all unresolved counters elsewhere still 0

Real-data sanity: the top attribute-heavy types match expectation (CT_ElemPropSet 28, CT_TextBodyProperties 19, ...). The type_ref distribution shows xsd:boolean, ST_OnOff, ST_DecimalNumber, etc. resolved to the right namespaces.

Fixture main.xsd gains CT_TableUser to exercise an attributeGroup ref plus a required attribute, alongside the existing direct, extension, and attributeGroup-body attribute paths and the ST_Jc enum.
1 parent 280e76f commit 33072f5

4 files changed

Lines changed: 329 additions & 5 deletions

File tree

scripts/ingest-xsd/ingest.ts

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ export interface IngestStats {
6464
groupRefsInserted: number;
6565
groupRefsUnresolved: number;
6666
localElementsCreated: number;
67+
attrEdgesInserted: number;
68+
attrEdgesUnresolved: number;
69+
attrGroupRefsInserted: number;
70+
attrGroupRefsUnresolved: number;
71+
enumsInserted: number;
6772
}
6873

6974
export async function ingestSchemaSet(opts: IngestSchemaSetOptions): Promise<IngestStats> {
@@ -86,6 +91,11 @@ export async function ingestSchemaSet(opts: IngestSchemaSetOptions): Promise<Ing
8691
groupRefsInserted: 0,
8792
groupRefsUnresolved: 0,
8893
localElementsCreated: 0,
94+
attrEdgesInserted: 0,
95+
attrEdgesUnresolved: 0,
96+
attrGroupRefsInserted: 0,
97+
attrGroupRefsUnresolved: 0,
98+
enumsInserted: 0,
8999
};
90100

91101
await opts.db.sql.begin(async (sql: Sql) => {
@@ -242,6 +252,79 @@ export async function ingestSchemaSet(opts: IngestSchemaSetOptions): Promise<Ing
242252
}
243253
}
244254
}
255+
256+
// Pass 4: attributes, attributeGroup refs, and simpleType enumerations.
257+
// Same delete-and-rewrite strategy as Pass 3. xsd_group_edges already
258+
// cleared by Pass 3, so attributeGroup ref inserts here are fresh.
259+
await sql`DELETE FROM xsd_attr_edges WHERE profile_id = ${profileId}`;
260+
await sql`DELETE FROM xsd_enums WHERE profile_id = ${profileId}`;
261+
262+
for (const decls of parseResult.declarationsByQName.values()) {
263+
for (const decl of decls) {
264+
const ownerSymbolId = symbolIds.get(
265+
symbolKey(decl.vocabularyId, decl.localName, decl.kind),
266+
);
267+
if (ownerSymbolId == null) continue;
268+
const prefixMap = parseResult.namespaceByPrefix.get(decl.documentPath);
269+
if (!prefixMap) continue;
270+
271+
if (decl.kind === "complexType" || decl.kind === "attributeGroup") {
272+
const parents = findAttributeParents(decl);
273+
let order = 0;
274+
for (const parent of parents) {
275+
for (const child of nodeChildrenLocal(parent)) {
276+
const tag = stripPrefixLocal(nodeTagLocal(child));
277+
if (tag === "attribute") {
278+
await handleAttribute(
279+
sql,
280+
child,
281+
ownerSymbolId,
282+
profileId,
283+
prefixMap,
284+
decl.namespace,
285+
symbolIds,
286+
order,
287+
stats,
288+
);
289+
order++;
290+
} else if (tag === "attributeGroup") {
291+
const a = nodeAttrs(child);
292+
if (!a.ref) continue;
293+
const resolved = resolveQNameAttr(a.ref, prefixMap, decl.namespace);
294+
if (!resolved.resolved || !resolved.qname.vocabularyId) {
295+
stats.attrGroupRefsUnresolved++;
296+
continue;
297+
}
298+
const groupSymbolId = symbolIds.get(
299+
symbolKey(resolved.qname.vocabularyId, resolved.qname.localName, "attributeGroup"),
300+
);
301+
if (groupSymbolId == null) {
302+
stats.attrGroupRefsUnresolved++;
303+
continue;
304+
}
305+
await insertGroupEdge(
306+
sql,
307+
ownerSymbolId,
308+
groupSymbolId,
309+
profileId,
310+
"attributeGroup",
311+
order,
312+
);
313+
stats.attrGroupRefsInserted++;
314+
order++;
315+
}
316+
}
317+
}
318+
} else if (decl.kind === "simpleType") {
319+
let order = 0;
320+
for (const value of findEnumValues(decl)) {
321+
await insertEnum(sql, ownerSymbolId, profileId, value, order);
322+
stats.enumsInserted++;
323+
order++;
324+
}
325+
}
326+
}
327+
}
245328
});
246329

247330
return stats;
@@ -555,6 +638,143 @@ async function insertGroupEdge(
555638
`;
556639
}
557640

641+
/**
 * Write a single row to `xsd_attr_edges`.
 *
 * Every argument is bound as a query parameter through the `sql` tagged
 * template; nothing is interpolated into the statement text.
 *
 * @param sql          Tagged-template SQL handle used to run the INSERT.
 * @param symbolId     Stored in `symbol_id` (the symbol that owns the attribute).
 * @param attrSymbolId Stored in `attr_symbol_id`; `null` when no attribute symbol is known.
 * @param localName    Stored in `local_name`.
 * @param profileId    Stored in `profile_id`.
 * @param attrUse      Stored in `attr_use`; one of required / optional / prohibited.
 * @param defaultValue Stored in `default_value`; `null` when absent.
 * @param fixedValue   Stored in `fixed_value`; `null` when absent.
 * @param typeRef      Stored in `type_ref`; `null` when absent.
 * @param orderIndex   Stored in `order_index`.
 */
async function insertAttrEdge(
  sql: Sql,
  symbolId: number,
  attrSymbolId: number | null,
  localName: string,
  profileId: number,
  attrUse: "required" | "optional" | "prohibited",
  defaultValue: string | null,
  fixedValue: string | null,
  typeRef: string | null,
  orderIndex: number,
): Promise<void> {
  await sql`
    INSERT INTO xsd_attr_edges
      (symbol_id, attr_symbol_id, local_name, profile_id, attr_use, default_value, fixed_value, type_ref, order_index)
    VALUES
      (${symbolId}, ${attrSymbolId}, ${localName}, ${profileId}, ${attrUse}, ${defaultValue}, ${fixedValue}, ${typeRef}, ${orderIndex})
  `;
}
660+
661+
/**
 * Write a single row to `xsd_enums`.
 *
 * @param sql        Tagged-template SQL handle used to run the INSERT.
 * @param symbolId   Stored in `symbol_id` (the symbol that owns the value).
 * @param profileId  Stored in `profile_id`.
 * @param value      Stored in `value`.
 * @param orderIndex Stored in `order_index`.
 */
async function insertEnum(
  sql: Sql,
  symbolId: number,
  profileId: number,
  value: string,
  orderIndex: number,
): Promise<void> {
  await sql`
    INSERT INTO xsd_enums (symbol_id, profile_id, value, order_index)
    VALUES (${symbolId}, ${profileId}, ${value}, ${orderIndex})
  `;
}
673+
674+
/**
 * Return the nodes whose direct children should be scanned for
 * xsd:attribute / xsd:attributeGroup elements.
 *
 * - attributeGroup declaration: the declaration node itself.
 * - complexType without a complexContent/simpleContent child: the
 *   declaration node itself.
 * - complexType with such a wrapper: every extension/restriction node found
 *   directly under a wrapper (possibly none, giving an empty list).
 * - any other declaration kind: an empty list.
 */
function findAttributeParents(decl: Declaration): PreserveOrderNode[] {
  if (decl.kind === "attributeGroup") return [decl.node];
  if (decl.kind !== "complexType") return [];

  const derivations: PreserveOrderNode[] = [];
  let hasContentWrapper = false;

  for (const child of nodeChildrenLocal(decl.node)) {
    const childTag = stripPrefixLocal(nodeTagLocal(child));
    if (childTag !== "complexContent" && childTag !== "simpleContent") continue;

    hasContentWrapper = true;
    for (const grandchild of nodeChildrenLocal(child)) {
      const derivationTag = stripPrefixLocal(nodeTagLocal(grandchild));
      if (derivationTag === "extension" || derivationTag === "restriction") {
        derivations.push(grandchild);
      }
    }
  }

  // Without a content wrapper, attributes hang off the type node directly.
  return hasContentWrapper ? derivations : [decl.node];
}
699+
700+
/**
 * Collect the `value` attribute of each enumeration element that sits
 * directly beneath a restriction child of the declaration node, in document
 * order. Enumeration elements without a `value` attribute are skipped; an
 * empty-string value is kept.
 */
function findEnumValues(decl: Declaration): string[] {
  const collected: string[] = [];
  for (const restriction of nodeChildrenLocal(decl.node)) {
    if (stripPrefixLocal(nodeTagLocal(restriction)) !== "restriction") continue;

    for (const facet of nodeChildrenLocal(restriction)) {
      if (stripPrefixLocal(nodeTagLocal(facet)) === "enumeration") {
        const literal = nodeAttrs(facet).value;
        if (literal !== undefined) collected.push(literal);
      }
    }
  }
  return collected;
}
715+
716+
/**
 * Turn one attribute node into an `xsd_attr_edges` row.
 *
 * Two forms are handled:
 * - `ref="..."`: the reference is resolved through `resolveQNameAttr`. If it
 *   does not resolve, or resolves without a vocabulary id,
 *   `stats.attrEdgesUnresolved` is incremented and nothing is inserted.
 *   Otherwise the resolved local name is used, and the attribute symbol id is
 *   looked up in `symbolIds` (left `null` when absent).
 * - `name="..."`: the name is used as-is. When a `type` attribute is present
 *   it is stored as `{namespace}localName` if it resolves, or as the raw
 *   attribute text if it does not.
 *
 * A node that yields no local name is skipped without touching `stats`. An
 * unrecognised or missing `use` value is stored as "optional". On insert,
 * `stats.attrEdgesInserted` is incremented.
 *
 * @param sql              Tagged-template SQL handle, passed to `insertAttrEdge`.
 * @param node             The attribute node to process.
 * @param ownerSymbolId    Symbol id of the declaration that carries the attribute.
 * @param profileId        Profile the row is written for.
 * @param prefixMap        Prefix map handed to `resolveQNameAttr`.
 * @param defaultNamespace Namespace handed to `resolveQNameAttr`.
 * @param symbolIds        Lookup from `symbolKey(...)` to symbol id.
 * @param orderIndex       Value stored in `order_index`.
 * @param stats            Counters mutated in place.
 */
async function handleAttribute(
  sql: Sql,
  node: PreserveOrderNode,
  ownerSymbolId: number,
  profileId: number,
  prefixMap: Map<string, string>,
  defaultNamespace: string,
  symbolIds: Map<string, number>,
  orderIndex: number,
  stats: IngestStats,
): Promise<void> {
  const attrs = nodeAttrs(node);

  let localName: string | null = null;
  let attrSymbolId: number | null = null;
  let typeRef: string | null = null;

  if (attrs.ref) {
    const target = resolveQNameAttr(attrs.ref, prefixMap, defaultNamespace);
    if (!target.resolved || !target.qname.vocabularyId) {
      stats.attrEdgesUnresolved++;
      return;
    }
    localName = target.qname.localName;
    const key = symbolKey(target.qname.vocabularyId, target.qname.localName, "attribute");
    attrSymbolId = symbolIds.get(key) ?? null;
  } else if (attrs.name) {
    localName = attrs.name;
    if (attrs.type) {
      const typeTarget = resolveQNameAttr(attrs.type, prefixMap, defaultNamespace);
      // Fall back to the raw attribute text so the type information is never dropped.
      typeRef = typeTarget.resolved
        ? `{${typeTarget.qname.namespace}}${typeTarget.qname.localName}`
        : attrs.type;
    }
  }

  if (!localName) return;

  // Anything other than the three recognised literals is stored as "optional".
  const declaredUse = attrs.use;
  let attrUse: "required" | "optional" | "prohibited" = "optional";
  if (declaredUse === "required" || declaredUse === "prohibited") {
    attrUse = declaredUse;
  }

  await insertAttrEdge(
    sql,
    ownerSymbolId,
    attrSymbolId,
    localName,
    profileId,
    attrUse,
    attrs.default ?? null,
    attrs.fixed ?? null,
    typeRef,
    orderIndex,
  );
  stats.attrEdgesInserted++;
}
777+
558778
// --- Inheritance discovery from AST -------------------------------------
559779

560780
interface InheritanceFinding {
@@ -663,6 +883,11 @@ async function main() {
663883
console.log(`group refs: ${stats.groupRefsInserted}`);
664884
console.log(`group refs unres.: ${stats.groupRefsUnresolved}`);
665885
console.log(`local elements: ${stats.localElementsCreated}`);
886+
console.log(`attr edges: ${stats.attrEdgesInserted}`);
887+
console.log(`attr edges unres.: ${stats.attrEdgesUnresolved}`);
888+
console.log(`attrGroup refs: ${stats.attrGroupRefsInserted}`);
889+
console.log(`attrGroup refs unr.: ${stats.attrGroupRefsUnresolved}`);
890+
console.log(`enums: ${stats.enumsInserted}`);
666891
console.log(`elapsed: ${ms}ms`);
667892
} finally {
668893
await db.close();

tests/ingest-xsd/fixtures/main.xsd

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,9 @@
4848
<xsd:attributeGroup name="AG_TableProps">
4949
<xsd:attribute name="cols" type="xsd:int"/>
5050
</xsd:attributeGroup>
51+
<xsd:complexType name="CT_TableUser">
52+
<xsd:attributeGroup ref="AG_TableProps"/>
53+
<xsd:attribute name="caption" type="xsd:string" use="required"/>
54+
</xsd:complexType>
5155
<xsd:element name="document" type="CT_Empty"/>
5256
</xsd:schema>

0 commit comments

Comments
 (0)