Skip to content

Commit 8b7c441

Browse files
authored
Fix --reuse-model --collapse-reuse-models to deduplicate identical inline definitions (#2903)
1 parent a310b6f commit 8b7c441

8 files changed

Lines changed: 292 additions & 0 deletions

File tree

src/datamodel_code_generator/parser/base.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,38 @@ def __delete_duplicate_models(self, models: list[DataModel]) -> None: # noqa: P
11251125
{f"{c.module_name}.{c.type_hint}": c for c in child.base_classes}.values()
11261126
)
11271127
models_to_remove.add(duplicate_model)
1128+
1129+
if self.reuse_model and self.collapse_reuse_models:
1130+
max_iterations, iteration = len(models), 0
1131+
while True:
1132+
iteration += 1
1133+
if iteration > max_iterations: # pragma: no cover
1134+
msg = f"Deduplication exceeded max iterations ({max_iterations})"
1135+
raise RuntimeError(msg)
1136+
1137+
content_key_to_models: dict[tuple[Any, ...], list[DataModel]] = defaultdict(list)
1138+
for model in models:
1139+
if model not in models_to_remove and not isinstance(model, self.data_model_root_type):
1140+
model._dedup_key_cache.clear() # noqa: SLF001
1141+
content_key_to_models[model.get_dedup_key(None, use_default=True)].append(model)
1142+
1143+
if not (
1144+
duplicates := [
1145+
(canonical := group[0], dup)
1146+
for group in content_key_to_models.values()
1147+
if len(group) > 1
1148+
for dup in group[1:]
1149+
if dup not in models_to_remove
1150+
]
1151+
):
1152+
break
1153+
1154+
for canonical, duplicate in duplicates:
1155+
duplicate.replace_children_in_models(models, canonical.reference)
1156+
for child in duplicate.reference.iter_data_model_children(): # pragma: no cover
1157+
child.base_classes = list({c.reference: c for c in child.base_classes}.values())
1158+
models_to_remove.add(duplicate)
1159+
11281160
# Batch removal: O(n) instead of O(n²)
11291161
if models_to_remove:
11301162
models[:] = [m for m in models if m not in models_to_remove]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# generated by datamodel-codegen:
2+
# filename: reuse_model_inline_definitions.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Pos(BaseModel):
11+
start: int | None = None
12+
end: int | None = None
13+
14+
15+
class Node1(BaseModel):
16+
pos: Pos | None = None
17+
18+
19+
class Model(BaseModel):
20+
node1: Node1 | None = None
21+
node2: Node1 | None = None
22+
node3: Node1 | None = None
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# generated by datamodel-codegen:
2+
# filename: reuse_model_collapse_nested.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Start(BaseModel):
11+
line: int | None = None
12+
col: int | None = None
13+
14+
15+
class Pos(BaseModel):
16+
start: Start | None = None
17+
end: Start | None = None
18+
19+
20+
class Block1(BaseModel):
21+
pos: Pos | None = None
22+
23+
24+
class Model(BaseModel):
25+
block1: Block1 | None = None
26+
block2: Block1 | None = None
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# generated by datamodel-codegen:
2+
# filename: reuse_model_collapse_with_root.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel, RootModel
8+
9+
10+
class Pos(BaseModel):
11+
x: int | None = None
12+
y: int | None = None
13+
14+
15+
class Nested1(BaseModel):
16+
pos: Pos | None = None
17+
18+
19+
class StringType(RootModel[str]):
20+
root: str
21+
22+
23+
class Model(BaseModel):
24+
field1: StringType | None = None
25+
field2: StringType | None = None
26+
nested1: Nested1 | None = None
27+
nested2: Nested1 | None = None
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema",
3+
"type": "object",
4+
"properties": {
5+
"block1": {
6+
"type": "object",
7+
"properties": {
8+
"pos": {
9+
"type": "object",
10+
"properties": {
11+
"start": {
12+
"type": "object",
13+
"properties": {
14+
"line": {"type": "integer"},
15+
"col": {"type": "integer"}
16+
}
17+
},
18+
"end": {
19+
"type": "object",
20+
"properties": {
21+
"line": {"type": "integer"},
22+
"col": {"type": "integer"}
23+
}
24+
}
25+
}
26+
}
27+
}
28+
},
29+
"block2": {
30+
"type": "object",
31+
"properties": {
32+
"pos": {
33+
"type": "object",
34+
"properties": {
35+
"start": {
36+
"type": "object",
37+
"properties": {
38+
"line": {"type": "integer"},
39+
"col": {"type": "integer"}
40+
}
41+
},
42+
"end": {
43+
"type": "object",
44+
"properties": {
45+
"line": {"type": "integer"},
46+
"col": {"type": "integer"}
47+
}
48+
}
49+
}
50+
}
51+
}
52+
}
53+
}
54+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema",
3+
"type": "object",
4+
"definitions": {
5+
"StringType": {
6+
"type": "string"
7+
}
8+
},
9+
"properties": {
10+
"field1": {
11+
"$ref": "#/definitions/StringType"
12+
},
13+
"field2": {
14+
"$ref": "#/definitions/StringType"
15+
},
16+
"nested1": {
17+
"type": "object",
18+
"properties": {
19+
"pos": {
20+
"type": "object",
21+
"properties": {
22+
"x": {"type": "integer"},
23+
"y": {"type": "integer"}
24+
}
25+
}
26+
}
27+
},
28+
"nested2": {
29+
"type": "object",
30+
"properties": {
31+
"pos": {
32+
"type": "object",
33+
"properties": {
34+
"x": {"type": "integer"},
35+
"y": {"type": "integer"}
36+
}
37+
}
38+
}
39+
}
40+
}
41+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema",
3+
"type": "object",
4+
"properties": {
5+
"node1": {
6+
"type": "object",
7+
"properties": {
8+
"pos": {
9+
"type": "object",
10+
"properties": {
11+
"start": {"type": "integer"},
12+
"end": {"type": "integer"}
13+
}
14+
}
15+
}
16+
},
17+
"node2": {
18+
"type": "object",
19+
"properties": {
20+
"pos": {
21+
"type": "object",
22+
"properties": {
23+
"start": {"type": "integer"},
24+
"end": {"type": "integer"}
25+
}
26+
}
27+
}
28+
},
29+
"node3": {
30+
"type": "object",
31+
"properties": {
32+
"pos": {
33+
"type": "object",
34+
"properties": {
35+
"start": {"type": "integer"},
36+
"end": {"type": "integer"}
37+
}
38+
}
39+
}
40+
}
41+
}
42+
}

tests/main/jsonschema/test_main_jsonschema.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,54 @@ def test_main_json_reuse_enum(output_file: Path) -> None:
11321132
)
11331133

11341134

1135+
def test_main_reuse_model_collapse_inline_definitions(output_file: Path) -> None:
    """Check that identical inline definitions are merged when both reuse flags are set.

    The input schema declares the same inline object under several properties;
    the generated output is compared against the stored expected file.
    """
    schema_path = JSON_SCHEMA_DATA_PATH / "reuse_model_inline_definitions.json"
    cli_flags = [
        "--reuse-model",
        "--collapse-reuse-models",
        "--output-model-type",
        "pydantic_v2.BaseModel",
    ]
    run_main_and_assert(
        input_path=schema_path,
        output_path=output_file,
        input_file_type="jsonschema",
        assert_func=assert_file_content,
        extra_args=cli_flags,
    )
1149+
1150+
1151+
def test_main_reuse_model_collapse_with_root(output_file: Path) -> None:
    """Check that root-type models are left alone while other duplicates are collapsed.

    The input schema mixes a ``$ref``-ed string definition with duplicated
    inline objects; the generated output is compared against the stored
    expected file.
    """
    schema_path = JSON_SCHEMA_DATA_PATH / "reuse_model_collapse_with_root.json"
    cli_flags = [
        "--reuse-model",
        "--collapse-reuse-models",
        "--output-model-type",
        "pydantic_v2.BaseModel",
    ]
    run_main_and_assert(
        input_path=schema_path,
        output_path=output_file,
        input_file_type="jsonschema",
        assert_func=assert_file_content,
        extra_args=cli_flags,
    )
1165+
1166+
1167+
def test_main_reuse_model_collapse_nested(output_file: Path) -> None:
    """Check that duplicates are collapsed across several levels of nested inline objects.

    The input schema repeats the same object structure at multiple nesting
    depths; the generated output is compared against the stored expected file.
    """
    schema_path = JSON_SCHEMA_DATA_PATH / "reuse_model_collapse_nested.json"
    cli_flags = [
        "--reuse-model",
        "--collapse-reuse-models",
        "--output-model-type",
        "pydantic_v2.BaseModel",
    ]
    run_main_and_assert(
        input_path=schema_path,
        output_path=output_file,
        input_file_type="jsonschema",
        assert_func=assert_file_content,
        extra_args=cli_flags,
    )
1181+
1182+
11351183
@pytest.mark.cli_doc(
11361184
options=["--capitalize-enum-members"],
11371185
option_description="""Capitalize enum member names to UPPER_CASE format.

0 commit comments

Comments
 (0)