From 3c9fd8515d2523deb356b6cd56380f37f3b91b37 Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Fri, 2 Jan 2026 17:21:34 +0000 Subject: [PATCH] Fix --reuse-model --collapse-reuse-models to deduplicate identical inline definitions --- src/datamodel_code_generator/parser/base.py | 32 +++++++++++ ...reuse_model_collapse_inline_definitions.py | 22 ++++++++ .../jsonschema/reuse_model_collapse_nested.py | 26 +++++++++ .../reuse_model_collapse_with_root.py | 27 ++++++++++ .../reuse_model_collapse_nested.json | 54 +++++++++++++++++++ .../reuse_model_collapse_with_root.json | 41 ++++++++++++++ .../reuse_model_inline_definitions.json | 42 +++++++++++++++ tests/main/jsonschema/test_main_jsonschema.py | 48 +++++++++++++++++ 8 files changed, 292 insertions(+) create mode 100644 tests/data/expected/main/jsonschema/reuse_model_collapse_inline_definitions.py create mode 100644 tests/data/expected/main/jsonschema/reuse_model_collapse_nested.py create mode 100644 tests/data/expected/main/jsonschema/reuse_model_collapse_with_root.py create mode 100644 tests/data/jsonschema/reuse_model_collapse_nested.json create mode 100644 tests/data/jsonschema/reuse_model_collapse_with_root.json create mode 100644 tests/data/jsonschema/reuse_model_inline_definitions.json diff --git a/src/datamodel_code_generator/parser/base.py b/src/datamodel_code_generator/parser/base.py index a2c1b67e2..c19cfa4c6 100644 --- a/src/datamodel_code_generator/parser/base.py +++ b/src/datamodel_code_generator/parser/base.py @@ -1125,6 +1125,38 @@ def __delete_duplicate_models(self, models: list[DataModel]) -> None: # noqa: P {f"{c.module_name}.{c.type_hint}": c for c in child.base_classes}.values() ) models_to_remove.add(duplicate_model) + + if self.reuse_model and self.collapse_reuse_models: + max_iterations, iteration = len(models), 0 + while True: + iteration += 1 + if iteration > max_iterations: # pragma: no cover + msg = f"Deduplication exceeded max iterations ({max_iterations})" + raise RuntimeError(msg) + + content_key_to_models: dict[tuple[Any, ...], list[DataModel]] = defaultdict(list) + for model in models: + if model not in models_to_remove and not isinstance(model, self.data_model_root_type): + model._dedup_key_cache.clear() # noqa: SLF001 + content_key_to_models[model.get_dedup_key(None, use_default=True)].append(model) + + if not ( + duplicates := [ + (canonical := group[0], dup) + for group in content_key_to_models.values() + if len(group) > 1 + for dup in group[1:] + if dup not in models_to_remove + ] + ): + break + + for canonical, duplicate in duplicates: + duplicate.replace_children_in_models(models, canonical.reference) + for child in duplicate.reference.iter_data_model_children(): # pragma: no cover + child.base_classes = list({c.reference: c for c in child.base_classes}.values()) + models_to_remove.add(duplicate) + # Batch removal: O(n) instead of O(n²) if models_to_remove: models[:] = [m for m in models if m not in models_to_remove] diff --git a/tests/data/expected/main/jsonschema/reuse_model_collapse_inline_definitions.py b/tests/data/expected/main/jsonschema/reuse_model_collapse_inline_definitions.py new file mode 100644 index 000000000..911679989 --- /dev/null +++ b/tests/data/expected/main/jsonschema/reuse_model_collapse_inline_definitions.py @@ -0,0 +1,22 @@ +# generated by datamodel-codegen: +# filename: reuse_model_inline_definitions.json +# timestamp: 2019-07-26T00:00:00+00:00 + +from __future__ import annotations + +from pydantic import BaseModel + + +class Pos(BaseModel): + start: int | None = None + end: int | None = None + + +class Node1(BaseModel): + pos: Pos | None = None + + +class Model(BaseModel): + node1: Node1 | None = None + node2: Node1 | None = None + node3: Node1 | None = None diff --git a/tests/data/expected/main/jsonschema/reuse_model_collapse_nested.py b/tests/data/expected/main/jsonschema/reuse_model_collapse_nested.py new file mode 100644 index 000000000..ad4fcc951 --- /dev/null +++ b/tests/data/expected/main/jsonschema/reuse_model_collapse_nested.py @@ -0,0 +1,26 @@ +# generated by datamodel-codegen: +# filename: reuse_model_collapse_nested.json +# timestamp: 2019-07-26T00:00:00+00:00 + +from __future__ import annotations + +from pydantic import BaseModel + + +class Start(BaseModel): + line: int | None = None + col: int | None = None + + +class Pos(BaseModel): + start: Start | None = None + end: Start | None = None + + +class Block1(BaseModel): + pos: Pos | None = None + + +class Model(BaseModel): + block1: Block1 | None = None + block2: Block1 | None = None diff --git a/tests/data/expected/main/jsonschema/reuse_model_collapse_with_root.py b/tests/data/expected/main/jsonschema/reuse_model_collapse_with_root.py new file mode 100644 index 000000000..790f27713 --- /dev/null +++ b/tests/data/expected/main/jsonschema/reuse_model_collapse_with_root.py @@ -0,0 +1,27 @@ +# generated by datamodel-codegen: +# filename: reuse_model_collapse_with_root.json +# timestamp: 2019-07-26T00:00:00+00:00 + +from __future__ import annotations + +from pydantic import BaseModel, RootModel + + +class Pos(BaseModel): + x: int | None = None + y: int | None = None + + +class Nested1(BaseModel): + pos: Pos | None = None + + +class StringType(RootModel[str]): + root: str + + +class Model(BaseModel): + field1: StringType | None = None + field2: StringType | None = None + nested1: Nested1 | None = None + nested2: Nested1 | None = None diff --git a/tests/data/jsonschema/reuse_model_collapse_nested.json b/tests/data/jsonschema/reuse_model_collapse_nested.json new file mode 100644 index 000000000..6c31469d4 --- /dev/null +++ b/tests/data/jsonschema/reuse_model_collapse_nested.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "properties": { + "block1": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "start": { + "type": "object", + "properties": { + "line": {"type": "integer"}, + "col": {"type": "integer"} + } + }, + "end": { + "type": "object", + "properties": { + "line": {"type": "integer"}, + "col": {"type": "integer"} + } + } + } + } + } + }, + "block2": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "start": { + "type": "object", + "properties": { + "line": {"type": "integer"}, + "col": {"type": "integer"} + } + }, + "end": { + "type": "object", + "properties": { + "line": {"type": "integer"}, + "col": {"type": "integer"} + } + } + } + } + } + } + } +} diff --git a/tests/data/jsonschema/reuse_model_collapse_with_root.json b/tests/data/jsonschema/reuse_model_collapse_with_root.json new file mode 100644 index 000000000..d8de917df --- /dev/null +++ b/tests/data/jsonschema/reuse_model_collapse_with_root.json @@ -0,0 +1,41 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "definitions": { + "StringType": { + "type": "string" + } + }, + "properties": { + "field1": { + "$ref": "#/definitions/StringType" + }, + "field2": { + "$ref": "#/definitions/StringType" + }, + "nested1": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "x": {"type": "integer"}, + "y": {"type": "integer"} + } + } + } + }, + "nested2": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "x": {"type": "integer"}, + "y": {"type": "integer"} + } + } + } + } + } +} diff --git a/tests/data/jsonschema/reuse_model_inline_definitions.json b/tests/data/jsonschema/reuse_model_inline_definitions.json new file mode 100644 index 000000000..f6364f9f5 --- /dev/null +++ b/tests/data/jsonschema/reuse_model_inline_definitions.json @@ -0,0 +1,42 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "properties": { + "node1": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "start": {"type": "integer"}, + "end": {"type": "integer"} + } + } + } + }, + "node2": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "start": {"type": "integer"}, + "end": {"type": "integer"} + } + } + } + }, + "node3": { + "type": "object", + "properties": { + "pos": { + "type": "object", + "properties": { + "start": {"type": "integer"}, + "end": {"type": "integer"} + } + } + } + } + } +} diff --git a/tests/main/jsonschema/test_main_jsonschema.py b/tests/main/jsonschema/test_main_jsonschema.py index bb177ea31..796fea4f9 100644 --- a/tests/main/jsonschema/test_main_jsonschema.py +++ b/tests/main/jsonschema/test_main_jsonschema.py @@ -1132,6 +1132,54 @@ def test_main_json_reuse_enum(output_file: Path) -> None: ) +def test_main_reuse_model_collapse_inline_definitions(output_file: Path) -> None: + """Test --reuse-model --collapse-reuse-models deduplicates identical inline definitions.""" + run_main_and_assert( + input_path=JSON_SCHEMA_DATA_PATH / "reuse_model_inline_definitions.json", + output_path=output_file, + input_file_type="jsonschema", + assert_func=assert_file_content, + extra_args=[ + "--reuse-model", + "--collapse-reuse-models", + "--output-model-type", + "pydantic_v2.BaseModel", + ], + ) + + +def test_main_reuse_model_collapse_with_root(output_file: Path) -> None: + """Test --reuse-model --collapse-reuse-models skips RootModel deduplication.""" + run_main_and_assert( + input_path=JSON_SCHEMA_DATA_PATH / "reuse_model_collapse_with_root.json", + output_path=output_file, + input_file_type="jsonschema", + assert_func=assert_file_content, + extra_args=[ + "--reuse-model", + "--collapse-reuse-models", + "--output-model-type", + "pydantic_v2.BaseModel", + ], + ) + + +def test_main_reuse_model_collapse_nested(output_file: Path) -> None: + """Test --reuse-model --collapse-reuse-models with deeply nested identical structures.""" + run_main_and_assert( + input_path=JSON_SCHEMA_DATA_PATH / "reuse_model_collapse_nested.json", + output_path=output_file, + input_file_type="jsonschema", + assert_func=assert_file_content, + extra_args=[ + "--reuse-model", + "--collapse-reuse-models", + "--output-model-type", + "pydantic_v2.BaseModel", + ], + ) + + @pytest.mark.cli_doc( options=["--capitalize-enum-members"], option_description="""Capitalize enum member names to UPPER_CASE format.