Skip to content

Commit cca64e8

Browse files
authored
Fix allOf with multiple refs having same property name generates broken models (#2579)
* Add support for merging schemas with duplicate property names and boolean properties * Fix normalization of property schema for allOf merging to handle non-serializable types
1 parent ae88e3a commit cca64e8

6 files changed

Lines changed: 194 additions & 0 deletions

File tree

src/datamodel_code_generator/parser/jsonschema.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from __future__ import annotations
88

99
import enum as _enum
10+
import json
1011
from collections import defaultdict
1112
from contextlib import contextmanager
1213
from functools import cached_property, lru_cache
@@ -784,6 +785,53 @@ def _deep_merge(self, dict1: dict[Any, Any], dict2: dict[Any, Any]) -> dict[Any,
784785
result[key] = value
785786
return result
786787

788+
def _load_ref_schema_object(self, ref: str) -> JsonSchemaObject:
789+
"""Load a JsonSchemaObject from a $ref using standard resolve/load pipeline."""
790+
resolved_ref = self.model_resolver.resolve_ref(ref)
791+
file_part, fragment = ([*resolved_ref.split("#", 1), ""])[:2]
792+
raw_doc = self._get_ref_body(file_part) if file_part else self.raw_obj
793+
794+
target_schema: dict[str, YamlValue] | YamlValue = raw_doc
795+
if fragment:
796+
pointer = [p for p in fragment.split("/") if p]
797+
target_schema = get_model_by_path(raw_doc, pointer)
798+
799+
return self.SCHEMA_OBJECT_TYPE.parse_obj(target_schema)
800+
801+
def _schema_signature(self, prop_schema: JsonSchemaObject | bool) -> str | bool: # noqa: FBT001, PLR6301
802+
"""Normalize property schema for comparison across allOf items."""
803+
if isinstance(prop_schema, bool):
804+
return prop_schema
805+
return json.dumps(prop_schema.dict(exclude_unset=True, by_alias=True), sort_keys=True, default=repr)
806+
807+
def _merge_all_of_object(self, obj: JsonSchemaObject) -> JsonSchemaObject | None:
808+
"""Merge allOf items when they share object properties to avoid duplicate models."""
809+
resolved_items: list[JsonSchemaObject] = []
810+
property_signatures: dict[str, set[str | bool]] = {}
811+
for item in obj.allOf:
812+
resolved_item = self._load_ref_schema_object(item.ref) if item.ref else item
813+
resolved_items.append(resolved_item)
814+
if resolved_item.properties:
815+
for prop_name, prop_schema in resolved_item.properties.items():
816+
property_signatures.setdefault(prop_name, set()).add(self._schema_signature(prop_schema))
817+
818+
if obj.properties:
819+
for prop_name, prop_schema in obj.properties.items():
820+
property_signatures.setdefault(prop_name, set()).add(self._schema_signature(prop_schema))
821+
822+
if not any(len(signatures) > 1 for signatures in property_signatures.values()):
823+
return None
824+
825+
merged_schema: dict[str, Any] = obj.dict(exclude={"allOf"}, exclude_unset=True, by_alias=True)
826+
for resolved_item in resolved_items:
827+
merged_schema = self._deep_merge(merged_schema, resolved_item.dict(exclude_unset=True, by_alias=True))
828+
829+
if "required" in merged_schema and isinstance(merged_schema["required"], list):
830+
merged_schema["required"] = list(dict.fromkeys(merged_schema["required"]))
831+
832+
merged_schema.pop("allOf", None)
833+
return self.SCHEMA_OBJECT_TYPE.parse_obj(merged_schema)
834+
787835
def parse_combined_schema(
788836
self,
789837
name: str,
@@ -986,6 +1034,10 @@ def parse_all_of(
9861034
and get_model_by_path(self.raw_obj, single_obj.ref[2:].split("/")).get("enum")
9871035
):
9881036
return self.get_ref_data_type(single_obj.ref)
1037+
1038+
merged_all_of_obj = self._merge_all_of_object(obj)
1039+
if merged_all_of_obj:
1040+
return self._parse_object_common_part(name, merged_all_of_obj, path, ignore_duplicate_model, [], [], [])
9891041
fields: list[DataModelFieldBase] = []
9901042
base_classes: list[Reference] = []
9911043
required: list[str] = []
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# generated by datamodel-codegen:
2+
# filename: all_of_merge_boolean_property.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from typing import Any, Optional
8+
9+
from pydantic import BaseModel
10+
11+
12+
class Model(BaseModel):
13+
data: Optional[Any] = None
14+
15+
16+
class Data(BaseModel):
17+
value: Optional[str] = None
18+
19+
20+
class Base1(BaseModel):
21+
data: Optional[Data] = None
22+
23+
24+
class Base2(BaseModel):
25+
data: Optional[Any] = None
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# generated by datamodel-codegen:
2+
# filename: all_of_merge_same_property.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from typing import Optional
8+
9+
from pydantic import BaseModel
10+
11+
12+
class Links(BaseModel):
13+
self: Optional[str] = None
14+
collection: Optional[str] = None
15+
16+
17+
class Model(BaseModel):
18+
links: Links
19+
20+
21+
class Links1(BaseModel):
22+
self: Optional[str] = None
23+
24+
25+
class SelfLink(BaseModel):
26+
links: Links1
27+
28+
29+
class Links2(BaseModel):
30+
collection: Optional[str] = None
31+
32+
33+
class CollectionLink(BaseModel):
34+
links: Links2
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"$defs": {
3+
"Base1": {
4+
"type": "object",
5+
"properties": {
6+
"data": {
7+
"type": "object",
8+
"properties": {
9+
"value": { "type": "string" }
10+
}
11+
}
12+
}
13+
},
14+
"Base2": {
15+
"type": "object",
16+
"properties": {
17+
"data": false
18+
}
19+
}
20+
},
21+
"allOf": [
22+
{ "$ref": "#/$defs/Base1" },
23+
{ "$ref": "#/$defs/Base2" }
24+
]
25+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
"$defs": {
3+
"SelfLink": {
4+
"type": "object",
5+
"required": ["links"],
6+
"properties": {
7+
"links": {
8+
"type": "object",
9+
"properties": {
10+
"self": { "type": "string" }
11+
}
12+
}
13+
}
14+
},
15+
"CollectionLink": {
16+
"type": "object",
17+
"required": ["links"],
18+
"properties": {
19+
"links": {
20+
"type": "object",
21+
"properties": {
22+
"collection": { "type": "string" }
23+
}
24+
}
25+
}
26+
}
27+
},
28+
"allOf": [
29+
{ "$ref": "#/$defs/SelfLink" },
30+
{ "$ref": "#/$defs/CollectionLink" }
31+
]
32+
}

tests/main/jsonschema/test_main_jsonschema.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,32 @@ def test_main_all_of_with_object(output_file: Path) -> None:
908908
)
909909

910910

911+
def test_main_all_of_merge_same_property(output_file: Path) -> None:
912+
"""Test allOf merging when duplicate property names exist across refs."""
913+
with chdir(JSON_SCHEMA_DATA_PATH):
914+
run_main_and_assert(
915+
input_path=Path("all_of_merge_same_property.json"),
916+
output_path=output_file,
917+
input_file_type="jsonschema",
918+
assert_func=assert_file_content,
919+
expected_file="all_of_merge_same_property.py",
920+
extra_args=["--class-name", "Model"],
921+
)
922+
923+
924+
def test_main_all_of_merge_boolean_property(output_file: Path) -> None:
925+
"""Test allOf merging when a property has a boolean schema (false)."""
926+
with chdir(JSON_SCHEMA_DATA_PATH):
927+
run_main_and_assert(
928+
input_path=Path("all_of_merge_boolean_property.json"),
929+
output_path=output_file,
930+
input_file_type="jsonschema",
931+
assert_func=assert_file_content,
932+
expected_file="all_of_merge_boolean_property.py",
933+
extra_args=["--class-name", "Model"],
934+
)
935+
936+
911937
@pytest.mark.skipif(
912938
black.__version__.split(".")[0] >= "24",
913939
reason="Installed black doesn't support the old style",

0 commit comments

Comments
 (0)