Skip to content

Commit 13e6fb1

Browse files
Fix $ref not merging with additional schema keywords (#2635)
* Add Enum types for Organization and ContactPoint, update schema merging logic
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Refactor schema tests to improve clarity and coverage for $ref handling
* Refactor jsonschema.py to improve code clarity by removing redundant comments and organizing metadata-only fields

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a37b482 commit 13e6fb1

11 files changed

Lines changed: 233 additions & 8 deletions

File tree

src/datamodel_code_generator/parser/jsonschema.py

Lines changed: 68 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -219,6 +219,23 @@ def model_rebuild(cls) -> None:
219219
"uniqueItems",
220220
}
221221
__extra_key__: str = SPECIAL_PATH_FORMAT.format("extras")
222+
__metadata_only_fields__: set[str] = { # noqa: RUF012
223+
"title",
224+
"description",
225+
"id",
226+
"$id",
227+
"$schema",
228+
"$comment",
229+
"examples",
230+
"example",
231+
"x_enum_varnames",
232+
"definitions",
233+
"$defs",
234+
"default",
235+
"readOnly",
236+
"writeOnly",
237+
"deprecated",
238+
}
222239

223240
@model_validator(mode="before")
224241
def validate_exclusive_maximum_and_exclusive_minimum(cls, values: Any) -> Any: # noqa: N805
@@ -413,6 +430,23 @@ def has_multiple_types(self) -> bool:
413430
non_null_types = [t for t in self.type if t != "null"]
414431
return len(non_null_types) > 1
415432

433+
@cached_property
434+
def has_ref_with_schema_keywords(self) -> bool:
435+
"""Check if schema has $ref combined with schema-affecting keywords.
436+
437+
Metadata-only keywords (title, description, etc.) are excluded
438+
as they don't affect the schema structure.
439+
"""
440+
if not self.ref:
441+
return False
442+
other_fields = self.__fields_set__ - {"ref"}
443+
schema_affecting_fields = other_fields - self.__metadata_only_fields__ - {"extras"}
444+
if self.extras:
445+
schema_affecting_extras = {k for k in self.extras if k not in self.__metadata_only_fields__}
446+
if schema_affecting_extras:
447+
schema_affecting_fields |= {"extras"}
448+
return bool(schema_affecting_fields)
449+
416450

417451
@lru_cache
418452
def get_ref_type(ref: str) -> JSONReference:
@@ -1043,6 +1077,25 @@ def _load_ref_schema_object(self, ref: str) -> JsonSchemaObject:
10431077

10441078
return self.SCHEMA_OBJECT_TYPE.parse_obj(target_schema)
10451079

1080+
def _merge_ref_with_schema(self, obj: JsonSchemaObject) -> JsonSchemaObject:
1081+
"""Merge $ref schema with current schema's additional keywords.
1082+
1083+
JSON Schema 2020-12 allows $ref alongside other keywords,
1084+
which should be merged together.
1085+
1086+
The local keywords take precedence over referenced schema.
1087+
"""
1088+
if not obj.ref:
1089+
return obj
1090+
1091+
ref_schema = self._load_ref_schema_object(obj.ref)
1092+
ref_dict = ref_schema.dict(exclude_unset=True, by_alias=True)
1093+
current_dict = obj.dict(exclude={"ref"}, exclude_unset=True, by_alias=True)
1094+
merged = self._deep_merge(ref_dict, current_dict)
1095+
merged.pop("$ref", None)
1096+
1097+
return self.SCHEMA_OBJECT_TYPE.parse_obj(merged)
1098+
10461099
def _merge_primitive_schemas(self, items: list[JsonSchemaObject]) -> JsonSchemaObject:
10471100
"""Merge multiple primitive schemas by computing the intersection of their constraints."""
10481101
if len(items) == 1:
@@ -1323,9 +1376,16 @@ def parse_combined_schema(
13231376
refs = []
13241377
for index, target_attribute in enumerate(getattr(obj, target_attribute_name, [])):
13251378
if target_attribute.ref:
1326-
combined_schemas.append(target_attribute)
1327-
refs.append(index)
1328-
# TODO: support partial ref
1379+
if target_attribute.has_ref_with_schema_keywords:
1380+
merged_attr = self._merge_ref_with_schema(target_attribute)
1381+
combined_schemas.append(
1382+
self.SCHEMA_OBJECT_TYPE.parse_obj(
1383+
self._deep_merge(base_object, merged_attr.dict(exclude_unset=True, by_alias=True))
1384+
)
1385+
)
1386+
else:
1387+
combined_schemas.append(target_attribute)
1388+
refs.append(index)
13291389
else:
13301390
combined_schemas.append(
13311391
self.SCHEMA_OBJECT_TYPE.parse_obj(
@@ -1878,6 +1938,8 @@ def parse_item( # noqa: PLR0911, PLR0912
18781938
item,
18791939
root_type_path,
18801940
)
1941+
if item.has_ref_with_schema_keywords:
1942+
item = self._merge_ref_with_schema(item)
18811943
if item.ref:
18821944
return self.get_ref_data_type(item.ref)
18831945
if item.custom_type_path: # pragma: no cover
@@ -2540,6 +2602,9 @@ def parse_obj( # noqa: PLR0912
25402602
path: list[str],
25412603
) -> None:
25422604
"""Parse a JsonSchemaObject by dispatching to appropriate parse methods."""
2605+
if obj.has_ref_with_schema_keywords:
2606+
obj = self._merge_ref_with_schema(obj)
2607+
25432608
if obj.is_array:
25442609
self.parse_array(name, obj, path)
25452610
elif obj.allOf:

tests/data/expected/main/jsonschema/ids/ContactPoint.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,18 @@
44

55
from __future__ import annotations
66

7+
from enum import Enum
78
from typing import Optional
89

910
from pydantic import BaseModel, EmailStr
1011

11-
from . import type as type_1
12+
13+
class Type(Enum):
14+
ContactPoint = 'ContactPoint'
1215

1316

1417
class Schema(BaseModel):
15-
type: type_1.Schema
18+
type: Type
1619
contactType: Optional[str] = None
1720
email: EmailStr
1821
telephone: Optional[str] = None

tests/data/expected/main/jsonschema/ids/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from __future__ import annotations
66

7+
from enum import Enum
78
from typing import Optional
89

910
from pydantic import BaseModel
@@ -12,12 +13,15 @@
1213
from . import id as id_1
1314
from . import name as name_1
1415
from . import sameAs as sameAs_1
15-
from . import type as type_1
16+
17+
18+
class Type(Enum):
19+
Organization = 'Organization'
1620

1721

1822
class Organization(BaseModel):
1923
id: Optional[id_1.Schema] = None
20-
type: type_1.Schema
24+
type: Type
2125
name: name_1.Schema
2226
contactPoint: Optional[ContactPoint.Schema] = None
2327
sameAs: Optional[sameAs_1.Schema] = None
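(File path not captured in this extract. Judging by the header comment in the hunk below, this is presumably the generated package `__init__.py` of the expected output for the `ref_with_additional_keywords` test input; confirm against the commit.)

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change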
@@ -0,0 +1,3 @@
1+
# generated by datamodel-codegen:
2+
# filename: ref_with_additional_keywords
3+
# timestamp: 2019-07-26T00:00:00+00:00
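(File path not captured in this extract. Judging by the header comment in the hunk below, this is presumably the expected output module generated from `commons.schema.json`; confirm against the commit.)

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change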
@@ -0,0 +1,17 @@
1+
# generated by datamodel-codegen:
2+
# filename: commons.schema.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from typing import Any, List
8+
9+
from pydantic import Field, RootModel
10+
11+
12+
class Commons(RootModel[Any]):
13+
root: Any = Field(..., description='Commons objects', title='Commons')
14+
15+
16+
class DefaultArray(RootModel[List[Any]]):
17+
root: List[Any] = Field(..., max_length=100, min_length=1)
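(File path not captured in this extract. Judging by the header comment in the hunk below, this is presumably the expected output module generated from `products.schema.json`; confirm against the commit.)

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change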
@@ -0,0 +1,19 @@
1+
# generated by datamodel-codegen:
2+
# filename: products.schema.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from typing import List
8+
9+
from pydantic import Field, RootModel
10+
11+
12+
class Products(RootModel[List[str]]):
13+
root: List[str] = Field(
14+
...,
15+
description='The products in the catalog',
16+
max_length=100,
17+
min_length=1,
18+
title='Products',
19+
)

tests/data/expected/main/openapi/collapse_root_models.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ class FileRequest(BaseModel):
2525

2626
class ImageRequest(BaseModel):
2727
image_hash: Optional[
28-
constr(regex=r'^[a-fA-F\d]{32}$', min_length=32, max_length=32)
28+
constr(regex=r'^[a-fA-F\d]{32}$', min_length=64, max_length=64)
2929
] = Field(None, description='For image')
3030

3131

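(File path not captured in this extract. Judging by the `$id` in the hunk below, this is presumably the new test input `commons.schema.json` in the `ref_with_additional_keywords` data directory; confirm against the commit.)

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change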
@@ -0,0 +1,13 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://example.com/commons.schema.json",
4+
"title": "Commons",
5+
"description": "Commons objects",
6+
"$defs": {
7+
"defaultArray": {
8+
"type": "array",
9+
"minItems": 1,
10+
"maxItems": 100
11+
}
12+
}
13+
}
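(File path not captured in this extract. Judging by the `$id` in the hunk below, this is presumably the new test input `products.schema.json` in the `ref_with_additional_keywords` data directory; confirm against the commit.)

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change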
@@ -0,0 +1,10 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://example.com/products.schema.json",
4+
"title": "Products",
5+
"description": "The products in the catalog",
6+
"$ref": "commons.schema.json#/$defs/defaultArray",
7+
"items": {
8+
"type": "string"
9+
}
10+
}

tests/main/jsonschema/test_main_jsonschema.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3541,6 +3541,20 @@ def test_main_jsonschema_extras_in_oneof(output_file: Path) -> None:
35413541
)
35423542

35433543

3544+
def test_main_jsonschema_ref_with_additional_keywords(output_dir: Path) -> None:
3545+
"""Test that $ref combined with additional keywords merges properties (Issue #2330)."""
3546+
run_main_and_assert(
3547+
input_path=JSON_SCHEMA_DATA_PATH / "ref_with_additional_keywords",
3548+
output_path=output_dir,
3549+
expected_directory=EXPECTED_JSON_SCHEMA_PATH / "ref_with_additional_keywords",
3550+
input_file_type="jsonschema",
3551+
extra_args=[
3552+
"--output-model-type",
3553+
"pydantic_v2.BaseModel",
3554+
],
3555+
)
3556+
3557+
35443558
@pytest.mark.benchmark
35453559
@LEGACY_BLACK_SKIP
35463560
def test_main_jsonschema_reserved_field_name_typed_dict(output_file: Path) -> None:

0 commit comments

Comments
 (0)