Fix $ref not merging with additional schema keywords (#2635)

koxudaxi · pre-commit-ci[bot] · web-flow · commit 13e6fb174fb4 · 2025-12-09T05:17:58.000+09:00
* Add Enum types for Organization and ContactPoint, update schema merging logic
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* Refactor schema tests to improve clarity and coverage for $ref handling
* Refactor jsonschema.py to improve code clarity by removing redundant comments and organizing metadata-only fields

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/src/datamodel_code_generator/parser/jsonschema.py b/src/datamodel_code_generator/parser/jsonschema.py
@@ -219,6 +219,23 @@ def model_rebuild(cls) -> None:
         "uniqueItems",
     }
     __extra_key__: str = SPECIAL_PATH_FORMAT.format("extras")
+    __metadata_only_fields__: set[str] = {  # noqa: RUF012
+        "title",
+        "description",
+        "id",
+        "$id",
+        "$schema",
+        "$comment",
+        "examples",
+        "example",
+        "x_enum_varnames",
+        "definitions",
+        "$defs",
+        "default",
+        "readOnly",
+        "writeOnly",
+        "deprecated",
+    }
 
     @model_validator(mode="before")
     def validate_exclusive_maximum_and_exclusive_minimum(cls, values: Any) -> Any:  # noqa: N805
@@ -413,6 +430,23 @@ def has_multiple_types(self) -> bool:
         non_null_types = [t for t in self.type if t != "null"]
         return len(non_null_types) > 1
 
+    @cached_property
+    def has_ref_with_schema_keywords(self) -> bool:
+        """Check if schema has $ref combined with schema-affecting keywords.
+
+        Metadata-only keywords (title, description, etc.) are excluded
+        as they don't affect the schema structure.
+        """
+        if not self.ref:
+            return False
+        other_fields = self.__fields_set__ - {"ref"}
+        schema_affecting_fields = other_fields - self.__metadata_only_fields__ - {"extras"}
+        if self.extras:
+            schema_affecting_extras = {k for k in self.extras if k not in self.__metadata_only_fields__}
+            if schema_affecting_extras:
+                schema_affecting_fields |= {"extras"}
+        return bool(schema_affecting_fields)
+
 
 @lru_cache
 def get_ref_type(ref: str) -> JSONReference:
@@ -1043,6 +1077,25 @@ def _load_ref_schema_object(self, ref: str) -> JsonSchemaObject:
 
         return self.SCHEMA_OBJECT_TYPE.parse_obj(target_schema)
 
+    def _merge_ref_with_schema(self, obj: JsonSchemaObject) -> JsonSchemaObject:
+        """Merge $ref schema with current schema's additional keywords.
+
+        JSON Schema 2020-12 allows $ref alongside other keywords,
+        which should be merged together.
+
+        The local keywords take precedence over referenced schema.
+        """
+        if not obj.ref:
+            return obj
+
+        ref_schema = self._load_ref_schema_object(obj.ref)
+        ref_dict = ref_schema.dict(exclude_unset=True, by_alias=True)
+        current_dict = obj.dict(exclude={"ref"}, exclude_unset=True, by_alias=True)
+        merged = self._deep_merge(ref_dict, current_dict)
+        merged.pop("$ref", None)
+
+        return self.SCHEMA_OBJECT_TYPE.parse_obj(merged)
+
     def _merge_primitive_schemas(self, items: list[JsonSchemaObject]) -> JsonSchemaObject:
         """Merge multiple primitive schemas by computing the intersection of their constraints."""
         if len(items) == 1:
@@ -1323,9 +1376,16 @@ def parse_combined_schema(
         refs = []
         for index, target_attribute in enumerate(getattr(obj, target_attribute_name, [])):
             if target_attribute.ref:
-                combined_schemas.append(target_attribute)
-                refs.append(index)
-                # TODO: support partial ref
+                if target_attribute.has_ref_with_schema_keywords:
+                    merged_attr = self._merge_ref_with_schema(target_attribute)
+                    combined_schemas.append(
+                        self.SCHEMA_OBJECT_TYPE.parse_obj(
+                            self._deep_merge(base_object, merged_attr.dict(exclude_unset=True, by_alias=True))
+                        )
+                    )
+                else:
+                    combined_schemas.append(target_attribute)
+                    refs.append(index)
             else:
                 combined_schemas.append(
                     self.SCHEMA_OBJECT_TYPE.parse_obj(
@@ -1878,6 +1938,8 @@ def parse_item(  # noqa: PLR0911, PLR0912
                 item,
                 root_type_path,
             )
+        if item.has_ref_with_schema_keywords:
+            item = self._merge_ref_with_schema(item)
         if item.ref:
             return self.get_ref_data_type(item.ref)
         if item.custom_type_path:  # pragma: no cover
@@ -2540,6 +2602,9 @@ def parse_obj(  # noqa: PLR0912
         path: list[str],
     ) -> None:
         """Parse a JsonSchemaObject by dispatching to appropriate parse methods."""
+        if obj.has_ref_with_schema_keywords:
+            obj = self._merge_ref_with_schema(obj)
+
         if obj.is_array:
             self.parse_array(name, obj, path)
         elif obj.allOf:
diff --git a/tests/data/expected/main/jsonschema/ids/ContactPoint.py b/tests/data/expected/main/jsonschema/ids/ContactPoint.py
@@ -4,15 +4,18 @@
 
 from __future__ import annotations
 
+from enum import Enum
 from typing import Optional
 
 from pydantic import BaseModel, EmailStr
 
-from . import type as type_1
+
+class Type(Enum):
+    ContactPoint = 'ContactPoint'
 
 
 class Schema(BaseModel):
-    type: type_1.Schema
+    type: Type
     contactType: Optional[str] = None
     email: EmailStr
     telephone: Optional[str] = None
diff --git a/tests/data/expected/main/jsonschema/ids/__init__.py b/tests/data/expected/main/jsonschema/ids/__init__.py
@@ -4,6 +4,7 @@
 
 from __future__ import annotations
 
+from enum import Enum
 from typing import Optional
 
 from pydantic import BaseModel
@@ -12,12 +13,15 @@
 from . import id as id_1
 from . import name as name_1
 from . import sameAs as sameAs_1
-from . import type as type_1
+
+
+class Type(Enum):
+    Organization = 'Organization'
 
 
 class Organization(BaseModel):
     id: Optional[id_1.Schema] = None
-    type: type_1.Schema
+    type: Type
     name: name_1.Schema
     contactPoint: Optional[ContactPoint.Schema] = None
     sameAs: Optional[sameAs_1.Schema] = None
diff --git a/tests/data/expected/main/jsonschema/ref_with_additional_keywords/__init__.py b/tests/data/expected/main/jsonschema/ref_with_additional_keywords/__init__.py
@@ -0,0 +1,3 @@
+# generated by datamodel-codegen:
+#   filename:  ref_with_additional_keywords
+#   timestamp: 2019-07-26T00:00:00+00:00
diff --git a/tests/data/expected/main/jsonschema/ref_with_additional_keywords/commons_schema.py b/tests/data/expected/main/jsonschema/ref_with_additional_keywords/commons_schema.py
@@ -0,0 +1,17 @@
+# generated by datamodel-codegen:
+#   filename:  commons.schema.json
+#   timestamp: 2019-07-26T00:00:00+00:00
+
+from __future__ import annotations
+
+from typing import Any, List
+
+from pydantic import Field, RootModel
+
+
+class Commons(RootModel[Any]):
+    root: Any = Field(..., description='Commons objects', title='Commons')
+
+
+class DefaultArray(RootModel[List[Any]]):
+    root: List[Any] = Field(..., max_length=100, min_length=1)
diff --git a/tests/data/expected/main/jsonschema/ref_with_additional_keywords/products_schema.py b/tests/data/expected/main/jsonschema/ref_with_additional_keywords/products_schema.py
@@ -0,0 +1,19 @@
+# generated by datamodel-codegen:
+#   filename:  products.schema.json
+#   timestamp: 2019-07-26T00:00:00+00:00
+
+from __future__ import annotations
+
+from typing import List
+
+from pydantic import Field, RootModel
+
+
+class Products(RootModel[List[str]]):
+    root: List[str] = Field(
+        ...,
+        description='The products in the catalog',
+        max_length=100,
+        min_length=1,
+        title='Products',
+    )
diff --git a/tests/data/expected/main/openapi/collapse_root_models.py b/tests/data/expected/main/openapi/collapse_root_models.py
@@ -25,7 +25,7 @@ class FileRequest(BaseModel):
 
 class ImageRequest(BaseModel):
     image_hash: Optional[
-        constr(regex=r'^[a-fA-F\d]{32}$', min_length=32, max_length=32)
+        constr(regex=r'^[a-fA-F\d]{32}$', min_length=64, max_length=64)
     ] = Field(None, description='For image')
 
 
diff --git a/tests/data/jsonschema/ref_with_additional_keywords/commons.schema.json b/tests/data/jsonschema/ref_with_additional_keywords/commons.schema.json
@@ -0,0 +1,13 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/commons.schema.json",
+  "title": "Commons",
+  "description": "Commons objects",
+  "$defs": {
+    "defaultArray": {
+      "type": "array",
+      "minItems": 1,
+      "maxItems": 100
+    }
+  }
+}
diff --git a/tests/data/jsonschema/ref_with_additional_keywords/products.schema.json b/tests/data/jsonschema/ref_with_additional_keywords/products.schema.json
@@ -0,0 +1,10 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/products.schema.json",
+  "title": "Products",
+  "description": "The products in the catalog",
+  "$ref": "commons.schema.json#/$defs/defaultArray",
+  "items": {
+    "type": "string"
+  }
+}
diff --git a/tests/main/jsonschema/test_main_jsonschema.py b/tests/main/jsonschema/test_main_jsonschema.py
@@ -3541,6 +3541,20 @@ def test_main_jsonschema_extras_in_oneof(output_file: Path) -> None:
     )
 
 
+def test_main_jsonschema_ref_with_additional_keywords(output_dir: Path) -> None:
+    """Test that $ref combined with additional keywords merges properties (Issue #2330)."""
+    run_main_and_assert(
+        input_path=JSON_SCHEMA_DATA_PATH / "ref_with_additional_keywords",
+        output_path=output_dir,
+        expected_directory=EXPECTED_JSON_SCHEMA_PATH / "ref_with_additional_keywords",
+        input_file_type="jsonschema",
+        extra_args=[
+            "--output-model-type",
+            "pydantic_v2.BaseModel",
+        ],
+    )
+
+
 @pytest.mark.benchmark
 @LEGACY_BLACK_SKIP
 def test_main_jsonschema_reserved_field_name_typed_dict(output_file: Path) -> None:
diff --git a/tests/parser/test_jsonschema.py b/tests/parser/test_jsonschema.py
@@ -844,3 +844,80 @@ def test_get_ref_body_from_url_file_local_path(mocker: MockerFixture) -> None:
     mock_load.assert_called_once()
     called_path = mock_load.call_args[0][0]
     assert called_path.parts[-4:] == ("home", "user", "schemas", "pet.json")
+
+
+def test_merge_ref_with_schema_no_ref() -> None:
+    """Test _merge_ref_with_schema returns object unchanged when no $ref is present."""
+    parser = JsonSchemaParser("")
+    obj = JsonSchemaObject.parse_obj({"type": "string", "minLength": 5})
+    result = parser._merge_ref_with_schema(obj)
+    assert result is obj
+
+
+def test_has_ref_with_schema_keywords_extras_with_schema_affecting_keys() -> None:
+    """Test has_ref_with_schema_keywords when extras contains schema-affecting keys."""
+    # const is stored in extras and is schema-affecting
+    obj = JsonSchemaObject.parse_obj({
+        "$ref": "#/$defs/Base",
+        "const": "active",
+    })
+    # Verify extras contains schema-affecting key
+    assert obj.extras
+    assert "const" in obj.extras
+    assert obj.has_ref_with_schema_keywords is True
+
+
+def test_has_ref_with_schema_keywords_extras_with_metadata_only_keys() -> None:
+    """Test has_ref_with_schema_keywords when extras contains only metadata keys."""
+    # $comment is metadata-only, should not trigger merge
+    obj = JsonSchemaObject.parse_obj({
+        "$ref": "#/$defs/Base",
+        "$comment": "this is a comment",
+    })
+    # Verify extras contains only metadata key
+    assert obj.extras
+    assert "$comment" in obj.extras
+    assert obj.has_ref_with_schema_keywords is False
+
+
+def test_has_ref_with_schema_keywords_no_extras() -> None:
+    """Test has_ref_with_schema_keywords when extras is empty."""
+    # Only $ref and a schema-affecting field, no extras
+    obj = JsonSchemaObject.parse_obj({
+        "$ref": "#/$defs/Base",
+        "minLength": 10,
+    })
+    # Verify extras is empty but minLength triggers merge
+    assert not obj.extras
+    assert obj.has_ref_with_schema_keywords is True
+
+
+def test_parse_combined_schema_anyof_with_ref_and_schema_keywords() -> None:
+    """Test parse_combined_schema merges $ref with schema-affecting keywords in anyOf."""
+    parser = JsonSchemaParser("")
+    schema = {
+        "$schema": "http://json-schema.org/draft-07/schema#",
+        "type": "object",
+        "properties": {
+            "value": {
+                "anyOf": [
+                    {
+                        "$ref": "#/$defs/BaseString",
+                        "minLength": 10,
+                    },
+                    {
+                        "type": "integer",
+                    },
+                ]
+            }
+        },
+        "$defs": {
+            "BaseString": {
+                "type": "string",
+                "maxLength": 100,
+            }
+        },
+    }
+    parser.parse_raw_obj("Model", schema, [])
+    results = list(parser.results)
+    assert len(results) >= 1

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# generated by datamodel-codegen:`
	`2`	`+#   filename:  ref_with_additional_keywords`
	`3`	`+#   timestamp: 2019-07-26T00:00:00+00:00`