From b98aae857e202a13359b03be58eab430efeb08bc Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Sun, 4 Jan 2026 06:10:21 +0000 Subject: [PATCH 1/4] Add __hash__ to Pydantic v2 models used in sets --- src/datamodel_code_generator/model/base.py | 1 + .../template/pydantic_v2/BaseModel.jinja2 | 5 +++- .../template/pydantic_v2/RootModel.jinja2 | 5 +++- src/datamodel_code_generator/parser/base.py | 20 +++++++++++++ .../main/jsonschema/unique_items_enum_set.py | 25 ++++++++++++++++ ...ntic_v2_use_generic_container_types_set.py | 1 + ...ydantic_v2_use_standard_collections_set.py | 1 + .../jsonschema/unique_items_enum_set.json | 29 +++++++++++++++++++ tests/main/jsonschema/test_main_jsonschema.py | 18 ++++++++++++ 9 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 tests/data/expected/main/jsonschema/unique_items_enum_set.py create mode 100644 tests/data/jsonschema/unique_items_enum_set.json diff --git a/src/datamodel_code_generator/model/base.py b/src/datamodel_code_generator/model/base.py index a4a7d9868..db83ecc2b 100644 --- a/src/datamodel_code_generator/model/base.py +++ b/src/datamodel_code_generator/model/base.py @@ -678,6 +678,7 @@ def __init__( # noqa: PLR0913 self.reference.source = self + self.extra_template_data: dict[str, Any] if extra_template_data is not None: # The supplied defaultdict will either create a new entry, # or already contain a predefined entry for this type diff --git a/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 b/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 index d0e8c309d..4c660aafc 100644 --- a/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 +++ b/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 @@ -7,7 +7,7 @@ class {{ class_name }}({{ base_class }}):{% if comment is defined %} # {{ comme {{ description | escape_docstring | indent(4) }} """ {%- endif %} -{%- if not fields and not description and not config %} +{%- if not fields and not description and not config and not set_item_hashable %} pass {%- endif %} {%- if config %} @@ -15,6 +15,9 @@ class {{ class_name }}({{ base_class }}):{% if comment is defined %} # {{ comme {% include 'ConfigDict.jinja2' %} {%- endfilter %} {%- endif %} +{%- if set_item_hashable %} + __hash__ = object.__hash__ +{%- endif %} {%- for field in fields %} {%- if not field.annotated and field.field %} {{ field.name }}: {{ field.type_hint }} = {{ field.field }} diff --git a/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 b/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 index 347d4dd36..eaa15b26a 100644 --- a/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 +++ b/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 @@ -27,7 +27,10 @@ class {{ class_name }}({{ base_class }}{%- if fields -%}[{{get_type_hint(fields, {% include 'ConfigDict.jinja2' %} {%- endfilter %} {%- endif %} -{%- if not fields and not description %} +{%- if set_item_hashable %} + __hash__ = object.__hash__ +{%- endif %} +{%- if not fields and not description and not config and not set_item_hashable %} pass {%- else %} {%- set field = fields[0] %} diff --git a/src/datamodel_code_generator/parser/base.py b/src/datamodel_code_generator/parser/base.py index dc1bca6b5..17e25cb77 100644 --- a/src/datamodel_code_generator/parser/base.py +++ b/src/datamodel_code_generator/parser/base.py @@ -1548,6 +1548,25 @@ def __replace_unique_list_to_set(self, models: list[DataModel]) -> None: model_field.default = converted_default model_field.replace_data_type(set_data_type) + def __mark_set_item_models_hashable(self, models: list[DataModel]) -> None: + """Mark models used as set/frozenset items with hash flag for __hash__ generation.""" + set_item_references: set[str] = set() + + for model in models: + for model_field in model.fields: + for data_type in model_field.data_type.all_data_types: + if data_type.is_set or data_type.is_frozen_set: + for item_type in data_type.data_types: + for nested_type in item_type.all_data_types: + if nested_type.reference: + set_item_references.add(nested_type.reference.path) + + for model in models: + if model.reference.path in set_item_references: + if isinstance(model, Enum): + continue + model.extra_template_data["set_item_hashable"] = True + @classmethod def __set_reference_default_value_to_field(cls, models: list[DataModel]) -> None: for model in models: @@ -2932,6 +2951,7 @@ def _process_single_module( # noqa: PLR0913, PLR0917 self.__alias_shadowed_imports(models, all_module_fields) self.__override_required_field(models) self.__replace_unique_list_to_set(models) + self.__mark_set_item_models_hashable(models) self.__change_from_import( models, imports, diff --git a/tests/data/expected/main/jsonschema/unique_items_enum_set.py b/tests/data/expected/main/jsonschema/unique_items_enum_set.py new file mode 100644 index 000000000..ac4b111a5 --- /dev/null +++ b/tests/data/expected/main/jsonschema/unique_items_enum_set.py @@ -0,0 +1,25 @@ +# generated by datamodel-codegen: +# filename: unique_items_enum_set.json +# timestamp: 2019-07-26T00:00:00+00:00 + +from __future__ import annotations + +from enum import Enum + +from pydantic import BaseModel + + +class Status(Enum): + active = 'active' + inactive = 'inactive' + pending = 'pending' + + +class Item(BaseModel): + __hash__ = object.__hash__ + name: str | None = None + + +class Container(BaseModel): + statuses: set[Status] | None = None + items: set[Item] | None = None diff --git a/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_generic_container_types_set.py b/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_generic_container_types_set.py index 5616c0270..ddd77b3fc 100644 --- a/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_generic_container_types_set.py +++ b/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_generic_container_types_set.py @@ -10,6 +10,7 @@ class Pet(BaseModel): + __hash__ = object.__hash__ id: int = Field(..., ge=0, le=9223372036854775807) name: str = Field(..., max_length=256) tag: str | None = Field(None, max_length=64) diff --git a/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_standard_collections_set.py b/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_standard_collections_set.py index 908d73f33..383963678 100644 --- a/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_standard_collections_set.py +++ b/tests/data/expected/main/openapi/with_field_constraints_pydantic_v2_use_standard_collections_set.py @@ -8,6 +8,7 @@ class Pet(BaseModel): + __hash__ = object.__hash__ id: int = Field(..., ge=0, le=9223372036854775807) name: str = Field(..., max_length=256) tag: str | None = Field(None, max_length=64) diff --git a/tests/data/jsonschema/unique_items_enum_set.json b/tests/data/jsonschema/unique_items_enum_set.json new file mode 100644 index 000000000..debe60bba --- /dev/null +++ b/tests/data/jsonschema/unique_items_enum_set.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Container", + "type": "object", + "definitions": { + "Status": { + "type": "string", + "enum": ["active", "inactive", "pending"] + }, + "Item": { + "type": "object", + "properties": { + "name": {"type": "string"} + } + } + }, + "properties": { + "statuses": { + "type": "array", + "uniqueItems": true, + "items": {"$ref": "#/definitions/Status"} + }, + "items": { + "type": "array", + "uniqueItems": true, + "items": {"$ref": "#/definitions/Item"} + } + } +} diff --git a/tests/main/jsonschema/test_main_jsonschema.py b/tests/main/jsonschema/test_main_jsonschema.py index 6dda2ee8b..cf7a0e8c9 100644 --- a/tests/main/jsonschema/test_main_jsonschema.py +++ b/tests/main/jsonschema/test_main_jsonschema.py @@ -7903,3 +7903,21 @@ def test_validators_requires_pydantic_v2(output_file: Path, tmp_path: Path, caps capsys=capsys, expected_stderr_contains="--validators option requires Pydantic v2", ) + + +@PYDANTIC_V2_SKIP +def test_unique_items_enum_set(output_file: Path) -> None: + """Test set with enum items does not add __hash__ to enum (already hashable).""" + run_main_and_assert( + input_path=JSON_SCHEMA_DATA_PATH / "unique_items_enum_set.json", + output_path=output_file, + input_file_type="jsonschema", + assert_func=assert_file_content, + expected_file="unique_items_enum_set.py", + extra_args=[ + "--output-model-type", + "pydantic_v2.BaseModel", + "--use-unique-items-as-set", + "--use-standard-collections", + ], + ) From 5de128d4089d12cf083fb7b2900dc6c615a13ad3 Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Sun, 4 Jan 2026 06:40:40 +0000 Subject: [PATCH 2/4] Fix lint errors: refactor to classmethods and reduce nesting --- src/datamodel_code_generator/parser/base.py | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/datamodel_code_generator/parser/base.py b/src/datamodel_code_generator/parser/base.py index 17e25cb77..2b9021881 100644 --- a/src/datamodel_code_generator/parser/base.py +++ b/src/datamodel_code_generator/parser/base.py @@ -1548,18 +1548,24 @@ def __replace_unique_list_to_set(self, models: list[DataModel]) -> None: model_field.default = converted_default model_field.replace_data_type(set_data_type) - def __mark_set_item_models_hashable(self, models: list[DataModel]) -> None: - """Mark models used as set/frozenset items with hash flag for __hash__ generation.""" - set_item_references: set[str] = set() - + @classmethod + def __collect_set_item_references(cls, models: list[DataModel]) -> set[str]: + """Collect reference paths of all types used as set/frozenset items.""" + references: set[str] = set() for model in models: - for model_field in model.fields: - for data_type in model_field.data_type.all_data_types: + for field in model.fields: + for data_type in field.data_type.all_data_types: if data_type.is_set or data_type.is_frozen_set: for item_type in data_type.data_types: - for nested_type in item_type.all_data_types: - if nested_type.reference: - set_item_references.add(nested_type.reference.path) + references.update( + nested.reference.path for nested in item_type.all_data_types if nested.reference + ) + return references + + @classmethod + def __mark_set_item_models_hashable(cls, models: list[DataModel]) -> None: + """Mark models used as set/frozenset items with hash flag for __hash__ generation.""" + set_item_references = cls.__collect_set_item_references(models) for model in models: if model.reference.path in set_item_references: From 8fdb9f3833490bed539631f1bb12092b00cace78 Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Sun, 4 Jan 2026 07:04:57 +0000 Subject: [PATCH 3/4] Fix cross-module set item hash detection --- src/datamodel_code_generator/parser/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/datamodel_code_generator/parser/base.py b/src/datamodel_code_generator/parser/base.py index 2b9021881..cdea6d2c9 100644 --- a/src/datamodel_code_generator/parser/base.py +++ b/src/datamodel_code_generator/parser/base.py @@ -2957,7 +2957,6 @@ def _process_single_module( # noqa: PLR0913, PLR0917 self.__alias_shadowed_imports(models, all_module_fields) self.__override_required_field(models) self.__replace_unique_list_to_set(models) - self.__mark_set_item_models_hashable(models) self.__change_from_import( models, imports, @@ -2991,6 +2990,8 @@ def _finalize_modules( module_to_import: dict[ModulePath, Imports], ) -> None: """Finalize module processing: apply generic base class and remove unused imports.""" + all_models = [model for ctx in contexts for model in ctx.models] + self.__mark_set_item_models_hashable(all_models) self.__apply_generic_base_class(contexts) for ctx in contexts: From 4249d52a75cb6df408ea01bbe4a875246c1ae24f Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Sun, 4 Jan 2026 07:11:30 +0000 Subject: [PATCH 4/4] Refactor: use generic class_body_lines instead of set_item_hashable --- .../model/template/pydantic_v2/BaseModel.jinja2 | 8 ++++---- .../model/template/pydantic_v2/RootModel.jinja2 | 8 ++++---- src/datamodel_code_generator/parser/base.py | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 b/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 index 4c660aafc..828c4b416 100644 --- a/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 +++ b/src/datamodel_code_generator/model/template/pydantic_v2/BaseModel.jinja2 @@ -7,7 +7,7 @@ class {{ class_name }}({{ base_class }}):{% if comment is defined %} # {{ comme {{ description | escape_docstring | indent(4) }} """ {%- endif %} -{%- if not fields and not description and not config and not set_item_hashable %} +{%- if not fields and not description and not config and not class_body_lines %} pass {%- endif %} {%- if config %} @@ -15,9 +15,9 @@ class {{ class_name }}({{ base_class }}):{% if comment is defined %} # {{ comme {% include 'ConfigDict.jinja2' %} {%- endfilter %} {%- endif %} -{%- if set_item_hashable %} - __hash__ = object.__hash__ -{%- endif %} +{%- for line in class_body_lines %} + {{ line }} +{%- endfor %} {%- for field in fields %} {%- if not field.annotated and field.field %} {{ field.name }}: {{ field.type_hint }} = {{ field.field }} diff --git a/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 b/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 index eaa15b26a..af5de33b7 100644 --- a/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 +++ b/src/datamodel_code_generator/model/template/pydantic_v2/RootModel.jinja2 @@ -27,10 +27,10 @@ class {{ class_name }}({{ base_class }}{%- if fields -%}[{{get_type_hint(fields, {% include 'ConfigDict.jinja2' %} {%- endfilter %} {%- endif %} -{%- if set_item_hashable %} - __hash__ = object.__hash__ -{%- endif %} -{%- if not fields and not description and not config and not set_item_hashable %} +{%- for line in class_body_lines %} + {{ line }} +{%- endfor %} +{%- if not fields and not description and not config and not class_body_lines %} pass {%- else %} {%- set field = fields[0] %} diff --git a/src/datamodel_code_generator/parser/base.py b/src/datamodel_code_generator/parser/base.py index cdea6d2c9..0d1b4daaf 100644 --- a/src/datamodel_code_generator/parser/base.py +++ b/src/datamodel_code_generator/parser/base.py @@ -1571,7 +1571,8 @@ def __mark_set_item_models_hashable(cls, models: list[DataModel]) -> None: if model.reference.path in set_item_references: if isinstance(model, Enum): continue - model.extra_template_data["set_item_hashable"] = True + class_body_lines = model.extra_template_data.setdefault("class_body_lines", []) + class_body_lines.append("__hash__ = object.__hash__") @classmethod def __set_reference_default_value_to_field(cls, models: list[DataModel]) -> None: