From e6bedb13b0d04ad763e1bca7ee9f171ea3e4fc24 Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Tue, 30 Dec 2025 10:38:39 +0000 Subject: [PATCH 1/5] Handle Annotated types in _serialize_python_type for TypedDict generation --- src/datamodel_code_generator/__main__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/datamodel_code_generator/__main__.py b/src/datamodel_code_generator/__main__.py index 06d08e600..43fe4028f 100644 --- a/src/datamodel_code_generator/__main__.py +++ b/src/datamodel_code_generator/__main__.py @@ -856,7 +856,7 @@ def _get_preserved_type_origins() -> dict[type, str]: return _PRESERVED_TYPE_ORIGINS -def _serialize_python_type(tp: type) -> str | None: +def _serialize_python_type(tp: type) -> str | None: # noqa: PLR0911 """Serialize Python type to a string for x-python-type field. Returns None if the type doesn't need to be preserved (e.g., standard dict, list). @@ -884,6 +884,14 @@ def _serialize_python_type(tp: type) -> str | None: return " | ".join(n or _simple_type_name(a) for n, a in zip(nested, args, strict=False)) return None # pragma: no cover + # Handle Annotated types - extract the base type and ignore metadata + from typing import Annotated # noqa: PLC0415 + + if origin is Annotated: + if args: + return _serialize_python_type(args[0]) or _simple_type_name(args[0]) + return None # pragma: no cover + type_name: str | None = None if origin is not None: type_name = preserved_origins.get(origin) From 98d7f0251df6fa40fb27f684e2b2458312576397 Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Tue, 30 Dec 2025 10:52:17 +0000 Subject: [PATCH 2/5] Remove WithJsonSchema from ExtraTemplateDataType --- src/datamodel_code_generator/__main__.py | 5 +++++ src/datamodel_code_generator/config.py | 12 +++++++----- .../parser/jsonschema.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/datamodel_code_generator/__main__.py b/src/datamodel_code_generator/__main__.py index 43fe4028f..84e6a9bfe 100644 --- a/src/datamodel_code_generator/__main__.py +++ b/src/datamodel_code_generator/__main__.py @@ -915,8 +915,13 @@ def _serialize_python_type(tp: type) -> str | None: # noqa: PLR0911 def _simple_type_name(tp: type) -> str: """Get a simple string representation of a type.""" + from typing import get_origin # noqa: PLC0415 + if tp is type(None): return "None" + # For generic types (e.g., dict[str, Any]), use full string representation + if get_origin(tp) is not None: + return str(tp).replace("typing.", "") if hasattr(tp, "__name__"): return tp.__name__ return str(tp).replace("typing.", "") # pragma: no cover diff --git a/src/datamodel_code_generator/config.py b/src/datamodel_code_generator/config.py index b8bc22402..25635ad25 100644 --- a/src/datamodel_code_generator/config.py +++ b/src/datamodel_code_generator/config.py @@ -7,7 +7,7 @@ from pathlib import Path # noqa: TC003 - used at runtime by Pydantic from typing import TYPE_CHECKING, Annotated, Any -from pydantic import BaseModel, Field, WithJsonSchema +from pydantic import BaseModel, Field from datamodel_code_generator.enums import ( DEFAULT_SHARED_MODULE_NAME, @@ -48,10 +48,12 @@ CallableSchema = Callable[[str], str] DumpResolveReferenceAction = Callable[[Iterable[str]], str] DefaultPutDictSchema = DefaultPutDict[str, str] -ExtraTemplateDataType = Annotated[ - defaultdict[str, Annotated[dict[str, Any], Field(default_factory=dict)]], - WithJsonSchema({"type": "object", "x-python-type": "defaultdict[str, dict[str, Any]]"}), -] +if TYPE_CHECKING: + ExtraTemplateDataType = defaultdict[str, dict[str, Any]] +elif is_pydantic_v2(): + ExtraTemplateDataType = defaultdict[str, Annotated[dict[str, Any], Field(default_factory=dict)]] +else: + ExtraTemplateDataType = defaultdict[str, dict[str, Any]] class GenerateConfig(BaseModel): diff --git a/src/datamodel_code_generator/parser/jsonschema.py b/src/datamodel_code_generator/parser/jsonschema.py index 4764905f0..fa4866089 100644 --- a/src/datamodel_code_generator/parser/jsonschema.py +++ b/src/datamodel_code_generator/parser/jsonschema.py @@ -629,6 +629,12 @@ class JsonSchemaParser(Parser): PYTHON_TYPE_OVERRIDE_ALWAYS: ClassVar[frozenset[str]] = frozenset({ "Callable", "Type", + # collections types that have no JSON Schema equivalent + "defaultdict", + "OrderedDict", + "Counter", + "deque", + "ChainMap", }) def __init__( # noqa: PLR0913 @@ -1391,6 +1397,10 @@ def _is_compatible_python_type(self, schema_type: str | None, python_type: str) all_type_names = self._extract_all_type_names(python_type) if any(t in self.PYTHON_TYPE_OVERRIDE_ALWAYS for t in all_type_names): return False + # Check for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) + for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: + if override_type[0].islower() and override_type in python_type: + return False if schema_type is None: return True if base_type in {"Union", "Optional"}: @@ -1471,6 +1481,13 @@ def _get_python_type_override(self, obj: JsonSchemaObject) -> DataType | None: if nested_import: nested_imports.append(self.data_type(import_=nested_import)) + # Collect imports for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) + for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: + if override_type[0].islower() and override_type in type_str and override_type != base_type: + override_import = self._resolve_type_import(override_type) + if override_import: + nested_imports.append(self.data_type(import_=override_import)) + result = self.data_type(type=type_str, import_=import_) if nested_imports: result.data_types.extend(nested_imports) From 69d1a493edb99f39e964921619fd5afe8885734a Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Tue, 30 Dec 2025 11:30:51 +0000 Subject: [PATCH 3/5] Use regex word boundaries for precise type matching --- src/datamodel_code_generator/parser/jsonschema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datamodel_code_generator/parser/jsonschema.py b/src/datamodel_code_generator/parser/jsonschema.py index fa4866089..e7b83d750 100644 --- a/src/datamodel_code_generator/parser/jsonschema.py +++ b/src/datamodel_code_generator/parser/jsonschema.py @@ -1399,7 +1399,7 @@ def _is_compatible_python_type(self, schema_type: str | None, python_type: str) return False # Check for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: - if override_type[0].islower() and override_type in python_type: + if override_type[0].islower() and re.search(rf"\b{re.escape(override_type)}\b", python_type): return False if schema_type is None: return True @@ -1483,7 +1483,7 @@ def _get_python_type_override(self, obj: JsonSchemaObject) -> DataType | None: # Collect imports for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: - if override_type[0].islower() and override_type in type_str and override_type != base_type: + if override_type[0].islower() and override_type != base_type and re.search(rf"\b{re.escape(override_type)}\b", type_str): override_import = self._resolve_type_import(override_type) if override_import: nested_imports.append(self.data_type(import_=override_import)) From f4af633d771761975895c36af30e4b98fa7cc3bd Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Tue, 30 Dec 2025 11:38:51 +0000 Subject: [PATCH 4/5] Format long line in _get_python_type_override --- src/datamodel_code_generator/parser/jsonschema.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/datamodel_code_generator/parser/jsonschema.py b/src/datamodel_code_generator/parser/jsonschema.py index e7b83d750..cac5b795a 100644 --- a/src/datamodel_code_generator/parser/jsonschema.py +++ b/src/datamodel_code_generator/parser/jsonschema.py @@ -1483,7 +1483,11 @@ def _get_python_type_override(self, obj: JsonSchemaObject) -> DataType | None: # Collect imports for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: - if override_type[0].islower() and override_type != base_type and re.search(rf"\b{re.escape(override_type)}\b", type_str): + if ( + override_type[0].islower() + and override_type != base_type + and re.search(rf"\b{re.escape(override_type)}\b", type_str) + ): override_import = self._resolve_type_import(override_type) if override_import: nested_imports.append(self.data_type(import_=override_import)) From 80fa0d7d48f77db2c3b4f36851babfae1d1a597e Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Tue, 30 Dec 2025 11:47:29 +0000 Subject: [PATCH 5/5] Use AST parsing instead of regex for type name extraction --- .../parser/jsonschema.py | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/datamodel_code_generator/parser/jsonschema.py b/src/datamodel_code_generator/parser/jsonschema.py index cac5b795a..092c18398 100644 --- a/src/datamodel_code_generator/parser/jsonschema.py +++ b/src/datamodel_code_generator/parser/jsonschema.py @@ -1397,10 +1397,6 @@ def _is_compatible_python_type(self, schema_type: str | None, python_type: str) all_type_names = self._extract_all_type_names(python_type) if any(t in self.PYTHON_TYPE_OVERRIDE_ALWAYS for t in all_type_names): return False - # Check for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) - for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: - if override_type[0].islower() and re.search(rf"\b{re.escape(override_type)}\b", python_type): - return False if schema_type is None: return True if base_type in {"Union", "Optional"}: @@ -1409,11 +1405,16 @@ def _is_compatible_python_type(self, schema_type: str | None, python_type: str) return base_type in compatible def _extract_all_type_names(self, type_str: str) -> list[str]: # noqa: PLR6301 - """Extract all type names from a type annotation string.""" - # Match type names: word characters starting with uppercase, not preceded by a dot - # This handles cases like Callable[[Iterable[str]], str] - pattern = r"(? DataType | None: if nested_import: nested_imports.append(self.data_type(import_=nested_import)) - # Collect imports for lowercase types in PYTHON_TYPE_OVERRIDE_ALWAYS (e.g., defaultdict, deque) - for override_type in self.PYTHON_TYPE_OVERRIDE_ALWAYS: - if ( - override_type[0].islower() - and override_type != base_type - and re.search(rf"\b{re.escape(override_type)}\b", type_str) - ): - override_import = self._resolve_type_import(override_type) - if override_import: - nested_imports.append(self.data_type(import_=override_import)) - result = self.data_type(type=type_str, import_=import_) if nested_imports: result.data_types.extend(nested_imports)