Skip to content

Commit d49dbb4

Browse files
Add --type-mappings option to customize type mappings in JSON schema (#2559)
* Add --type-mappings option to customize type mappings in JSON schema
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* Add type_mappings parameter to GraphQL and OpenAPI parsers
* Add tests for --type-mappings option and validation logic
* Add --type-mappings option to override default type mappings in documentation
* Add test for fallback behavior in _get_type_with_mappings method
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 6e4fb3a commit d49dbb4

15 files changed

Lines changed: 282 additions & 3 deletions

File tree

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,11 @@ Typing customization:
380380
Set enum members as default values for enum field
381381
--strict-types {str,bytes,int,float,bool} [{str,bytes,int,float,bool} ...]
382382
Use strict types
383+
--type-mappings TYPE_MAPPINGS [TYPE_MAPPINGS ...]
384+
Override default type mappings. Format: "type+format=target" (e.g.,
385+
"string+binary=string" to map binary format to string type) or
386+
"format=target" (e.g., "binary=string"). Can be specified multiple
387+
times.
383388
--use-annotated Use typing.Annotated for Field(). Also, `--field-constraints` option
384389
will be enabled.
385390
--use-generic-container-types

docs/index.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,11 @@ Typing customization:
372372
Set enum members as default values for enum field
373373
--strict-types {str,bytes,int,float,bool} [{str,bytes,int,float,bool} ...]
374374
Use strict types
375+
--type-mappings TYPE_MAPPINGS [TYPE_MAPPINGS ...]
376+
Override default type mappings. Format: "type+format=target" (e.g.,
377+
"string+binary=string" to map binary format to string type) or
378+
"format=target" (e.g., "binary=string"). Can be specified multiple
379+
times.
375380
--use-annotated Use typing.Annotated for Field(). Also, `--field-constraints` option
376381
will be enabled.
377382
--use-generic-container-types

src/datamodel_code_generator/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ def generate( # noqa: PLR0912, PLR0913, PLR0914, PLR0915
340340
formatters: list[Formatter] = DEFAULT_FORMATTERS,
341341
parent_scoped_naming: bool = False,
342342
disable_future_imports: bool = False,
343+
type_mappings: list[str] | None = None,
343344
) -> None:
344345
"""Generate Python data models from schema definitions or structured data.
345346
@@ -560,6 +561,7 @@ def get_header_and_first_line(csv_file: IO[str]) -> dict[str, Any]:
560561
formatters=formatters,
561562
encoding=encoding,
562563
parent_scoped_naming=parent_scoped_naming,
564+
type_mappings=type_mappings,
563565
**kwargs,
564566
)
565567

src/datamodel_code_generator/__main__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ def validate_root(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: N805
406406
formatters: list[Formatter] = DEFAULT_FORMATTERS
407407
parent_scoped_naming: bool = False
408408
disable_future_imports: bool = False
409+
type_mappings: Optional[list[str]] = None # noqa: UP045
409410

410411
def merge_args(self, args: Namespace) -> None:
411412
"""Merge command-line arguments into config."""
@@ -660,6 +661,7 @@ def main(args: Sequence[str] | None = None) -> Exit: # noqa: PLR0911, PLR0912,
660661
formatters=config.formatters,
661662
parent_scoped_naming=config.parent_scoped_naming,
662663
disable_future_imports=config.disable_future_imports,
664+
type_mappings=config.type_mappings,
663665
)
664666
except InvalidClassNameError as e:
665667
print(f"{e} You have to set `--class-name` option", file=sys.stderr) # noqa: T201

src/datamodel_code_generator/arguments.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,16 @@ def start_section(self, heading: str | None) -> None:
336336
action="store_true",
337337
default=None,
338338
)
339+
# Register ``--type-mappings``: each value is a "type+format=target" or
# "format=target" string that is parsed later by the parser.
typing_options.add_argument(
    "--type-mappings",
    help="Override default type mappings. "
    'Format: "type+format=target" (e.g., "string+binary=string" to map binary format to string type) '
    'or "format=target" (e.g., "binary=string"). '
    "Can be specified multiple times.",
    # With the default "store" action a repeated flag replaces the values
    # collected so far; "extend" appends them, which is what the help text
    # ("Can be specified multiple times") promises.
    action="extend",
    nargs="+",
    type=str,
    default=None,
)
339349

340350
# ======================================================================================
341351
# Customization options for generated model fields

src/datamodel_code_generator/parser/base.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ def __init__( # noqa: PLR0913, PLR0915
429429
no_alias: bool = False,
430430
formatters: list[Formatter] = DEFAULT_FORMATTERS,
431431
parent_scoped_naming: bool = False,
432+
type_mappings: list[str] | None = None,
432433
) -> None:
433434
"""Initialize the Parser with configuration options."""
434435
self.keyword_only = keyword_only
@@ -548,6 +549,38 @@ def __init__( # noqa: PLR0913, PLR0915
548549
self.treat_dot_as_module = treat_dot_as_module
549550
self.default_field_extras: dict[str, Any] | None = default_field_extras
550551
self.formatters: list[Formatter] = formatters
552+
self.type_mappings: dict[tuple[str, str], str] = Parser._parse_type_mappings(type_mappings)
553+
554+
@staticmethod
def _parse_type_mappings(type_mappings: list[str] | None) -> dict[tuple[str, str], str]:
    """Parse type mappings from CLI format to internal format.

    Supports two formats:
    - "type+format=target" (e.g., "string+binary=string")
    - "format=target" (e.g., "binary=string", assumes type="string")

    Whitespace around each component is ignored. Only the first "=" and the
    first "+" act as separators. If the same (type, format) pair appears more
    than once, the last entry wins.

    Args:
        type_mappings: Raw mapping strings, or None/empty for no overrides.

    Returns:
        A dict mapping (type, format) tuples to target type names.

    Raises:
        ValueError: If an entry has no "=" or has an empty type, format or
            target component.
    """
    if not type_mappings:
        return {}

    result: dict[tuple[str, str], str] = {}
    for mapping in type_mappings:
        source, equals, target = mapping.partition("=")
        if not equals:
            msg = f"Invalid type mapping format: {mapping!r}. Expected 'type+format=target' or 'format=target'."
            raise ValueError(msg)

        type_, plus, format_ = source.partition("+")
        if not plus:
            # Default to "string" type if only format is specified
            type_, format_ = "string", source

        type_, format_, target = type_.strip(), format_.strip(), target.strip()
        if not (type_ and format_ and target):
            # An empty component can never match a schema (or name a target),
            # so fail loudly instead of storing a mapping that is silently ignored.
            msg = f"Invalid type mapping format: {mapping!r}. Type, format and target must not be empty."
            raise ValueError(msg)

        result[type_, format_] = target

    return result
551584

552585
@property
553586
def iter_source(self) -> Iterator[Source]:

src/datamodel_code_generator/parser/graphql.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ def __init__( # noqa: PLR0913
173173
no_alias: bool = False,
174174
formatters: list[Formatter] = DEFAULT_FORMATTERS,
175175
parent_scoped_naming: bool = False,
176+
type_mappings: list[str] | None = None,
176177
) -> None:
177178
"""Initialize the GraphQL parser with configuration options."""
178179
super().__init__(
@@ -254,6 +255,7 @@ def __init__( # noqa: PLR0913
254255
no_alias=no_alias,
255256
formatters=formatters,
256257
parent_scoped_naming=parent_scoped_naming,
258+
type_mappings=type_mappings,
257259
)
258260

259261
self.data_model_scalar_type = data_model_scalar_type

src/datamodel_code_generator/parser/jsonschema.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ def __init__( # noqa: PLR0913
525525
no_alias: bool = False,
526526
formatters: list[Formatter] = DEFAULT_FORMATTERS,
527527
parent_scoped_naming: bool = False,
528+
type_mappings: list[str] | None = None,
528529
) -> None:
529530
"""Initialize the JSON Schema parser with configuration options."""
530531
target_datetime_class = target_datetime_class or DatetimeClassType.Awaredatetime
@@ -607,6 +608,7 @@ def __init__( # noqa: PLR0913
607608
no_alias=no_alias,
608609
formatters=formatters,
609610
parent_scoped_naming=parent_scoped_naming,
611+
type_mappings=type_mappings,
610612
)
611613

612614
self.remote_object_cache: DefaultPutDict[str, dict[str, YamlValue]] = DefaultPutDict()
@@ -645,6 +647,22 @@ def get_field_extras(self, obj: JsonSchemaObject) -> dict[str, Any]:
645647
extras.update(self.default_field_extras)
646648
return extras
647649

650+
def _get_type_with_mappings(self, type_: str, format_: str | None = None) -> Types:
    """Get the Types enum for a given type and format, applying custom type mappings.

    Custom mappings from --type-mappings are checked first, then falls back to
    the default json_schema_data_formats mappings.

    A mapped target is resolved in this order:
    1. as a format name in the source type's own format table,
    2. as a format name in any other type's format table,
    3. as a type name, using that type's "default" entry.

    If the target matches none of these, the custom mapping is ignored and
    the default lookup is used.
    """
    target_format: str | None = None
    if self.type_mappings and format_ is not None:
        target_format = self.type_mappings.get((type_, format_))

    if target_format is not None:
        # Prefer the source type's own table so that a name shared by several
        # tables (such as "default") resolves relative to the schema's type
        # rather than to whichever table happens to be iterated first.
        if type_ in json_schema_data_formats and target_format in json_schema_data_formats[type_]:
            return json_schema_data_formats[type_][target_format]
        for type_formats in json_schema_data_formats.values():
            if target_format in type_formats:
                return type_formats[target_format]
        if target_format in json_schema_data_formats:
            return json_schema_data_formats[target_format]["default"]

    return _get_type(type_, format_)
665+
648666
@cached_property
649667
def schema_paths(self) -> list[tuple[str, list[str]]]:
650668
"""Get schema paths for definitions and defs."""
@@ -713,7 +731,7 @@ def get_data_type(self, obj: JsonSchemaObject) -> DataType:
713731

714732
def _get_data_type(type_: str, format__: str) -> DataType:
715733
return self.data_type_manager.get_data_type(
716-
_get_type(type_, format__),
734+
self._get_type_with_mappings(type_, format__),
717735
**obj.dict() if not self.field_constraints else {},
718736
)
719737

@@ -1493,7 +1511,9 @@ def parse_enum(
14931511
)
14941512

14951513
def create_enum(reference_: Reference) -> DataType:
1496-
type_: Types | None = _get_type(obj.type, obj.format) if isinstance(obj.type, str) else None
1514+
type_: Types | None = (
1515+
self._get_type_with_mappings(obj.type, obj.format) if isinstance(obj.type, str) else None
1516+
)
14971517

14981518
enum_cls: type[Enum] = Enum
14991519
if (

src/datamodel_code_generator/parser/openapi.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ def __init__( # noqa: PLR0913
254254
no_alias: bool = False,
255255
formatters: list[Formatter] = DEFAULT_FORMATTERS,
256256
parent_scoped_naming: bool = False,
257+
type_mappings: list[str] | None = None,
257258
) -> None:
258259
"""Initialize the OpenAPI parser with extensive configuration options."""
259260
target_datetime_class = target_datetime_class or DatetimeClassType.Awaredatetime
@@ -336,6 +337,7 @@ def __init__( # noqa: PLR0913
336337
no_alias=no_alias,
337338
formatters=formatters,
338339
parent_scoped_naming=parent_scoped_naming,
340+
type_mappings=type_mappings,
339341
)
340342
self.open_api_scopes: list[OpenAPIScope] = openapi_scopes or [OpenAPIScope.Schemas]
341343
self.include_path_parameters: bool = include_path_parameters
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# generated by datamodel-codegen:
2+
# filename: type_mappings.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import Base64Str, BaseModel, Field
8+
9+
10+
class BlobModel(BaseModel):
11+
content: str = Field(
12+
..., description='Binary content that should be mapped to string'
13+
)
14+
data: Base64Str = Field(..., description='Base64 encoded data')
15+
name: str = Field(..., description='Regular string field')

0 commit comments

Comments
 (0)