Skip to content

Commit 6fefde7

Browse files
saulshanabrook, koxudaxi, ilovelinux
authored
Add support for prefixItems to emit tuples (#2537)
* Add support for prefixItems to emit tuples
* Refactor test for prefix items handling to use helper function for improved readability
* feat: Enhance JSON Schema support with prefixItems and tuple validation examples
* fix docs
* fix: check items is None or False for tuple detection
* style: use set literal for membership test
* fix: avoid mutating input object, suppress constraints in get_object_field
* docs: Update examples to use modern Python 3.10+ type hint syntax
  - Replace Optional[X] with X | None
  - Replace List[X] with list[X]
  - Replace Tuple[X] with tuple[X]
  - Update Pydantic v1 examples to v2 syntax (RootModel instead of __root__)

---------

Co-authored-by: Koudai Aono <koxudaxi@gmail.com>
Co-authored-by: Antonio Spadaro <ilovelinux@users.noreply.github.com>
1 parent 4423a49 commit 6fefde7

14 files changed

Lines changed: 304 additions & 50 deletions

File tree

docs/cli-reference/general-options.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,21 +1216,21 @@ and generated code stay in sync. Works with both single files and directory outp
12161216
```python
12171217
# generated by datamodel-codegen:
12181218
# filename: person.json
1219-
1219+
12201220
from __future__ import annotations
1221-
1222-
from typing import Any, List, Optional
1223-
1221+
1222+
from typing import Any
1223+
12241224
from pydantic import BaseModel, Field, conint
1225-
1226-
1225+
1226+
12271227
class Person(BaseModel):
1228-
firstName: Optional[str] = Field(None, description="The person's first name.")
1229-
lastName: Optional[str] = Field(None, description="The person's last name.")
1230-
age: Optional[conint(ge=0)] = Field(
1228+
firstName: str | None = Field(None, description="The person's first name.")
1229+
lastName: str | None = Field(None, description="The person's last name.")
1230+
age: conint(ge=0) | None = Field(
12311231
None, description='Age in years which must be equal to or greater than zero.'
12321232
)
1233-
friends: Optional[List[Any]] = None
1233+
friends: list[Any] | None = None
12341234
comment: None = None
12351235
```
12361236

docs/graphql.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ type Query {
5454

5555
from __future__ import annotations
5656

57-
from typing import List, Optional, TypeAlias
57+
from typing import TypeAlias
5858

5959
from pydantic import BaseModel, Field
6060
from typing_extensions import Literal
@@ -72,29 +72,29 @@ String: TypeAlias = str
7272

7373

7474
class Author(BaseModel):
75-
books: Optional[List[Optional[Book]]] = Field(default_factory=list)
75+
books: list[Book | None] | None = Field(default_factory=list)
7676
id: ID
77-
name: Optional[String] = None
78-
typename__: Optional[Literal['Author']] = Field('Author', alias='__typename')
77+
name: String | None = None
78+
typename__: Literal['Author'] | None = Field('Author', alias='__typename')
7979

8080

8181
class Book(BaseModel):
82-
author: Optional[Author] = None
82+
author: Author | None = None
8383
id: ID
84-
title: Optional[String] = None
85-
typename__: Optional[Literal['Book']] = Field('Book', alias='__typename')
84+
title: String | None = None
85+
typename__: Literal['Book'] | None = Field('Book', alias='__typename')
8686

8787

8888
class AuthorBooksInput(BaseModel):
8989
id: ID
90-
typename__: Optional[Literal['AuthorBooksInput']] = Field(
90+
typename__: Literal['AuthorBooksInput'] | None = Field(
9191
'AuthorBooksInput', alias='__typename'
9292
)
9393

9494

9595
class BooksInput(BaseModel):
96-
ids: List[ID]
97-
typename__: Optional[Literal['BooksInput']] = Field(
96+
ids: list[ID]
97+
typename__: Literal['BooksInput'] | None = Field(
9898
'BooksInput', alias='__typename'
9999
)
100100

docs/jsondata.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,17 @@ datamodel-codegen --input pets.json --input-file-type json --output model.py
4141

4242
from __future__ import annotations
4343

44-
from typing import List, Optional
45-
4644
from pydantic import BaseModel
4745

4846

4947
class Pet(BaseModel):
5048
name: str
5149
age: int
52-
nickname: Optional[str] = None
50+
nickname: str | None = None
5351

5452

5553
class Model(BaseModel):
56-
pets: List[Pet]
54+
pets: list[Pet]
5755
status: int
5856

5957
```

docs/jsonschema.md

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,73 @@ datamodel-codegen --input person.json --input-file-type jsonschema --output mode
4949

5050
from __future__ import annotations
5151

52-
from typing import Any, List, Optional
52+
from typing import Any
5353

5454
from pydantic import BaseModel, Field, conint
5555

5656

5757
class Person(BaseModel):
58-
firstName: Optional[str] = Field(None, description="The person's first name.")
59-
lastName: Optional[str] = Field(None, description="The person's last name.")
60-
age: Optional[conint(ge=0)] = Field(
58+
firstName: str | None = Field(None, description="The person's first name.")
59+
lastName: str | None = Field(None, description="The person's last name.")
60+
age: conint(ge=0) | None = Field(
6161
None, description='Age in years which must be equal to or greater than zero.'
6262
)
63-
friends: Optional[List] = None
64-
comment: Optional[Any] = None
63+
friends: list | None = None
64+
comment: Any | None = None
65+
```
66+
67+
## Tuple validation
68+
69+
JSON Schema's [`prefixItems`](https://json-schema.org/understanding-json-schema/reference/array.html#tuple-validation) syntax lets you describe heterogeneous arrays.
70+
71+
When:
72+
73+
- `prefixItems` is present
74+
- `items` is either absent or set to `false`
75+
- `minItems` and `maxItems` are both set and both equal the number of `prefixItems` entries
76+
77+
datamodel-code-generator emits precise tuple annotations.
78+
79+
### Example
80+
81+
```json
82+
{
83+
"$defs": {
84+
"Span": {
85+
"type": "object",
86+
"properties": {
87+
"value": { "type": "integer" }
88+
},
89+
"required": ["value"]
90+
}
91+
},
92+
"title": "defaults",
93+
"type": "object",
94+
"properties": {
95+
"a": {
96+
"type": "array",
97+
"prefixItems": [
98+
{ "$ref": "#/$defs/Span" },
99+
{ "type": "string" }
100+
],
101+
"minItems": 2,
102+
"maxItems": 2
103+
}
104+
},
105+
"required": ["a"]
106+
}
107+
```
108+
109+
```py
110+
from pydantic import BaseModel
111+
112+
113+
class Span(BaseModel):
114+
value: int
115+
116+
117+
class Defaults(BaseModel):
118+
a: tuple[Span, str]
65119
```
66120

67121
---

docs/openapi.md

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,19 +130,17 @@ components:
130130

131131
from __future__ import annotations
132132

133-
from typing import List, Optional
134-
135-
from pydantic import BaseModel
133+
from pydantic import BaseModel, RootModel
136134

137135

138136
class Pet(BaseModel):
139137
id: int
140138
name: str
141-
tag: Optional[str] = None
139+
tag: str | None = None
142140

143141

144-
class Pets(BaseModel):
145-
__root__: List[Pet]
142+
class Pets(RootModel[list[Pet]]):
143+
root: list[Pet]
146144

147145

148146
class Error(BaseModel):

src/datamodel_code_generator/imports.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ def dump_all(self, *, multiline: bool = False) -> str:
176176
IMPORT_UNION = Import.from_full_path("typing.Union")
177177
IMPORT_OPTIONAL = Import.from_full_path("typing.Optional")
178178
IMPORT_LITERAL = Import.from_full_path("typing.Literal")
179+
IMPORT_TUPLE = Import.from_full_path("typing.Tuple")
179180
IMPORT_TYPE_ALIAS = Import.from_full_path("typing.TypeAlias")
180181
IMPORT_TYPE_ALIAS_TYPE = Import.from_full_path("typing_extensions.TypeAliasType")
181182
IMPORT_SEQUENCE = Import.from_full_path("typing.Sequence")

src/datamodel_code_generator/parser/jsonschema.py

Lines changed: 64 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ def validate_null_type(cls, value: Any) -> Any: # noqa: N805
310310
return value
311311

312312
items: Optional[Union[list[JsonSchemaObject], JsonSchemaObject, bool]] = None # noqa: UP007, UP045
313+
prefixItems: Optional[list[JsonSchemaObject]] = None # noqa: N815, UP045
313314
uniqueItems: Optional[bool] = None # noqa: N815, UP045
314315
type: Optional[Union[str, list[str]]] = None # noqa: UP007, UP045
315316
format: Optional[str] = None # noqa: UP045
@@ -383,7 +384,7 @@ def is_object(self) -> bool:
383384
@cached_property
384385
def is_array(self) -> bool:
385386
"""Check if the schema represents an array type."""
386-
return self.items is not None or self.type == "array"
387+
return self.items is not None or self.prefixItems is not None or self.type == "array"
387388

388389
@cached_property
389390
def ref_object_name(self) -> str: # pragma: no cover
@@ -1014,13 +1015,23 @@ def get_object_field( # noqa: PLR0913
10141015
original_field_name: str | None,
10151016
) -> DataModelFieldBase:
10161017
"""Create a data model field from a JSON Schema object field."""
1018+
constraints = field.dict() if self.is_constraints_field(field) else None
1019+
# Suppress minItems/maxItems for fixed-length tuples
1020+
if (
1021+
constraints
1022+
and field.prefixItems is not None
1023+
and field.minItems == field.maxItems == len(field.prefixItems)
1024+
and field.items in {None, False}
1025+
):
1026+
constraints.pop("minItems", None)
1027+
constraints.pop("maxItems", None)
10171028
return self.data_model_field_type(
10181029
name=field_name,
10191030
default=field.default,
10201031
data_type=field_type,
10211032
required=required,
10221033
alias=alias,
1023-
constraints=field.dict() if self.is_constraints_field(field) else None,
1034+
constraints=constraints,
10241035
nullable=field.nullable if self.strict_nullable and (field.has_default or required) else None,
10251036
strip_default_none=self.strip_default_none,
10261037
extras=self.get_field_extras(field),
@@ -2366,7 +2377,7 @@ def parse_list_item(
23662377
for index, item in enumerate(target_items)
23672378
]
23682379

2369-
def parse_array_fields(
2380+
def parse_array_fields( # noqa: PLR0912
23702381
self,
23712382
name: str,
23722383
obj: JsonSchemaObject,
@@ -2384,13 +2395,23 @@ def parse_array_fields(
23842395
else:
23852396
required = not obj.nullable and required
23862397
nullable = None
2387-
match obj.items:
2388-
case JsonSchemaObject():
2389-
items: list[JsonSchemaObject] = [obj.items]
2390-
case list():
2391-
items = obj.items
2392-
case _:
2393-
items = []
2398+
is_tuple = False
2399+
suppress_item_constraints = False
2400+
if isinstance(obj.items, JsonSchemaObject):
2401+
items: list[JsonSchemaObject] = [obj.items]
2402+
elif isinstance(obj.items, list):
2403+
items = obj.items
2404+
elif (
2405+
obj.prefixItems is not None
2406+
and obj.minItems == obj.maxItems == len(obj.prefixItems)
2407+
and obj.items in {None, False}
2408+
):
2409+
# Suppress minItems/maxItems constraints for fixed-length tuples
2410+
suppress_item_constraints = True
2411+
items = obj.prefixItems
2412+
is_tuple = True
2413+
else:
2414+
items = []
23942415

23952416
if items:
23962417
item_data_types = self.parse_list_item(
@@ -2406,7 +2427,8 @@ def parse_array_fields(
24062427
data_types: list[DataType] = [
24072428
self.data_type(
24082429
data_types=item_data_types,
2409-
is_list=True,
2430+
is_tuple=is_tuple,
2431+
is_list=not is_tuple,
24102432
)
24112433
]
24122434
# TODO: decide special path word for a combined data model.
@@ -2416,11 +2438,15 @@ def parse_array_fields(
24162438
data_types.append(self.parse_object(name, obj, get_special_path("object", path)))
24172439
if obj.enum and not self.ignore_enum_constraints:
24182440
data_types.append(self.parse_enum(name, obj, get_special_path("enum", path)))
2441+
constraints = obj.dict()
2442+
if suppress_item_constraints:
2443+
constraints.pop("minItems", None)
2444+
constraints.pop("maxItems", None)
24192445
return self.data_model_field_type(
24202446
data_type=self.data_type(data_types=data_types),
24212447
default=obj.default,
24222448
required=required,
2423-
constraints=obj.dict(),
2449+
constraints=constraints,
24242450
nullable=nullable,
24252451
strip_default_none=self.strip_default_none,
24262452
extras=self.get_field_extras(obj),
@@ -2921,6 +2947,9 @@ def _traverse_schema_objects( # noqa: PLR0912
29212947
case list() as items:
29222948
for item in items:
29232949
self._traverse_schema_objects(item, path, callback, include_one_of=include_one_of)
2950+
if obj.prefixItems:
2951+
for item in obj.prefixItems:
2952+
self._traverse_schema_objects(item, path, callback, include_one_of=include_one_of)
29242953
if isinstance(obj.additionalProperties, JsonSchemaObject):
29252954
self._traverse_schema_objects(obj.additionalProperties, path, callback, include_one_of=include_one_of)
29262955
if obj.patternProperties:
@@ -2943,10 +2972,32 @@ def _resolve_ref_callback(self, obj: JsonSchemaObject, path: list[str]) -> None:
29432972
if obj.ref:
29442973
self.resolve_ref(obj.ref)
29452974

2946-
def _add_id_callback(self, obj: JsonSchemaObject, path: list[str]) -> None:
2975+
def _add_id_callback(self, obj: JsonSchemaObject, path: list[str]) -> None: # noqa: PLR0912
29472976
"""Add $id to model resolver."""
29482977
if obj.id:
29492978
self.model_resolver.add_id(obj.id, path)
2979+
if obj.items:
2980+
if isinstance(obj.items, JsonSchemaObject):
2981+
self.parse_id(obj.items, path)
2982+
elif isinstance(obj.items, list):
2983+
for item in obj.items:
2984+
self.parse_id(item, path)
2985+
if obj.prefixItems:
2986+
for item in obj.prefixItems:
2987+
self.parse_id(item, path)
2988+
if isinstance(obj.additionalProperties, JsonSchemaObject):
2989+
self.parse_id(obj.additionalProperties, path)
2990+
if obj.patternProperties:
2991+
for value in obj.patternProperties.values():
2992+
self.parse_id(value, path)
2993+
for item in obj.anyOf:
2994+
self.parse_id(item, path)
2995+
for item in obj.allOf:
2996+
self.parse_id(item, path)
2997+
if obj.properties:
2998+
for property_value in obj.properties.values():
2999+
if isinstance(property_value, JsonSchemaObject):
3000+
self.parse_id(property_value, path)
29503001

29513002
def parse_ref(self, obj: JsonSchemaObject, path: list[str]) -> None:
29523003
"""Recursively parse all $ref references in a schema object."""

0 commit comments

Comments
 (0)