Skip to content

Commit b58970a

Browse files
authored
Fix RecursionError in _merge_ref_with_schema for circular $ref (#2983)
* Fix RecursionError in _merge_ref_with_schema for circular $ref
* Fix external ref cycle detection context and add e2e coverage
* Add schema markers to circular ref fixtures
* Generate alias model for circular ref with keywords
* Restrict ref+keywords alias generation to definitions
* Simplify ref fragment parsing in cycle detection
* Handle URL refs in cycle detection context
* Use conservative fallback for circular ref detection
1 parent fadd36d commit b58970a

16 files changed

Lines changed: 516 additions & 5 deletions

src/datamodel_code_generator/parser/jsonschema.py

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,7 @@ def __init__(
711711
*self.field_extra_keys,
712712
*self.field_extra_keys_without_x_prefix,
713713
}
714+
self._circular_ref_cache: dict[str, bool] = {}
714715

715716
if self.data_model_field_type.can_have_extra_keys:
716717
self.get_field_extra_key: Callable[[str], str] = (
@@ -1656,6 +1657,10 @@ def _merge_ref_with_schema(self, obj: JsonSchemaObject) -> JsonSchemaObject:
16561657
if not obj.ref:
16571658
return obj
16581659

1660+
resolved_ref = self.model_resolver.resolve_ref(obj.ref)
1661+
if self._is_ref_circular(resolved_ref):
1662+
return obj
1663+
16591664
ref_schema = self._load_ref_schema_object(obj.ref)
16601665
ref_dict = model_dump(ref_schema, exclude_unset=True, by_alias=True)
16611666
current_dict = model_dump(obj, exclude={"ref"}, exclude_unset=True, by_alias=True)
@@ -1664,6 +1669,59 @@ def _merge_ref_with_schema(self, obj: JsonSchemaObject) -> JsonSchemaObject:
16641669

16651670
return model_validate(self.SCHEMA_OBJECT_TYPE, merged)
16661671

1672+
def _is_ref_circular(self, resolved_ref: str) -> bool:
1673+
"""Check if a resolved $ref target contains a circular reference (cached)."""
1674+
if resolved_ref in self._circular_ref_cache:
1675+
return self._circular_ref_cache[resolved_ref]
1676+
try:
1677+
result = self._has_ref_cycle(resolved_ref, resolved_ref, set())
1678+
except Exception: # noqa: BLE001 # pragma: no cover
1679+
result = True
1680+
self._circular_ref_cache[resolved_ref] = result
1681+
return result
1682+
1683+
def _has_ref_cycle(self, ref_to_check: str, target: str, visited: set[str]) -> bool:
    """Check if the schema at ref_to_check contains a reference back to target.

    Args:
        ref_to_check: Reference of the form ``<file-or-url>#<fragment>``;
            either side of the ``#`` may be empty.
        target: Reference string the walk is trying to reach.
        visited: Refs already inspected during this walk; ``ref_to_check``
            is added to it before anything else happens (mutated in place).

    Returns:
        The result of ``_walk_for_ref`` on the raw schema node that
        ``ref_to_check`` points to.
    """
    visited.add(ref_to_check)
    file_part, _, fragment = ref_to_check.partition("#")

    # Work out the resolver context in which nested relative refs of the
    # referenced document have to be resolved.
    if file_part and is_url(file_part):
        base_path = None
        root_path = [file_part]
    else:
        # No file part means the ref stays inside the current document, so
        # the resolver's current context is reused unchanged.
        base_path = Path(file_part).parent if file_part else self.model_resolver.current_base_path
        root_path = file_part.split("/") if file_part else self.model_resolver.current_root
    base_url = file_part or self.model_resolver.base_url

    with (
        self.model_resolver.current_base_path_context(base_path),
        self.model_resolver.base_url_context(base_url),
        self.model_resolver.current_root_context(root_path),
    ):
        raw_doc = self._get_ref_body(file_part) if file_part else self.raw_obj
        raw_obj: Any = raw_doc
        if fragment:
            # Empty segments (leading "/" or doubled slashes) are dropped.
            pointer = [p for p in fragment.split("/") if p]
            raw_obj = get_model_by_path(raw_doc, pointer)
        return self._walk_for_ref(raw_obj, target, visited)
1705+
1706+
def _walk_for_ref(self, data: dict[str, Any] | list[Any], target: str, visited: set[str]) -> bool:
1707+
"""Recursively walk raw dict/list data looking for a $ref that resolves to target."""
1708+
if isinstance(data, dict):
1709+
ref_value = data.get("$ref")
1710+
if isinstance(ref_value, str):
1711+
try:
1712+
resolved = self.model_resolver.resolve_ref(ref_value)
1713+
except Exception: # noqa: BLE001
1714+
resolved = ref_value
1715+
if resolved == target:
1716+
return True
1717+
if resolved not in visited and self._has_ref_cycle(resolved, target, visited):
1718+
return True
1719+
for value in data.values():
1720+
if isinstance(value, (dict, list)) and self._walk_for_ref(value, target, visited):
1721+
return True
1722+
return False
1723+
return any(isinstance(item, (dict, list)) and self._walk_for_ref(item, target, visited) for item in data)
1724+
16671725
def _merge_primitive_schemas(self, items: list[JsonSchemaObject]) -> JsonSchemaObject:
16681726
"""Merge multiple primitive schemas by computing the intersection of their constraints."""
16691727
if len(items) == 1:
@@ -2189,12 +2247,18 @@ def parse_combined_schema(
21892247
if target_attribute.ref:
21902248
if target_attribute.has_ref_with_schema_keywords and not target_attribute.is_ref_with_nullable_only:
21912249
merged_attr = self._merge_ref_with_schema(target_attribute)
2192-
combined_schemas.append(
2193-
model_validate(
2194-
self.SCHEMA_OBJECT_TYPE,
2195-
self._deep_merge(base_object, model_dump(merged_attr, exclude_unset=True, by_alias=True)),
2250+
if merged_attr.ref:
2251+
combined_schemas.append(merged_attr)
2252+
refs.append(index)
2253+
else:
2254+
combined_schemas.append(
2255+
model_validate(
2256+
self.SCHEMA_OBJECT_TYPE,
2257+
self._deep_merge(
2258+
base_object, model_dump(merged_attr, exclude_unset=True, by_alias=True)
2259+
),
2260+
)
21962261
)
2197-
)
21982262
else:
21992263
combined_schemas.append(target_attribute)
22002264
refs.append(index)
@@ -3992,6 +4056,18 @@ def _handle_python_import(
39924056
"""Mark x-python-import reference as loaded to skip model generation."""
39934057
self.model_resolver.add(path, name, class_name=True, loaded=True)
39944058

4059+
def _is_named_schema_definition_path(self, path: list[str]) -> bool:
4060+
"""Check if path points to a named schema entry under definitions/$defs."""
4061+
current_root = list(self.model_resolver.current_root)
4062+
expected_path_length = len(current_root) + 2
4063+
if len(path) != expected_path_length:
4064+
return False
4065+
4066+
schema_container_path = path[len(current_root)]
4067+
return path[: len(current_root)] == current_root and any(
4068+
schema_container_path == schema_path for schema_path, _ in self.schema_paths
4069+
)
4070+
39954071
def parse_obj( # noqa: PLR0912
39964072
self,
39974073
name: str,
@@ -4001,6 +4077,11 @@ def parse_obj( # noqa: PLR0912
40014077
"""Parse a JsonSchemaObject by dispatching to appropriate parse methods."""
40024078
if obj.has_ref_with_schema_keywords and not obj.is_ref_with_nullable_only:
40034079
obj = self._merge_ref_with_schema(obj)
4080+
if obj.ref:
4081+
if self._is_named_schema_definition_path(path):
4082+
self.parse_root_type(name, obj, path)
4083+
self.parse_ref(obj, path)
4084+
return
40044085

40054086
if obj.is_array:
40064087
self.parse_array(name, obj, path)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# generated by datamodel-codegen:
2+
# filename: root.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Model(BaseModel):
11+
root: Context | None = None
12+
13+
14+
class Context(BaseModel):
15+
child: Child | None = None
16+
17+
18+
class Child(BaseModel):
19+
parent: Context | None = None
20+
21+
22+
Model.update_forward_refs()
23+
Context.update_forward_refs()
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# generated by datamodel-codegen:
2+
# filename: circular_ref_indirect.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from enum import Enum
8+
9+
from pydantic import BaseModel
10+
11+
12+
class Kind(Enum):
13+
x = 'x'
14+
y = 'y'
15+
16+
17+
class NodeC(BaseModel):
18+
value: str | None = None
19+
20+
21+
class Model(BaseModel):
22+
root: NodeA | None = None
23+
24+
25+
class NodeA(BaseModel):
26+
kind: Kind | None = None
27+
c: NodeC | None = None
28+
b: NodeB | None = None
29+
30+
31+
class NodeB(BaseModel):
32+
a: NodeA | None = None
33+
34+
35+
Model.update_forward_refs()
36+
NodeA.update_forward_refs()
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# generated by datamodel-codegen:
2+
# filename: circular_ref_ref_with_schema_keywords.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Model(BaseModel):
11+
root: Node | None = None
12+
13+
14+
class Node(BaseModel):
15+
__root__: BaseNode
16+
17+
18+
class BaseNode(BaseModel):
19+
next: Node | None = None
20+
21+
22+
Model.update_forward_refs()
23+
Node.update_forward_refs()
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# generated by datamodel-codegen:
2+
# filename: circular_ref_root_with_type.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Node(BaseModel):
11+
child: Node | None = None
12+
13+
14+
Node.update_forward_refs()
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# generated by datamodel-codegen:
2+
# filename: circular_ref_with_schema_keywords.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Context(BaseModel):
11+
name: str | None = None
12+
children: list[Context] | None = None
13+
14+
15+
class Model(BaseModel):
16+
root: Context | None = None
17+
18+
19+
Context.update_forward_refs()
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# generated by datamodel-codegen:
2+
# filename: x_python_import_unused.json
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from pydantic import BaseModel
8+
9+
10+
class Model(BaseModel):
11+
name: str | None = None
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$defs": {
4+
"Context": {
5+
"type": "object",
6+
"properties": {
7+
"child": {
8+
"anyOf": [
9+
{
10+
"type": "object",
11+
"$ref": "nested/child.json#/$defs/Child"
12+
}
13+
]
14+
}
15+
}
16+
}
17+
},
18+
"$ref": "#/$defs/Context"
19+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$defs": {
4+
"Child": {
5+
"type": "object",
6+
"x-bad-ref": {
7+
"$ref": "#missing"
8+
},
9+
"properties": {
10+
"parent": {
11+
"anyOf": [
12+
{
13+
"type": "object",
14+
"$ref": "../context.json#/$defs/Context"
15+
}
16+
]
17+
}
18+
}
19+
}
20+
},
21+
"$ref": "#/$defs/Child"
22+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"type": "object",
4+
"properties": {
5+
"root": {
6+
"$ref": "defs/context.json#/$defs/Context"
7+
}
8+
}
9+
}

0 commit comments

Comments
 (0)