Skip to content

Commit 36d102c

Browse files
authored
Fix DataType deepcopy infinite recursion with circular references (#2852)
* Fix DataType deepcopy infinite recursion with circular references
* Remove inline comments from __deepcopy__ method
1 parent 055f8ed commit 36d102c

2 files changed

Lines changed: 154 additions & 0 deletions

File tree

src/datamodel_code_generator/types.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import re
1111
from abc import ABC, abstractmethod
12+
from copy import deepcopy
1213
from enum import Enum, auto
1314
from functools import lru_cache
1415
from itertools import chain
@@ -354,6 +355,38 @@ class Config:
354355
_exclude_fields: ClassVar[set[str]] = {"parent", "children"}
355356
_pass_fields: ClassVar[set[str]] = {"parent", "children", "data_types", "reference"}
356357

358+
def __deepcopy__(self, memo: dict[int, Any] | None = None) -> DataType:
    """Return a deep copy of this instance that tolerates parent/children cycles.

    Fields named in ``_exclude_fields`` are not copied; they are set to ``None``
    on the clone. Every other model field is deep-copied with the shared
    ``memo``, so an object reachable through several paths is copied only once.

    Args:
        memo: Mapping from ``id()`` of an already-copied object to its copy.
            A fresh dict is used when ``None`` is given.

    Returns:
        The copy recorded in ``memo`` for this instance, built first if absent.
    """
    memo = {} if memo is None else memo

    key = id(self)
    if key in memo:
        return memo[key]

    model_cls = self.__class__
    skipped = self._exclude_fields

    fields_attr = "model_fields" if is_pydantic_v2() else "__fields__"
    field_names = getattr(model_cls, fields_attr)

    # First pass: read every field, then blank out the excluded ones so the
    # clone can be built without following any link yet.
    current = {name: getattr(self, name) for name in field_names}
    initial: dict[str, Any] = {
        name: None if name in skipped else value for name, value in current.items()
    }

    # NOTE(review): every field is passed as a keyword here; presumably the
    # constructor then treats all of them as explicitly set -- confirm whether
    # that matters to callers.
    factory_attr = "model_construct" if is_pydantic_v2() else "construct"
    clone: DataType = getattr(model_cls, factory_attr)(**initial)

    # Register the clone before recursing so that a reference leading back to
    # this instance resolves to the clone via the memo lookup above.
    memo[key] = clone

    # Second pass: replace the shallow values with deep copies.
    for name in field_names:
        if name in skipped:
            continue
        object.__setattr__(clone, name, deepcopy(getattr(self, name), memo))

    return clone
389+
357390
@classmethod
358391
def from_import( # noqa: PLR0913
359392
cls: builtins.type[DataTypeT],

tests/test_types.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,124 @@ def test_remove_none_from_union(type_str: str, use_union_operator: bool, expecte
152152
def test_remove_none_from_union_short_strings(type_str: str, use_union_operator: bool, expected: str) -> None:
153153
"""Test _remove_none_from_union with short strings to verify index bounds safety."""
154154
assert _remove_none_from_union(type_str, use_union_operator=use_union_operator) == expected
155+
156+
157+
def test_datatype_deepcopy_with_circular_references() -> None:
    """Deep-copying a DataType whose parent/children links form a cycle must terminate.

    Guards against the recursion error that occurred when deepcopying DataType
    objects that reference each other through ``parent`` and ``children``.
    """
    from copy import deepcopy

    # Imported first, for its side effect only, to trigger model_rebuild
    from datamodel_code_generator.model.base import DataModelFieldBase  # noqa: F401
    from datamodel_code_generator.types import DataType

    root = DataType(type="ParentType")
    first_child = DataType(type="ChildType1", parent=root)
    second_child = DataType(type="ChildType2", parent=root)
    # Close the loop: root -> children -> parent -> root
    root.children = [first_child, second_child]

    # Must return instead of recursing forever
    clone = deepcopy(root)

    # Plain data survives; the excluded link fields come back as None
    assert clone.type == "ParentType"
    assert clone.parent is None
    assert clone.children is None
183+
184+
185+
def test_datatype_deepcopy_with_nested_data_types() -> None:
    """Nested ``data_types`` are copied with their container and stay independent."""
    from copy import deepcopy

    # Imported first, for its side effect only, to trigger model_rebuild
    from datamodel_code_generator.model.base import DataModelFieldBase  # noqa: F401
    from datamodel_code_generator.types import DataType

    element = DataType(type="InnerType", is_optional=True)
    container = DataType(type="OuterType", data_types=[element], is_list=True)

    clone = deepcopy(container)

    # Shape and values of the copy mirror the original
    assert clone.type == "OuterType"
    assert clone.is_list is True
    assert len(clone.data_types) == 1
    copied_element = clone.data_types[0]
    assert copied_element.type == "InnerType"
    assert copied_element.is_optional is True

    # Mutating the original afterwards must not leak into the copy
    element.type = "ModifiedInnerType"
    assert clone.data_types[0].type == "InnerType"
210+
211+
212+
def test_datatype_deepcopy_memo_prevents_duplicate_copies() -> None:
    """An object referenced from two places is copied once and shared in the copy."""
    from copy import deepcopy

    # Imported first, for its side effect only, to trigger model_rebuild
    from datamodel_code_generator.model.base import DataModelFieldBase  # noqa: F401
    from datamodel_code_generator.types import DataType

    # One DataType instance held by two different containers
    shared = DataType(type="SharedType")
    left = DataType(type="Container1", data_types=[shared])
    right = DataType(type="Container2", data_types=[shared])
    root = DataType(type="Root", data_types=[left, right])

    clone = deepcopy(root)

    assert clone.type == "Root"
    assert len(clone.data_types) == 2
    copied_left, copied_right = clone.data_types
    assert copied_left.type == "Container1"
    assert copied_right.type == "Container2"

    # Each copied container still holds the shared type...
    assert copied_left.data_types[0].type == "SharedType"
    assert copied_right.data_types[0].type == "SharedType"

    # ...and it is one and the same copied object in both (memoization)
    assert copied_left.data_types[0] is copied_right.data_types[0]
241+
242+
243+
def test_datatype_deepcopy_with_none_memo() -> None:
    """Calling ``__deepcopy__`` with ``memo=None`` still yields a distinct, equal-valued copy."""
    # Imported first, for its side effect only, to trigger model_rebuild
    from datamodel_code_generator.model.base import DataModelFieldBase  # noqa: F401
    from datamodel_code_generator.types import DataType

    original = DataType(type="TestType", is_optional=True)

    # Direct dunder call so that the `if memo is None` branch is exercised
    clone = original.__deepcopy__(None)  # noqa: PLC2801

    assert clone.type == "TestType"
    assert clone.is_optional is True
    assert clone is not original
257+
258+
259+
def test_datatype_deepcopy_memo_cache_hit() -> None:
    """A second ``__deepcopy__`` call with the same memo returns the first copy."""
    # Imported first, for its side effect only, to trigger model_rebuild
    from datamodel_code_generator.model.base import DataModelFieldBase  # noqa: F401
    from datamodel_code_generator.types import DataType

    original = DataType(type="TestType")
    memo: dict[int, DataType] = {}

    # Initial call builds the copy and records it under id(original)
    first = original.__deepcopy__(memo)  # noqa: PLC2801
    assert first is not original
    assert id(original) in memo

    # Repeat call with the populated memo takes the cache-hit branch
    second = original.__deepcopy__(memo)  # noqa: PLC2801
    assert second is first

0 commit comments

Comments
 (0)