Skip to content

Commit 91df24a

Browse files
authored
Fix: resolve relative $ref correctly when fetching schemas from HTTP URLs (#2600)
* Fix: Improve base URL context handling and URL validation
* Fix: Enhance HTTP URL validation and improve reference resolution logic
1 parent 8c8c7ed commit 91df24a

2 files changed

Lines changed: 158 additions & 9 deletions

File tree

src/datamodel_code_generator/reference.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -477,9 +477,16 @@ def current_base_path_context(self, base_path: Path | None) -> Generator[None, N
477477
yield
478478

479479
@contextmanager
480-
def base_url_context(self, base_url: str) -> Generator[None, None, None]:
481-
"""Temporarily set the base URL within a context."""
482-
if self._base_url:
480+
def base_url_context(self, base_url: str | None) -> Generator[None, None, None]:
481+
"""Temporarily set the base URL within a context.
482+
483+
Only sets the base_url if:
484+
- The new value is actually a URL (http:// or https://)
485+
- OR _base_url was already set (switching between URLs)
486+
This preserves backward compatibility for local file parsing where
487+
this method was previously a no-op.
488+
"""
489+
if self._base_url or (base_url and is_url(base_url)):
483490
with context_variable(self.set_base_url, self.base_url, base_url):
484491
yield
485492
else:
@@ -523,7 +530,7 @@ def add_id(self, id_: str, path: Sequence[str]) -> None:
523530
"""Register an identifier mapping to a resolved reference path."""
524531
self.ids["/".join(self.current_root)][id_] = self.resolve_ref(path)
525532

526-
def resolve_ref(self, path: Sequence[str] | str) -> str: # noqa: PLR0911, PLR0912
533+
def resolve_ref(self, path: Sequence[str] | str) -> str: # noqa: PLR0911, PLR0912, PLR0914
527534
"""Resolve a reference path to its canonical form."""
528535
joined_path = path if isinstance(path, str) else self.join_path(path)
529536
if joined_path == "#":
@@ -548,18 +555,23 @@ def resolve_ref(self, path: Sequence[str] | str) -> str: # noqa: PLR0911, PLR09
548555
else:
549556
if "#" not in joined_path:
550557
joined_path += "#"
551-
elif joined_path[0] == "#":
558+
elif joined_path[0] == "#" and not self.base_url:
552559
joined_path = f"{'/'.join(self.current_root)}{joined_path}"
553560

554561
file_path, fragment = joined_path.split("#", 1)
555562
ref = f"{file_path}#{fragment}"
556-
if self.root_id_base_path and not (is_url(joined_path) or Path(self._base_path, file_path).is_file()):
563+
if (
564+
self.root_id_base_path
565+
and not self.base_url
566+
and not (is_url(joined_path) or Path(self._base_path, file_path).is_file())
567+
):
557568
ref = f"{self.root_id_base_path}/{ref}"
558569

559570
if self.base_url:
560571
from .http import join_url # noqa: PLC0415
561572

562-
joined_url = join_url(self.base_url, ref)
573+
effective_base = self.root_id or self.base_url
574+
joined_url = join_url(effective_base, ref)
563575
if "#" in joined_url:
564576
return joined_url
565577
return f"{joined_url}#"
@@ -846,5 +858,5 @@ def snake_to_upper_camel(word: str, delimiter: str = "_") -> str:
846858

847859

848860
def is_url(ref: str) -> bool:
849-
"""Check if a reference string is a URL."""
861+
"""Check if a reference string is an HTTP(S) URL."""
850862
return ref.startswith(("https://", "http://"))

tests/test_reference.py

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import pytest
88

9-
from datamodel_code_generator.reference import ModelResolver, get_relative_path
9+
from datamodel_code_generator.reference import ModelResolver, get_relative_path, is_url
1010

1111

1212
@pytest.mark.parametrize(
@@ -70,3 +70,140 @@ def test_model_resolver_add_ref_unevaluated() -> None:
7070
model_resolver = ModelResolver()
7171
reference = model_resolver.add_ref("meta/unevaluated")
7272
assert reference.original_name == "unevaluated"
73+
74+
75+
def test_base_url_context_sets_url_when_base_url_already_set() -> None:
76+
"""When _base_url is already set, base_url_context should switch to new URL."""
77+
resolver = ModelResolver(base_url="https://example.com/original.json")
78+
assert resolver.base_url == "https://example.com/original.json"
79+
80+
with resolver.base_url_context("https://example.com/new.json"):
81+
assert resolver.base_url == "https://example.com/new.json"
82+
83+
# Should restore original
84+
assert resolver.base_url == "https://example.com/original.json"
85+
86+
87+
def test_base_url_context_sets_url_when_new_value_is_url() -> None:
88+
"""When _base_url is None but new value is a URL, should set base_url."""
89+
resolver = ModelResolver()
90+
assert resolver.base_url is None
91+
92+
with resolver.base_url_context("https://example.com/schema.json"):
93+
assert resolver.base_url == "https://example.com/schema.json"
94+
95+
# Should restore to None
96+
assert resolver.base_url is None
97+
98+
99+
def test_base_url_context_noop_when_new_value_is_not_url() -> None:
100+
"""When _base_url is None and new value is not a URL, should do nothing."""
101+
resolver = ModelResolver()
102+
assert resolver.base_url is None
103+
104+
with resolver.base_url_context("../relative/path.json"):
105+
# Should remain None because the value is not a URL
106+
assert resolver.base_url is None
107+
108+
assert resolver.base_url is None
109+
110+
111+
def test_base_url_context_nested() -> None:
112+
"""Nested base_url_context should properly restore values."""
113+
resolver = ModelResolver(base_url="https://example.com/level0.json")
114+
115+
with resolver.base_url_context("https://example.com/level1.json"):
116+
assert resolver.base_url == "https://example.com/level1.json"
117+
118+
with resolver.base_url_context("https://example.com/level2.json"):
119+
assert resolver.base_url == "https://example.com/level2.json"
120+
121+
assert resolver.base_url == "https://example.com/level1.json"
122+
123+
assert resolver.base_url == "https://example.com/level0.json"
124+
125+
126+
def test_resolve_ref_with_base_url_does_not_prepend_root_id_base_path() -> None:
127+
"""When base_url is set, root_id_base_path should not be prepended to refs."""
128+
resolver = ModelResolver(base_url="https://example.com/schemas/main.json")
129+
resolver.set_root_id("https://example.com/schemas/main.json")
130+
131+
# Resolve a relative ref
132+
result = resolver.resolve_ref("../other/schema.json")
133+
134+
# Should resolve via join_url, not prepend root_id_base_path
135+
assert result == "https://example.com/other/schema.json#"
136+
# Should NOT be like "https://example.com/schemas/../other/schema.json#"
137+
138+
139+
def test_resolve_ref_with_base_url_nested_relative_refs() -> None:
140+
"""Nested relative refs should resolve correctly when base_url is set."""
141+
resolver = ModelResolver(base_url="https://example.com/a/b/c/main.json")
142+
143+
# Resolve a deeply nested relative ref
144+
result = resolver.resolve_ref("../../other/schema.json")
145+
146+
assert result == "https://example.com/a/other/schema.json#"
147+
148+
149+
def test_resolve_ref_with_base_url_context_switch() -> None:
150+
"""Relative refs should resolve correctly after base_url context switch."""
151+
resolver = ModelResolver(base_url="https://example.com/schemas/person.json")
152+
153+
# Switch context to a different file
154+
with resolver.base_url_context("https://example.com/schemas/definitions/pet.json"):
155+
# Resolve a relative ref from the new context
156+
result = resolver.resolve_ref("../common/types.json")
157+
158+
assert result == "https://example.com/schemas/common/types.json#"
159+
160+
161+
def test_resolve_ref_local_fragment_with_base_url() -> None:
162+
"""Local fragment refs should resolve to full URL when base_url is set."""
163+
resolver = ModelResolver(base_url="https://example.com/schemas/main.json")
164+
165+
result = resolver.resolve_ref("#/definitions/Foo")
166+
167+
# When base_url is set, local fragments are resolved to full URL
168+
assert result == "https://example.com/schemas/main.json#/definitions/Foo"
169+
170+
171+
@pytest.mark.parametrize(
172+
("ref", "expected"),
173+
[
174+
# HTTP/HTTPS URLs (only supported schemes)
175+
("https://example.com/schema.json", True),
176+
("http://example.com/schema.json", True),
177+
("https://example.com/path/to/schema.json", True),
178+
# file:// URLs - NOT recognized (fetcher only supports HTTP)
179+
("file:///home/user/schema.json", False),
180+
("file:/home/user/schema.json", False),
181+
# Other URL schemes - NOT recognized
182+
("ftp://example.com/schema.json", False),
183+
# Relative paths (not URLs)
184+
("../relative/path.json", False),
185+
("relative/path.json", False),
186+
# Local fragments (not URLs)
187+
("#/definitions/Foo", False),
188+
("#", False),
189+
# Absolute paths (not URLs)
190+
("/absolute/path.json", False),
191+
# Windows paths (not URLs)
192+
("c:/windows/path.json", False),
193+
("d:/path/to/file.json", False),
194+
],
195+
)
196+
def test_is_url(ref: str, expected: bool) -> None:
197+
"""Test is_url correctly identifies HTTP(S) URLs only."""
198+
assert is_url(ref) == expected
199+
200+
201+
def test_resolve_ref_with_root_id_differs_from_base_url() -> None:
202+
"""When $id differs from fetch URL, refs should resolve against $id."""
203+
# Scenario: Schema fetched from CDN but has canonical $id
204+
resolver = ModelResolver(base_url="https://cdn.example.com/latest/schema.json")
205+
resolver.set_root_id("https://example.com/v1/schema.json")
206+
207+
result = resolver.resolve_ref("../common/types.json")
208+
209+
assert result == "https://example.com/common/types.json#"

0 commit comments

Comments
 (0)