Skip to content

Commit 7b7f1e9

Browse files
matssun, claude, koxudaxi
authored
feat: Add --external-ref-mapping to import from external packages instead of generating (#3006)
* feat: Add --external-ref-mapping CLI option and config plumbing Add a new --external-ref-mapping option that maps external $ref file paths to Python import packages. When a $ref points to a mapped file, an import statement is generated instead of a duplicate class definition. This commit adds the option to the CLI argument parser, Config class, GenerateConfig, ParserConfig, and their TypedDict counterparts. No behavior change yet — the core logic follows in the next commit. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: Implement external ref mapping in JSON Schema parser When --external-ref-mapping is provided, external $ref targets that match a mapped file produce import-based DataTypes (via Import.from_full_path and DataType.from_import) instead of loading and parsing the external file. This follows the exact same pattern as the existing x-python-import vendor extension, but configured externally via CLI rather than requiring modifications to the schema YAML. Three changes in jsonschema.py: - __init__: normalize mapping file paths to absolute for reliable matching - get_ref_data_type: check mapping before _load_ref_schema_object - resolve_ref: skip loading/parsing for mapped external files Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * test: Add tests for --external-ref-mapping feature Five test cases covering: - Basic CLI usage: external refs produce imports, not class definitions - No duplicate classes: mapped types are imported, not generated - Regression: without the flag, behavior is unchanged - Invalid format: missing '=' in mapping produces a clear error - Programmatic API: GenerateConfig with external_ref_mapping dict Test fixtures: api.yaml referencing common.yaml via $ref, with expected output showing imports from the mapped package. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: Correct help text for --external-ref-mapping With nargs="+", multiple mappings are passed after a single flag invocation, not by repeating the flag. Updated help text to reflect this. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: resolve PR3006 CI and nested external ref mapping * fix: address external-ref-mapping review feedback * fix: validate non-empty external-ref-mapping pairs * test: use assert helper for external ref mapping errors * test: move external ref mapping cases into main openapi e2e * test: fix absolute ref template for Windows yaml parsing --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Koudai Aono <koxudaxi@gmail.com>
1 parent 991f0ff commit 7b7f1e9

22 files changed

Lines changed: 614 additions & 1 deletion

src/datamodel_code_generator/__main__.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,32 @@ def validate_class_decorators(cls, values: dict[str, Any]) -> dict[str, Any]: #
317317
values["class_decorators"] = decorators
318318
return values
319319

320+
@model_validator(mode="before")  # ty: ignore
def validate_external_ref_mapping(cls, values: dict[str, Any]) -> dict[str, Any]:  # noqa: N805
    """Convert ``external_ref_mapping`` from a list of KEY=VALUE strings into a dict.

    Registered as a ``mode="before"`` model validator. When the raw value is
    a list, every item must be a ``FILE_PATH=PYTHON_PACKAGE`` string and the
    list is replaced in ``values`` by a ``{file_path: python_package}`` dict.
    Any other raw value (key missing, ``None``, already a dict, ...) is
    returned untouched.

    Args:
        values: Raw, not yet validated field values.

    Returns:
        The same ``values`` mapping, with ``external_ref_mapping`` replaced
        by a dict when it was given as a list.

    Raises:
        Error: If an item is not a string, contains no ``=``, or has an
            empty side once surrounding whitespace is stripped.
    """
    raw = values.get("external_ref_mapping")
    if not isinstance(raw, list):
        return values

    mapping: dict[str, str] = {}
    for item in raw:
        if not isinstance(item, str) or "=" not in item:
            msg = (
                f"Invalid --external-ref-mapping format: {item!r}. "
                "Expected FILE_PATH=PYTHON_PACKAGE (e.g., '../common/schema.yaml=mypackage.models')"
            )
            raise Error(msg)

        # Only the first "=" separates the two sides; later ones stay in the package part.
        file_path, _, python_package = item.partition("=")
        file_path = file_path.strip()
        python_package = python_package.strip()
        if not file_path or not python_package:
            msg = (
                f"Invalid --external-ref-mapping format: {item!r}. "
                "Both FILE_PATH and PYTHON_PACKAGE must be non-empty."
            )
            raise Error(msg)

        # A repeated file path keeps the package of its last occurrence.
        mapping[file_path] = python_package

    values["external_ref_mapping"] = mapping
    return values
345+
320346
__validate_output_datetime_class_err: ClassVar[str] = (
321347
'`--output-datetime-class` only allows "datetime" for '
322348
f"`--output-model-type` {DataModelType.DataclassesDataclass.value}"
@@ -624,6 +650,7 @@ def validate_class_name_affix_scope(cls, v: str | ClassNameAffixScope | None) ->
624650
watch_delay: float = 0.5
625651
schema_version: Optional[str] = None # noqa: UP045
626652
schema_version_mode: Optional[VersionMode] = None # noqa: UP045
653+
external_ref_mapping: Optional[dict[str, str]] = None # noqa: UP045
627654

628655
def merge_args(self, args: Namespace) -> None:
629656
"""Merge command-line arguments into config."""
@@ -1070,6 +1097,7 @@ def run_generate_from_config( # noqa: PLR0913, PLR0917
10701097
default_value_overrides=default_value_overrides,
10711098
schema_version=config.schema_version,
10721099
schema_version_mode=config.schema_version_mode,
1100+
external_ref_mapping=config.external_ref_mapping,
10731101
)
10741102

10751103
if output is None and result is not None: # pragma: no cover

src/datamodel_code_generator/_types/generate_config_dict.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ class GenerateConfigDict(TypedDict, closed=True):
172172
default_value_overrides: NotRequired[Mapping[str, Any] | None]
173173
schema_version: NotRequired[str | None]
174174
schema_version_mode: NotRequired[VersionMode | None]
175+
external_ref_mapping: NotRequired[dict[str, str] | None]
175176

176177

177178
class ValidatorDefinition(TypedDict):

src/datamodel_code_generator/_types/parser_config_dicts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ class ParserConfigDict(TypedDict):
162162
field_type_collision_strategy: NotRequired[FieldTypeCollisionStrategy | None]
163163
target_pydantic_version: NotRequired[TargetPydanticVersion | None]
164164
default_value_overrides: NotRequired[Mapping[str, Any] | None]
165+
external_ref_mapping: NotRequired[dict[str, str] | None]
165166

166167

167168
class GraphQLParserConfigDict(ParserConfigDict, closed=True):

src/datamodel_code_generator/arguments.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,21 @@ def _dataclass_arguments(value: str) -> DataclassArguments:
7070
return cast("DataclassArguments", result)
7171

7272

73+
def _external_ref_mapping(value: str) -> str:
74+
"""Validate FILE_PATH=PYTHON_PACKAGE mapping format."""
75+
if "=" not in value:
76+
msg = (
77+
f"Invalid --external-ref-mapping format: {value!r}. "
78+
"Expected FILE_PATH=PYTHON_PACKAGE (e.g., '../common/schema.yaml=mypackage.models')"
79+
)
80+
raise ArgumentTypeError(msg)
81+
file_path, python_package = value.split("=", maxsplit=1)
82+
if not file_path.strip() or not python_package.strip():
83+
msg = f"Invalid --external-ref-mapping format: {value!r}. Both FILE_PATH and PYTHON_PACKAGE must be non-empty."
84+
raise ArgumentTypeError(msg)
85+
return value
86+
87+
7388
class SortingHelpFormatter(RawDescriptionHelpFormatter):
7489
"""Help formatter that sorts arguments, adds color to section headers, and preserves epilog formatting."""
7590

@@ -147,6 +162,15 @@ def start_section(self, heading: str | None) -> None:
147162
),
148163
choices=[i.value for i in InputFileType],
149164
)
165+
base_options.add_argument(
166+
"--external-ref-mapping",
167+
nargs="+",
168+
metavar="FILE_PATH=PYTHON_PACKAGE",
169+
type=_external_ref_mapping,
170+
help="Map external $ref file paths to Python import packages instead of generating duplicate classes. "
171+
'Accepts one or more mappings after a single flag. Format: "path/to/schema.yaml=mypackage.models". '
172+
"When a $ref points to a mapped file, an import statement is generated instead of a class definition.",
173+
)
150174
base_options.add_argument(
151175
"--output",
152176
help="Output file (default: stdout)",

src/datamodel_code_generator/cli_options.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class CLIOptionMeta:
7272
"--encoding": CLIOptionMeta(name="--encoding", category=OptionCategory.BASE),
7373
"--schema-version": CLIOptionMeta(name="--schema-version", category=OptionCategory.BASE),
7474
"--schema-version-mode": CLIOptionMeta(name="--schema-version-mode", category=OptionCategory.BASE),
75+
"--external-ref-mapping": CLIOptionMeta(name="--external-ref-mapping", category=OptionCategory.BASE),
7576
# ==========================================================================
7677
# Model Customization
7778
# ==========================================================================

src/datamodel_code_generator/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ class Config:
208208
default_value_overrides: Mapping[str, Any] | None = None
209209
schema_version: str | None = None
210210
schema_version_mode: VersionMode | None = None
211+
external_ref_mapping: dict[str, str] | None = None
211212

212213

213214
class ParserConfig(BaseModel):
@@ -341,6 +342,7 @@ class Config:
341342
field_type_collision_strategy: FieldTypeCollisionStrategy | None = None
342343
target_pydantic_version: TargetPydanticVersion | None = None
343344
default_value_overrides: Mapping[str, Any] | None = None
345+
external_ref_mapping: dict[str, str] | None = None
344346

345347

346348
class GraphQLParserConfig(ParserConfig):

src/datamodel_code_generator/parser/jsonschema.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,17 @@ def __init__(
702702
self.raw_obj: dict[str, YamlValue] = {}
703703
self._root_id: Optional[str] = None # noqa: UP045
704704
self._root_id_base_path: Optional[str] = None # noqa: UP045
705+
706+
# Normalize external ref mapping paths to absolute for reliable matching
707+
raw_mapping = self.config.external_ref_mapping
708+
self._external_ref_mapping: dict[str, str] = {}
709+
if raw_mapping:
710+
for file_path, python_package in raw_mapping.items():
711+
if is_url(file_path):
712+
self._external_ref_mapping[file_path] = python_package
713+
else:
714+
abs_path = str((self.base_path / file_path).resolve())
715+
self._external_ref_mapping[abs_path] = python_package
705716
self.reserved_refs: defaultdict[tuple[str, ...], set[str]] = defaultdict(set)
706717
self._dynamic_anchor_index: dict[tuple[str, ...], dict[str, str]] = {}
707718
self._recursive_anchor_index: dict[tuple[str, ...], list[str]] = {}
@@ -1324,8 +1335,71 @@ def _get_data_type(type_: str, format__: str) -> DataType:
13241335
return self.data_type(data_types=[data_type], is_optional=True)
13251336
return data_type
13261337

1338+
def _resolve_external_ref_mapping(self, ref: str) -> tuple[str, str] | None:
1339+
"""Resolve a ref and return mapped package + fragment if configured."""
1340+
if not self._external_ref_mapping:
1341+
return None
1342+
1343+
def _resolve_lookup_key(file_part: str) -> str:
1344+
if is_url(file_part):
1345+
return file_part
1346+
path = Path(file_part)
1347+
if path.is_absolute():
1348+
return str(path.resolve())
1349+
base_path = self.model_resolver.current_base_path or self.base_path
1350+
return str((base_path / path).resolve())
1351+
1352+
candidate_refs = [ref]
1353+
resolved_ref = self.model_resolver.resolve_ref(ref)
1354+
if resolved_ref not in candidate_refs:
1355+
candidate_refs.append(resolved_ref)
1356+
1357+
for candidate_ref in candidate_refs:
1358+
if "#" not in candidate_ref:
1359+
continue
1360+
file_part, fragment = candidate_ref.split("#", maxsplit=1)
1361+
if not file_part:
1362+
continue
1363+
lookup_key = _resolve_lookup_key(file_part)
1364+
if python_package := self._external_ref_mapping.get(lookup_key):
1365+
return python_package, fragment
1366+
1367+
return None
1368+
1369+
def _check_external_ref_mapping(self, ref: str) -> DataType | None:
1370+
"""Check if a $ref matches an external ref mapping and return an import-based DataType.
1371+
1372+
Splits the ref into file path + JSON pointer fragment, resolves the file path
1373+
to absolute, and checks against the normalized mapping. If matched, constructs
1374+
an import from the mapped package and the class name extracted from the fragment.
1375+
1376+
Returns None if no mapping matches, allowing the caller to fall through
1377+
to normal ref resolution.
1378+
"""
1379+
mapped = self._resolve_external_ref_mapping(ref)
1380+
if mapped is None:
1381+
return None
1382+
python_package, fragment = mapped
1383+
1384+
# Extract and normalize class name from fragment to match generated model naming.
1385+
raw_name = unescape_json_pointer_segment(fragment.rstrip("/").rsplit("/", maxsplit=1)[-1])
1386+
if not raw_name:
1387+
return None
1388+
class_name = self.model_resolver.get_class_name(raw_name, unique=False).name
1389+
1390+
# Construct import — same pattern as x-python-import
1391+
full_path = f"{python_package}.{class_name}"
1392+
import_ = Import.from_full_path(full_path)
1393+
self.imports.append(import_)
1394+
return self.data_type.from_import(import_)
1395+
13271396
def get_ref_data_type(self, ref: str) -> DataType:
13281397
"""Get a data type from a reference string."""
1398+
# Check external ref mapping before loading the schema
1399+
mapped = self._check_external_ref_mapping(ref)
1400+
if mapped is not None:
1401+
return mapped
1402+
13291403
ref_schema = self._load_ref_schema_object(ref)
13301404
x_python_import = ref_schema.extras.get("x-python-import")
13311405
if isinstance(x_python_import, dict):
@@ -3803,6 +3877,12 @@ def _get_ref_body_from_remote(self, resolved_ref: str) -> dict[str, YamlValue]:
38033877

38043878
def resolve_ref(self, object_ref: str) -> Reference:
38053879
"""Resolve a reference by loading and parsing the referenced schema."""
3880+
# If the ref is mapped to an external package, mark as loaded and skip parsing
3881+
if self._resolve_external_ref_mapping(object_ref) is not None:
3882+
reference = self.model_resolver.add_ref(object_ref)
3883+
reference.loaded = True
3884+
return reference
3885+
38063886
reference = self.model_resolver.add_ref(object_ref)
38073887
if reference.loaded:
38083888
return reference

tests/data/expected/main/input_model/config_class.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ class GenerateConfig(TypedDict, closed=True):
252252
default_value_overrides: NotRequired[Mapping[str, Any] | None]
253253
schema_version: NotRequired[str | None]
254254
schema_version_mode: NotRequired[VersionMode | None]
255+
external_ref_mapping: NotRequired[dict[str, str] | None]
255256

256257

257258
class ValidatorDefinition(TypedDict):
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# generated by datamodel-codegen:
2+
# filename: api.yaml
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from datetime import datetime
8+
9+
from mypackage.shared.models import Error, User
10+
from pydantic import BaseModel
11+
12+
13+
class UserResponse(BaseModel):
14+
user: User
15+
request_id: str
16+
17+
18+
class ErrorResponse(BaseModel):
19+
error: Error
20+
timestamp: datetime
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# generated by datamodel-codegen:
2+
# filename: api_nested.yaml
3+
# timestamp: 2019-07-26T00:00:00+00:00
4+
5+
from __future__ import annotations
6+
7+
from mypackage.shared.models import User
8+
from pydantic import BaseModel
9+
10+
11+
class UserAlias(BaseModel):
12+
__root__: User
13+
14+
15+
class UserResponse(BaseModel):
16+
user: UserAlias

0 commit comments

Comments
 (0)