Skip to content

Commit e7afe6c

Browse files
authored
Add --check option for CI verification of generated files (#2585)
* Add --check option to verify generated files without modification
* Refactor main function to include CLI completion support
* Add tests for --check option in OpenAPI generation
* Refactor directory comparison logic and add test for --check option ignoring __pycache__ directories
* Add test for --check option handling InvalidClassNameError
* Improve --check option handling and add test for invalid file format
1 parent 6dd9c28 commit e7afe6c

6 files changed

Lines changed: 379 additions & 9 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,9 @@ OpenAPI-only options:
535535
deprecated. it will be removed in future releases
536536

537537
General options:
538+
--check Verify generated files are up-to-date without modifying them. Exits
539+
with code 1 if differences found, 0 if up-to-date. Useful for CI to
540+
ensure generated code is committed.
538541
--debug show debug message (require "debug". `$ pip install ''datamodel-code-
539542
generator[debug]''`)
540543
--disable-warnings disable warnings

docs/index.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,9 @@ OpenAPI-only options:
527527
deprecated. it will be removed in future releases
528528

529529
General options:
530+
--check Verify generated files are up-to-date without modifying them. Exits
531+
with code 1 if differences found, 0 if up-to-date. Useful for CI to
532+
ensure generated code is committed.
530533
--debug show debug message (require "debug". `$ pip install ''datamodel-code-
531534
generator[debug]''`)
532535
--disable-warnings disable warnings

scripts/update_command_help_on_markdown.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import argparse
56
import io
67
import os
78
import re
@@ -63,12 +64,13 @@ def inject_help(markdown_text: str, help_text: str) -> str:
6364
def main() -> Exit:
6465
"""Update or validate command help in target markdown files."""
6566
help_text = get_help()
66-
arg_parser.add_argument(
67+
script_parser = argparse.ArgumentParser(description="Update command help in markdown files")
68+
script_parser.add_argument(
6769
"--check",
6870
action="store_true",
6971
help="Check if the file content is up to date without modifying",
7072
)
71-
args = arg_parser.parse_args()
73+
args = script_parser.parse_args()
7274
check: bool = args.check
7375

7476
for file_path in TARGET_MARKDOWN_FILES:

src/datamodel_code_generator/__main__.py

Lines changed: 136 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22

33
from __future__ import annotations
44

5+
import difflib
56
import json
67
import signal
78
import sys
9+
import tempfile
810
import warnings
911
from collections import defaultdict
1012
from collections.abc import Sequence # noqa: TC003 # pydantic needs it
@@ -59,6 +61,7 @@
5961

6062
# Options that should be excluded from pyproject.toml config generation
6163
EXCLUDED_CONFIG_OPTIONS: frozenset[str] = frozenset({
64+
"check",
6265
"generate_pyproject_config",
6366
"generate_cli_command",
6467
"version",
@@ -77,8 +80,9 @@ class Exit(IntEnum):
7780
"""Exit reasons."""
7881

7982
OK = 0
80-
ERROR = 1
81-
KeyboardInterrupt = 2
83+
DIFF = 1
84+
ERROR = 2
85+
KeyboardInterrupt = 3
8286

8387

8488
def sig_int_handler(_: int, __: Any) -> None: # pragma: no cover
@@ -337,6 +341,7 @@ def validate_root(cls, values: dict[str, Any]) -> dict[str, Any]: # noqa: N805
337341
input_file_type: InputFileType = InputFileType.Auto
338342
output_model_type: DataModelType = DataModelType.PydanticBaseModel
339343
output: Optional[Path] = None # noqa: UP045
344+
check: bool = False
340345
debug: bool = False
341346
disable_warnings: bool = False
342347
target_python_version: PythonVersion = PythonVersionMin
@@ -492,6 +497,79 @@ def generate_pyproject_config(args: Namespace) -> str:
492497
return "\n".join(lines) + "\n"
493498

494499

500+
def _normalize_line_endings(text: str) -> str:
501+
"""Normalize line endings to LF for cross-platform comparison."""
502+
return text.replace("\r\n", "\n")
503+
504+
505+
def _compare_single_file(
    generated_path: Path,
    actual_path: Path,
    encoding: str,
) -> tuple[bool, list[str]]:
    """Compare generated file content with existing file.

    Line endings of both files are normalized before comparison.

    Args:
        generated_path: Freshly generated file, treated as the expected content.
        actual_path: Existing file that is being verified.
        encoding: Text encoding used to read both files.

    Returns:
        Tuple of (has_differences, diff_lines)
        - has_differences: True if files differ or actual file doesn't exist
        - diff_lines: Newline-terminated lines for output, safe to join with ""
    """
    generated_content = _normalize_line_endings(generated_path.read_text(encoding=encoding))

    if not actual_path.exists():
        # Terminate the message like every other output line: the caller joins
        # the lines with "" and prints them without adding a newline.
        return True, [f"MISSING: {actual_path} (file does not exist but should be generated)\n"]

    actual_content = _normalize_line_endings(actual_path.read_text(encoding=encoding))

    if generated_content == actual_content:
        return False, []

    diff_lines: list[str] = []
    for line in difflib.unified_diff(
        actual_content.splitlines(keepends=True),
        generated_content.splitlines(keepends=True),
        fromfile=str(actual_path),
        tofile=f"{actual_path} (expected)",
    ):
        if line.endswith("\n"):
            diff_lines.append(line)
        else:
            # The last line of a file without a trailing newline comes back
            # unterminated and would be glued to the next diff line when joined.
            diff_lines.extend((f"{line}\n", "\\ No newline at end of file\n"))
    return True, diff_lines
536+
537+
538+
def _compare_directories(
    generated_dir: Path,
    actual_dir: Path,
    encoding: str,
) -> tuple[list[str], list[str], list[str]]:
    """Compare generated directory with existing directory.

    Only ``*.py`` files are considered. Files below a ``__pycache__`` directory
    in the existing directory are ignored, and a non-existent ``actual_dir`` is
    treated as empty, so every generated file is reported as missing.

    Args:
        generated_dir: Directory holding the freshly generated files.
        actual_dir: Existing directory that is being verified.
        encoding: Text encoding used to read the files.

    Returns:
        Tuple of (diffs, missing_files, extra_files)
        - diffs: Newline-terminated unified diff lines for files that exist on
          both sides but differ in content
        - missing_files: Sorted relative paths that were generated but do not
          exist in ``actual_dir``
        - extra_files: Sorted relative paths that exist in ``actual_dir`` but
          were not generated
    """
    diffs: list[str] = []

    generated_files = {path.relative_to(generated_dir) for path in generated_dir.rglob("*.py")}

    actual_files: set[Path] = set()
    if actual_dir.exists():
        actual_files = {
            path.relative_to(actual_dir) for path in actual_dir.rglob("*.py") if "__pycache__" not in path.parts
        }

    missing_files = [str(rel_path) for rel_path in sorted(generated_files - actual_files)]
    extra_files = [str(rel_path) for rel_path in sorted(actual_files - generated_files)]

    for rel_path in sorted(generated_files & actual_files):
        generated_content = _normalize_line_endings((generated_dir / rel_path).read_text(encoding=encoding))
        actual_content = _normalize_line_endings((actual_dir / rel_path).read_text(encoding=encoding))
        if generated_content == actual_content:
            continue
        for line in difflib.unified_diff(
            actual_content.splitlines(keepends=True),
            generated_content.splitlines(keepends=True),
            fromfile=str(rel_path),
            tofile=f"{rel_path} (expected)",
        ):
            if line.endswith("\n"):
                diffs.append(line)
            else:
                # The last line of a file without a trailing newline comes back
                # unterminated; left as is, the next diff line (or the next
                # file's header) would be glued onto it when the lines are joined.
                diffs.extend((f"{line}\n", "\\ No newline at end of file\n"))

    return diffs, missing_files, extra_files
571+
572+
495573
def _format_cli_value(value: str | list[str]) -> str:
496574
"""Format a value for CLI argument."""
497575
if isinstance(value, list):
@@ -522,9 +600,8 @@ def generate_cli_command(config: dict[str, TomlValue]) -> str:
522600
return " ".join(parts) + "\n"
523601

524602

525-
def main(args: Sequence[str] | None = None) -> Exit: # noqa: PLR0911, PLR0912, PLR0915
603+
def main(args: Sequence[str] | None = None) -> Exit: # noqa: PLR0911, PLR0912, PLR0914, PLR0915
526604
"""Execute datamodel code generation from command-line arguments."""
527-
# add cli completion support
528605
argcomplete.autocomplete(arg_parser)
529606

530607
if args is None: # pragma: no cover
@@ -571,6 +648,13 @@ def main(args: Sequence[str] | None = None) -> Exit: # noqa: PLR0911, PLR0912,
571648
arg_parser.print_help()
572649
return Exit.ERROR
573650

651+
if config.check and config.output is None:
652+
print( # noqa: T201
653+
"Error: --check cannot be used with stdout output (no --output specified)",
654+
file=sys.stderr,
655+
)
656+
return Exit.ERROR
657+
574658
if not is_supported_in_black(config.target_python_version): # pragma: no cover
575659
print( # noqa: T201
576660
f"Installed black doesn't support Python version {config.target_python_version.value}.\n"
@@ -642,11 +726,25 @@ def main(args: Sequence[str] | None = None) -> Exit: # noqa: PLR0911, PLR0912,
642726
)
643727
return Exit.ERROR
644728

729+
if config.check:
730+
config_output = cast("Path", config.output)
731+
is_directory_output = not config_output.suffix
732+
temp_context: tempfile.TemporaryDirectory[str] | None = tempfile.TemporaryDirectory()
733+
temp_dir = Path(temp_context.name)
734+
if is_directory_output:
735+
generate_output: Path | None = temp_dir / config_output.name
736+
else:
737+
generate_output = temp_dir / "output.py"
738+
else:
739+
temp_context = None
740+
generate_output = config.output
741+
is_directory_output = False
742+
645743
try:
646744
generate(
647745
input_=config.url or config.input or sys.stdin.read(),
648746
input_file_type=config.input_file_type,
649-
output=config.output,
747+
output=generate_output,
650748
output_model_type=config.output_model_type,
651749
target_python_version=config.target_python_version,
652750
base_class=config.base_class,
@@ -731,17 +829,48 @@ def main(args: Sequence[str] | None = None) -> Exit: # noqa: PLR0911, PLR0912,
731829
)
732830
except InvalidClassNameError as e:
733831
print(f"{e} You have to set `--class-name` option", file=sys.stderr) # noqa: T201
832+
if temp_context is not None:
833+
temp_context.cleanup()
734834
return Exit.ERROR
735835
except Error as e:
736836
print(str(e), file=sys.stderr) # noqa: T201
837+
if temp_context is not None:
838+
temp_context.cleanup()
737839
return Exit.ERROR
738840
except Exception: # noqa: BLE001
739841
import traceback # noqa: PLC0415
740842

741843
print(traceback.format_exc(), file=sys.stderr) # noqa: T201
844+
if temp_context is not None:
845+
temp_context.cleanup()
742846
return Exit.ERROR
743-
else:
744-
return Exit.OK
847+
848+
if config.check and config.output is not None and generate_output is not None:
849+
has_differences = False
850+
851+
if is_directory_output:
852+
diffs, missing_files, extra_files = _compare_directories(generate_output, config.output, config.encoding)
853+
if diffs:
854+
print("".join(diffs), end="") # noqa: T201
855+
has_differences = True
856+
for missing in missing_files:
857+
print(f"MISSING: {missing} (should be generated)") # noqa: T201
858+
has_differences = True
859+
for extra in extra_files:
860+
print(f"EXTRA: {extra} (no longer generated)") # noqa: T201
861+
has_differences = True
862+
else:
863+
diff_found, diff_lines = _compare_single_file(generate_output, config.output, config.encoding)
864+
if diff_found:
865+
print("".join(diff_lines), end="") # noqa: T201
866+
has_differences = True
867+
868+
if temp_context is not None: # pragma: no branch
869+
temp_context.cleanup()
870+
871+
return Exit.DIFF if has_differences else Exit.OK
872+
873+
return Exit.OK
745874

746875

747876
if __name__ == "__main__":

src/datamodel_code_generator/arguments.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,14 @@ def start_section(self, heading: str | None) -> None:
633633
# ======================================================================================
634634
# General options
635635
# ======================================================================================
636+
general_options.add_argument(
637+
"--check",
638+
action="store_true",
639+
default=None,
640+
help="Verify generated files are up-to-date without modifying them. "
641+
"Exits with code 1 if differences found, 0 if up-to-date. "
642+
"Useful for CI to ensure generated code is committed.",
643+
)
636644
general_options.add_argument(
637645
"--debug",
638646
help="show debug message (require \"debug\". `$ pip install 'datamodel-code-generator[debug]'`)",

0 commit comments

Comments
 (0)