From 967f327cd4e6b3906963b58b26a558d0012323d7 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 Apr 2026 16:37:10 +0200 Subject: [PATCH 01/16] Add pytask lock command --- .../_static/md/commands/build-options.md | 1 - .../_static/md/commands/command-list.md | 1 + docs/source/how_to_guides/portability.md | 8 +- docs/source/reference_guides/lockfile.md | 6 +- src/_pytask/build.py | 10 - src/_pytask/lock.py | 426 ++++++++ src/_pytask/lockfile.py | 42 +- src/_pytask/mark/__init__.py | 13 +- src/_pytask/parameters.py | 38 +- src/_pytask/pluginmanager.py | 1 + tests/test_lock_command.py | 993 ++++++++++++++++++ tests/test_lockfile.py | 31 - 12 files changed, 1507 insertions(+), 63 deletions(-) create mode 100644 src/_pytask/lock.py create mode 100644 tests/test_lock_command.py diff --git a/docs/source/_static/md/commands/build-options.md b/docs/source/_static/md/commands/build-options.md index 317f7f2a..3b8fe0e7 100644 --- a/docs/source/_static/md/commands/build-options.md +++ b/docs/source/_static/md/commands/build-options.md @@ -2,7 +2,6 @@ | ---------------------------------------------------------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | | -c, --config FILE | - | Path to configuration file. | | --capture [fd\|no\|sys\|tee-sys] | fd | Per task capturing method. | -| --clean-lockfile | false | Rewrite the lockfile with only currently collected tasks. | | --database-url TEXT | - | Url to the database. | | --debug-pytask | false | Trace all function calls in the plugin framework. | | --disable-warnings | false | Disables the summary for warnings. | diff --git a/docs/source/_static/md/commands/command-list.md b/docs/source/_static/md/commands/command-list.md index 093f3ad2..af659a97 100644 --- a/docs/source/_static/md/commands/command-list.md +++ b/docs/source/_static/md/commands/command-list.md @@ -4,5 +4,6 @@ | [`clean`](clean.md) | Clean the provided paths by removing files unknown to pytask. | | [`collect`](collect.md) | Collect tasks and report information about them. | | [`dag`](dag.md) | Create a visualization of the directed acyclic graph. | +| [`lock`](lock.md) | Inspect and update recorded task state in the lockfile. | | [`markers`](markers.md) | Show all registered markers. | | [`profile`](profile.md) | Show information about resource consumption. | diff --git a/docs/source/how_to_guides/portability.md b/docs/source/how_to_guides/portability.md index 6ffb23dd..b3b099fa 100644 --- a/docs/source/how_to_guides/portability.md +++ b/docs/source/how_to_guides/portability.md @@ -85,11 +85,11 @@ tasks run. If tasks are removed or renamed, their old entries remain as stale da are ignored. To clean up stale entries without deleting the file, run -[`pytask build --clean-lockfile`](../reference_guides/commands.md#pytask-build--clean-lockfile): +[`pytask lock clean`](../reference_guides/commands.md#pytask-lock-clean): ```console -$ pytask build --clean-lockfile +$ pytask lock clean ``` -This rewrites the lockfile after a successful build with only the currently collected -tasks and their current state values. +This rewrites the lockfile with only the currently collected tasks and their current +state values. diff --git a/docs/source/reference_guides/lockfile.md b/docs/source/reference_guides/lockfile.md index ea4ac1bc..dfb3c423 100644 --- a/docs/source/reference_guides/lockfile.md +++ b/docs/source/reference_guides/lockfile.md @@ -51,9 +51,9 @@ There are two portability concerns: ## Maintenance -Use [`pytask build --clean-lockfile`](commands.md#pytask-build--clean-lockfile) to -rewrite `pytask.lock` with only currently collected tasks. The rewrite happens after a -successful build and recomputes current state values without executing tasks again. +Use [`pytask lock clean`](commands.md#pytask-lock-clean) to rewrite `pytask.lock` with +only currently collected tasks. The command removes stale task entries without executing +tasks again. ## File Format Reference diff --git a/src/_pytask/build.py b/src/_pytask/build.py index 52ce5e17..4195ee32 100644 --- a/src/_pytask/build.py +++ b/src/_pytask/build.py @@ -72,7 +72,6 @@ def build( # noqa: PLR0913 debug_pytask: bool = False, disable_warnings: bool = False, dry_run: bool = False, - clean_lockfile: bool = False, editor_url_scheme: Literal["no_link", "file", "vscode", "pycharm"] # noqa: PYI051 | str = "file", explain: bool = False, @@ -132,8 +131,6 @@ def build( # noqa: PLR0913 Whether warnings should be disabled and not displayed. dry_run : bool, default=False Whether a dry-run should be performed that shows which tasks need to be rerun. - clean_lockfile : bool, default=False - Whether the lockfile should be rewritten to only include collected tasks. editor_url_scheme : Literal["no_link", "file", "vscode", "pycharm"] | str A URL scheme that allows task, node, and file names to become clickable links. explain : bool, default=False @@ -228,7 +225,6 @@ def build( # noqa: PLR0913 "debug_pytask": debug_pytask, "disable_warnings": disable_warnings, "dry_run": dry_run, - "clean_lockfile": clean_lockfile, "editor_url_scheme": editor_url_scheme, "explain": explain, "expression": expression, @@ -357,12 +353,6 @@ def build( # noqa: PLR0913 default=False, help="Execute a task even if it succeeded successfully before.", ) -@click.option( - "--clean-lockfile", - is_flag=True, - default=False, - help="Rewrite the lockfile with only currently collected tasks.", -) @click.option( "--explain", is_flag=True, diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py new file mode 100644 index 00000000..d35e9e7c --- /dev/null +++ b/src/_pytask/lock.py @@ -0,0 +1,426 @@ +"""Implement commands to inspect and update the lockfile.""" + +from __future__ import annotations + +import sys +from dataclasses import dataclass +from itertools import chain +from typing import TYPE_CHECKING +from typing import Any +from typing import cast + +import click + +from _pytask.click import ColoredCommand +from _pytask.click import ColoredGroup +from _pytask.console import console +from _pytask.dag import create_dag +from _pytask.dag_utils import task_and_descending_tasks +from _pytask.dag_utils import task_and_preceding_tasks +from _pytask.exceptions import CollectionError +from _pytask.exceptions import ConfigurationError +from _pytask.exceptions import ExecutionError +from _pytask.exceptions import NodeNotFoundError +from _pytask.exceptions import ResolvingDependenciesError +from _pytask.lockfile import _build_task_entry +from _pytask.lockfile import _TaskEntry +from _pytask.lockfile import build_portable_task_id +from _pytask.node_protocols import PNode +from _pytask.node_protocols import PProvisionalNode +from _pytask.node_protocols import PTask +from _pytask.outcomes import ExitCode +from _pytask.pluginmanager import hookimpl +from _pytask.pluginmanager import storage +from _pytask.session import Session +from _pytask.traceback import Traceback + +if TYPE_CHECKING: + from collections.abc import Callable + + from _pytask.dag_graph import DAG + from _pytask.lockfile import LockfileState + + +@dataclass(slots=True) +class _PlannedChange: + kind: str + task_id: str + entry: _TaskEntry | None = None + + +def _validate_confirmation_options(raw_config: dict[str, Any]) -> None: + if raw_config["dry_run"] and raw_config["yes"]: + msg = "The options '--dry-run' and '--yes' are mutually exclusive." + raise click.UsageError(msg) + + +def _keyword_filter(tasks: list[PTask], expression: str) -> set[str]: + from _pytask.mark import Expression # noqa: PLC0415 + from _pytask.mark import KeywordMatcher # noqa: PLC0415 + from _pytask.mark import ParseError # noqa: PLC0415 + + try: + compiled = Expression.compile_(expression) + except ParseError as e: + msg = f"Wrong expression passed to '-k': {expression}: {e}" + raise ValueError(msg) from None + + return { + task.signature + for task in tasks + if compiled.evaluate(KeywordMatcher.from_task(task)) + } + + +def _marker_filter(tasks: list[PTask], expression: str) -> set[str]: + from _pytask.mark import Expression # noqa: PLC0415 + from _pytask.mark import MarkMatcher # noqa: PLC0415 + from _pytask.mark import ParseError # noqa: PLC0415 + + try: + compiled = Expression.compile_(expression) + except ParseError as e: + msg = f"Wrong expression passed to '-m': {expression}: {e}" + raise ValueError(msg) from None + + return { + task.signature + for task in tasks + if compiled.evaluate(MarkMatcher.from_task(task)) + } + + +def _expand_task_selection( + task_signatures: set[str], + dag: DAG, + *, + with_ancestors: bool, + with_descendants: bool, +) -> set[str]: + selected = set(task_signatures) + if with_ancestors: + selected |= set( + chain.from_iterable( + task_and_preceding_tasks(signature, dag) + for signature in task_signatures + ) + ) + if with_descendants: + selected |= set( + chain.from_iterable( + task_and_descending_tasks(signature, dag) + for signature in task_signatures + ) + ) + return selected + + +def _select_tasks(session: Session) -> list[PTask]: + selected = {task.signature for task in session.tasks} + + expression = session.config.get("expression") + if expression: + selected &= _keyword_filter(session.tasks, expression) + + marker_expression = session.config.get("marker_expression") + if marker_expression: + selected &= _marker_filter(session.tasks, marker_expression) + + selected = _expand_task_selection( + selected, + session.dag, + with_ancestors=session.config.get("with_ancestors", False), + with_descendants=session.config.get("with_descendants", False), + ) + return [task for task in session.tasks if task.signature in selected] + + +def _validate_task_for_accept(session: Session, task: PTask) -> None: + predecessors = set(session.dag.predecessors(task.signature)) + + for node_signature in chain( + predecessors, [task.signature], session.dag.successors(task.signature) + ): + node = session.dag.nodes[node_signature] + + if node_signature not in predecessors and isinstance(node, PProvisionalNode): + continue + + if isinstance(node, PProvisionalNode): + msg = ( + f"Task {task.name!r} still references provisional node " + f"{node.name!r} while accepting lockfile state." + ) + raise ExecutionError(msg) + + if not isinstance(node, (PTask, PNode)): + continue + + state = node.state() + if state is not None: + continue + + if node_signature in predecessors: + msg = f"{task.name!r} requires missing node {node.name!r}." + raise NodeNotFoundError(msg) + + if node_signature == task.signature: + msg = f"{task.name!r} has no state and cannot be accepted." + raise ExecutionError(msg) + + msg = f"{task.name!r} is missing product {node.name!r}." + raise NodeNotFoundError(msg) + + +def _plan_accept_changes(session: Session, tasks: list[PTask]) -> list[_PlannedChange]: + root = session.config["root"] + planned_changes = [] + + for task in tasks: + _validate_task_for_accept(session, task) + entry = _build_task_entry(session, task, root) + if entry is None: + task_id = build_portable_task_id(task, root) + msg = f"{task_id!r} has no state and cannot be accepted." + raise ExecutionError(msg) + + existing = session.config["lockfile_state"].get_task_entry(entry.id) + if existing != entry: + planned_changes.append( + _PlannedChange(kind="accept", task_id=entry.id, entry=entry) + ) + + return planned_changes + + +def _plan_reset_changes(session: Session, tasks: list[PTask]) -> list[_PlannedChange]: + root = session.config["root"] + planned_changes = [] + + for task in tasks: + task_id = build_portable_task_id(task, root) + if session.config["lockfile_state"].get_task_entry(task_id) is not None: + planned_changes.append(_PlannedChange(kind="reset", task_id=task_id)) + + return planned_changes + + +def _plan_clean_changes(session: Session) -> list[_PlannedChange]: + state: LockfileState = session.config["lockfile_state"] + current_task_ids = { + build_portable_task_id(task, session.config["root"]) for task in session.tasks + } + stale_ids = state.task_ids() - current_task_ids + return [ + _PlannedChange(kind="clean", task_id=task_id) for task_id in sorted(stale_ids) + ] + + +def _describe_change(change: _PlannedChange) -> str: + if change.kind == "accept": + return f"Accept recorded state for {change.task_id}" + return f"Remove recorded state for {change.task_id}" + + +def _apply_changes( + session: Session, planned_changes: list[_PlannedChange] +) -> list[_PlannedChange]: + if session.config["dry_run"]: + for change in planned_changes: + console.print(f"Would {_describe_change(change).lower()}.") + return planned_changes + + accepted = planned_changes + if not session.config["yes"]: + accepted = [] + for change in planned_changes: + prompt = f"{_describe_change(change)}?" + if click.confirm(prompt, default=False): + accepted.append(change) + + if not accepted: + return [] + + state: LockfileState = session.config["lockfile_state"] + entries = [change.entry for change in accepted if change.entry is not None] + if entries: + state.set_task_entries(entries) + + removed_ids = {change.task_id for change in accepted if change.entry is None} + if removed_ids: + state.remove_task_entries(removed_ids) + + state.flush() + + for change in accepted: + console.print(f"{_describe_change(change)}.") + + return accepted + + +def _run_lock_command( + raw_config: dict[str, Any], + *, + planner: Callable[[Session], list[_PlannedChange]] | None = None, + planner_with_tasks: Callable[[Session, list[PTask]], list[_PlannedChange]] + | None = None, + empty_message: str, +) -> int: + _validate_confirmation_options(raw_config) + pm = storage.get() + from _pytask.cli import DEFAULTS_FROM_CLI # noqa: PLC0415 + + raw_config = cast("dict[str, Any]", DEFAULTS_FROM_CLI) | raw_config + raw_config["command"] = "lock" + + try: + config = pm.hook.pytask_configure(pm=pm, raw_config=raw_config) + session = Session.from_config(config) + except (ConfigurationError, Exception): # noqa: BLE001 + console.print(Traceback(sys.exc_info())) + session = Session(exit_code=ExitCode.CONFIGURATION_FAILED) + else: + try: + session.hook.pytask_log_session_header(session=session) + session.hook.pytask_collect(session=session) + session.dag = create_dag(session=session) + + if planner_with_tasks is not None: + tasks = _select_tasks(session) + planned_changes = planner_with_tasks(session, tasks) + else: + assert planner is not None + planned_changes = planner(session) + + if planned_changes: + _apply_changes(session, planned_changes) + else: + console.print() + console.print(empty_message) + + console.print() + console.rule(style="default") + except CollectionError: + session.exit_code = ExitCode.COLLECTION_FAILED + console.rule(style="failed") + except ResolvingDependenciesError: + session.exit_code = ExitCode.DAG_FAILED + console.rule(style="failed") + except Exception: # noqa: BLE001 + console.print(Traceback(sys.exc_info())) + console.rule(style="failed") + session.exit_code = ExitCode.FAILED + + if hasattr(session.hook, "pytask_unconfigure"): + session.hook.pytask_unconfigure(session=session) + return session.exit_code + + +@hookimpl(tryfirst=True) +def pytask_extend_command_line_interface(cli: click.Group) -> None: + """Extend the command line interface.""" + cli.add_command(lock) + + +@click.group(cls=ColoredGroup) +def lock() -> None: + """Inspect and update recorded task state in the lockfile.""" + + +@lock.command(cls=ColoredCommand) +@click.option( + "--with-ancestors", + is_flag=True, + default=False, + help="Also include preceding tasks of the selected tasks.", +) +@click.option( + "--with-descendants", + is_flag=True, + default=False, + help="Also include descending tasks of the selected tasks.", +) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Show which recorded states would be updated without writing changes.", +) +@click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Apply the changes without prompting for confirmation.", +) +def accept(**raw_config: Any) -> None: + """Accept the current state for selected tasks without executing them.""" + sys.exit( + _run_lock_command( + raw_config, + planner_with_tasks=_plan_accept_changes, + empty_message="No lockfile entries need updating.", + ) + ) + + +@lock.command(cls=ColoredCommand) +@click.option( + "--with-ancestors", + is_flag=True, + default=False, + help="Also include preceding tasks of the selected tasks.", +) +@click.option( + "--with-descendants", + is_flag=True, + default=False, + help="Also include descending tasks of the selected tasks.", +) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Show which recorded states would be removed without writing changes.", +) +@click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Apply the changes without prompting for confirmation.", +) +def reset(**raw_config: Any) -> None: + """Remove recorded state for selected tasks.""" + sys.exit( + _run_lock_command( + raw_config, + planner_with_tasks=_plan_reset_changes, + empty_message="No lockfile entries need removing.", + ) + ) + + +@lock.command(cls=ColoredCommand) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Show which stale entries would be removed without writing changes.", +) +@click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Apply the changes without prompting for confirmation.", +) +def clean(**raw_config: Any) -> None: + """Remove stale lockfile entries which no longer correspond to collected tasks.""" + sys.exit( + _run_lock_command( + raw_config, + planner=_plan_clean_changes, + empty_message="There are no stale lockfile entries.", + ) + ) diff --git a/src/_pytask/lockfile.py b/src/_pytask/lockfile.py index ea8ba3d6..080fdaa2 100644 --- a/src/_pytask/lockfile.py +++ b/src/_pytask/lockfile.py @@ -61,11 +61,11 @@ class _JournalEntry(msgspec.Struct): def _should_initialize_lockfile_state(command: str | None) -> bool: - return command in (None, "build") + return command in (None, "build", "lock") def _should_validate_lockfile_ids(command: str | None) -> bool: - return command in (None, "build", "collect") + return command in (None, "build", "collect", "lock") def _encode_node_path(path: tuple[str | int, ...]) -> str: @@ -365,6 +365,17 @@ def get_task_entry(self, task_id: str) -> _TaskEntry | None: def get_node_state(self, task_id: str, node_id: str) -> str | None: return self._node_index.get(task_id, {}).get(node_id) + def task_ids(self) -> set[str]: + return set(self._task_index) + + def _update_from_task_index(self) -> None: + self.lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=list(self._task_index.values()), + ) + self._rebuild_indexes() + self._dirty = True + def update_task(self, session: Session, task: PTask) -> None: entry = _build_task_entry(session, task, self.root) if entry is None: @@ -390,6 +401,28 @@ def update_task(self, session: Session, task: PTask) -> None: ) self._dirty = True + def set_task_entries(self, entries: list[_TaskEntry]) -> list[str]: + changed = [] + for entry in entries: + existing = self._task_index.get(entry.id) + if existing == entry: + continue + self._task_index[entry.id] = entry + changed.append(entry.id) + if changed: + self._update_from_task_index() + return changed + + def remove_task_entries(self, task_ids: set[str]) -> list[str]: + removed = [] + for task_id in task_ids: + if task_id in self._task_index: + del self._task_index[task_id] + removed.append(task_id) + if removed: + self._update_from_task_index() + return removed + def rebuild_from_session(self, session: Session) -> None: if session.dag is None: return @@ -451,7 +484,4 @@ def pytask_unconfigure(session: Session) -> None: lockfile_state = session.config.get("lockfile_state") if lockfile_state is None: return - if session.config.get("clean_lockfile"): - lockfile_state.rebuild_from_session(session) - else: - lockfile_state.flush() + lockfile_state.flush() diff --git a/src/_pytask/mark/__init__.py b/src/_pytask/mark/__init__.py index 03e23dd9..8067f31b 100644 --- a/src/_pytask/mark/__init__.py +++ b/src/_pytask/mark/__init__.py @@ -6,6 +6,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING from typing import Any +from typing import cast import click from rich.table import Table @@ -74,6 +75,13 @@ def markers(**raw_config: Any) -> NoReturn: sys.exit(session.exit_code) +def _get_command(cli: click.Group, name: str) -> click.Command: + command: click.Command = cli + for part in name.split(): + command = cast("click.Group", command).commands[part] + return command + + @hookimpl def pytask_extend_command_line_interface(cli: click.Group) -> None: """Add marker related options.""" @@ -101,8 +109,9 @@ def pytask_extend_command_line_interface(cli: click.Group) -> None: default=None, ), ] - for command in ("build", "clean", "collect"): - cli.commands[command].params.extend(additional_build_parameters) + for command in ("build", "clean", "collect", "lock accept", "lock reset"): + target = _get_command(cli, command) + target.params.extend(additional_build_parameters) @hookimpl diff --git a/src/_pytask/parameters.py b/src/_pytask/parameters.py index ae38827e..22bd3af2 100644 --- a/src/_pytask/parameters.py +++ b/src/_pytask/parameters.py @@ -5,6 +5,7 @@ import importlib.util from pathlib import Path from typing import TYPE_CHECKING +from typing import cast import click from click import Context @@ -179,16 +180,41 @@ def pytask_add_hooks(pm: PluginManager) -> None: ) +def _get_command(cli: click.Group, name: str) -> click.Command: + command: click.Command = cli + for part in name.split(): + command = cast("click.Group", command).commands[part] + return command + + @hookimpl(trylast=True) def pytask_extend_command_line_interface(cli: click.Group) -> None: """Register general markers.""" for command in ("build", "clean", "collect", "dag", "profile"): cli.commands[command].params.extend((_DATABASE_URL_OPTION,)) - for command in ("build", "clean", "collect", "dag", "markers", "profile"): - cli.commands[command].params.extend( - (_CONFIG_OPTION, _HOOK_MODULE_OPTION, _PATH_ARGUMENT) - ) - for command in ("build", "clean", "collect", "profile"): - cli.commands[command].params.extend([_IGNORE_OPTION, _EDITOR_URL_SCHEME_OPTION]) + for command in ( + "build", + "clean", + "collect", + "dag", + "lock accept", + "lock clean", + "lock reset", + "markers", + "profile", + ): + target = _get_command(cli, command) + target.params.extend((_CONFIG_OPTION, _HOOK_MODULE_OPTION, _PATH_ARGUMENT)) + for command in ( + "build", + "clean", + "collect", + "lock accept", + "lock clean", + "lock reset", + "profile", + ): + target = _get_command(cli, command) + target.params.extend([_IGNORE_OPTION, _EDITOR_URL_SCHEME_OPTION]) for command in ("build",): cli.commands[command].params.append(_VERBOSE_OPTION) diff --git a/src/_pytask/pluginmanager.py b/src/_pytask/pluginmanager.py index 774de974..025f8698 100644 --- a/src/_pytask/pluginmanager.py +++ b/src/_pytask/pluginmanager.py @@ -53,6 +53,7 @@ def pytask_add_hooks(pm: PluginManager) -> None: "_pytask.provisional", "_pytask.execute", "_pytask.live", + "_pytask.lock", "_pytask.lockfile", "_pytask.logging", "_pytask.mark", diff --git a/tests/test_lock_command.py b/tests/test_lock_command.py new file mode 100644 index 00000000..1c09c555 --- /dev/null +++ b/tests/test_lock_command.py @@ -0,0 +1,993 @@ +from __future__ import annotations + +import textwrap + +import pytest + +from _pytask.lockfile import read_lockfile +from pytask import ExitCode +from pytask import build +from pytask import cli + + +def _write_chain_project(tmp_path): + tmp_path.joinpath("task_upstream.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_upstream(produces=Path("up.txt")): + produces.write_text("up") + """ + ) + ) + tmp_path.joinpath("task_downstream.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_downstream(depends_on=Path("up.txt"), produces=Path("down.txt")): + produces.write_text(depends_on.read_text() + "down") + """ + ) + ) + + +def _write_marked_chain_project(tmp_path): + tmp_path.joinpath("task_upstream.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_upstream(produces=Path("up.txt")): + produces.write_text("up") + """ + ) + ) + tmp_path.joinpath("task_downstream.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + import pytask + + + @pytask.mark.try_first + def task_downstream(depends_on=Path("up.txt"), produces=Path("down.txt")): + produces.write_text(depends_on.read_text() + "down") + """ + ) + ) + + +def _task_ids(tmp_path): + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + return {entry.id for entry in lockfile.task} + + +def _task_state_by_suffix(tmp_path, suffix): + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + for entry in lockfile.task: + if entry.id.endswith(suffix): + return entry.state + msg = f"Could not find lockfile entry ending with {suffix!r}." + raise AssertionError(msg) + + +def _lockfile_text(tmp_path): + return (tmp_path / "pytask.lock").read_text() + + +def _task_by_suffix(session, suffix): + for task in session.tasks: + if task.name.endswith(suffix): + return task + msg = f"Could not find collected task ending with {suffix!r}." + raise AssertionError(msg) + + +def test_lock_help_lists_subcommands(runner): + result = runner.invoke(cli, ["lock", "--help"]) + + assert result.exit_code == ExitCode.OK + assert "accept" in result.output + assert "reset" in result.output + assert "clean" in result.output + + +def test_build_help_no_longer_lists_clean_lockfile(runner): + result = runner.invoke(cli, ["build", "--help"]) + + assert result.exit_code == ExitCode.OK + assert "--clean-lockfile" not in result.output + + +def test_lock_accept_creates_lockfile_without_executing_tasks(runner, tmp_path): + source = """ + from pathlib import Path + + + def task_example(depends_on=Path("in.txt"), produces=Path("out.txt")): + raise RuntimeError("should not execute") + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("in.txt").write_text("data") + tmp_path.joinpath("out.txt").write_text("data") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert (tmp_path / "pytask.lock").exists() + assert "should not execute" not in result.output + + +def test_lock_accept_can_include_ancestors(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + upstream_before = _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + downstream_before = _task_state_by_suffix( + tmp_path, "task_downstream.py::task_downstream" + ) + + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, + [ + "lock", + "accept", + "-k", + "downstream", + "--with-ancestors", + "--yes", + tmp_path.as_posix(), + ], + ) + + assert result.exit_code == ExitCode.OK + assert ( + _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + != upstream_before + ) + assert ( + _task_state_by_suffix(tmp_path, "task_downstream.py::task_downstream") + != downstream_before + ) + + +def test_lock_accept_can_include_descendants(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + upstream_before = _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + downstream_before = _task_state_by_suffix( + tmp_path, "task_downstream.py::task_downstream" + ) + + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, + [ + "lock", + "accept", + "-k", + "upstream", + "--with-descendants", + "--yes", + tmp_path.as_posix(), + ], + ) + + assert result.exit_code == ExitCode.OK + assert ( + _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + != upstream_before + ) + assert ( + _task_state_by_suffix(tmp_path, "task_downstream.py::task_downstream") + != downstream_before + ) + + +def test_lock_accept_updates_selected_task(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + before_upstream = _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + before_downstream = _task_state_by_suffix( + tmp_path, "task_downstream.py::task_downstream" + ) + + downstream = tmp_path / "task_downstream.py" + downstream.write_text(downstream.read_text() + "\n# changed without rerunning\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert ( + _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + == before_upstream + ) + assert ( + _task_state_by_suffix(tmp_path, "task_downstream.py::task_downstream") + != before_downstream + ) + + +def test_lock_accept_uses_intersection_of_keyword_and_marker_selection( + runner, tmp_path +): + _write_marked_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + before_upstream = _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + before_downstream = _task_state_by_suffix( + tmp_path, "task_downstream.py::task_downstream" + ) + + downstream = tmp_path / "task_downstream.py" + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, + [ + "lock", + "accept", + "-k", + "downstream", + "-m", + "try_first", + "--yes", + tmp_path.as_posix(), + ], + ) + + assert result.exit_code == ExitCode.OK + assert ( + _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + == before_upstream + ) + assert ( + _task_state_by_suffix(tmp_path, "task_downstream.py::task_downstream") + != before_downstream + ) + + +def test_lock_accept_no_matching_selection_is_a_no_op(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + before = _lockfile_text(tmp_path) + + result = runner.invoke( + cli, ["lock", "accept", "-k", "missing", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert "No lockfile entries need updating." in result.output + assert _lockfile_text(tmp_path) == before + + +def test_lock_accept_interactive_only_applies_confirmed_changes(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + before_upstream = _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + before_downstream = _task_state_by_suffix( + tmp_path, "task_downstream.py::task_downstream" + ) + + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, + [ + "lock", + "accept", + "-k", + "upstream", + "--with-descendants", + tmp_path.as_posix(), + ], + input="y\nn\n", + ) + + assert result.exit_code == ExitCode.OK + upstream_changed = ( + _task_state_by_suffix(tmp_path, "task_upstream.py::task_upstream") + != before_upstream + ) + downstream_changed = ( + _task_state_by_suffix(tmp_path, "task_downstream.py::task_downstream") + != before_downstream + ) + assert upstream_changed ^ downstream_changed + + +def test_lock_accept_current_task_is_a_no_op_without_rewrite(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + lockfile = tmp_path / "pytask.lock" + before_text = lockfile.read_text() + before_mtime = lockfile.stat().st_mtime_ns + + result = runner.invoke( + cli, ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert "No lockfile entries need updating." in result.output + assert lockfile.read_text() == before_text + assert lockfile.stat().st_mtime_ns == before_mtime + + +def test_lock_accept_fails_for_missing_product(runner, tmp_path): + source = """ + from pathlib import Path + + + def task_example(depends_on=Path("in.txt"), produces=Path("out.txt")): + raise RuntimeError("should not execute") + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("in.txt").write_text("data") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.FAILED + assert "missing product" in result.output + + +def test_lock_accept_fails_for_missing_dependency(runner, tmp_path): + source = """ + from pathlib import Path + + + def task_example(depends_on=Path("in.txt"), produces=Path("out.txt")): + raise RuntimeError("should not execute") + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("out.txt").write_text("data") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.FAILED + assert "requires missing node" in result.output + + +def test_lock_accept_fails_when_task_state_is_missing(runner, tmp_path): + source = """ + from dataclasses import dataclass, field + from pathlib import Path + from typing import Any + + from pytask import PathNode + + @dataclass(kw_only=True) + class CustomTask: + name: str + function: Any + depends_on: dict[str, Any] = field(default_factory=dict) + produces: dict[str, Any] = field(default_factory=dict) + markers: list[Any] = field(default_factory=list) + report_sections: list[tuple[str, str, str]] = field(default_factory=list) + attributes: dict[Any, Any] = field(default_factory=dict) + + @property + def signature(self): + return "custom-signature" + + def state(self): + return None + + def execute(self, **kwargs): + return self.function(**kwargs) + + def func(path): raise RuntimeError("should not execute") + + task_create_file = CustomTask( + name="task_custom", + function=func, + produces={"path": PathNode(path=Path(__file__).parent / "out.txt")}, + ) + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("out.txt").write_text("done") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.FAILED + assert "has no state and cannot be accepted" in result.output + + +def test_lock_accept_works_for_task_without_path_via_cli(runner, tmp_path): + source = """ + from pathlib import Path + + from pytask import PathNode, TaskWithoutPath + + def func(path): raise RuntimeError("should not execute") + + task_create_file = TaskWithoutPath( + name="task_without_path", + function=func, + produces={"path": PathNode(path=Path(__file__).parent / "out.txt")}, + ) + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("out.txt").write_text("done") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == {"task_without_path"} + + +def test_lock_accept_records_custom_node_state(runner, tmp_path): + source = """ + from dataclasses import dataclass, field + from pathlib import Path + from typing import Any, Annotated + + from pytask import Product + + @dataclass + class CustomNode: + name: str + filepath: Path + signature: str + attributes: dict[Any, Any] = field(default_factory=dict) + + def state(self): + if not self.filepath.exists(): + return None + return self.filepath.read_text() + + def load(self, is_product=False): + return self if is_product else self.filepath.read_text() + + def save(self, value): + self.filepath.write_text(value) + + def task_example( + dependency=CustomNode( + name="custom_dependency", + filepath=Path(__file__).parent / "in.txt", + signature="signature-a", + ), + product: Annotated[CustomNode, Product] = CustomNode( + name="custom_product", + filepath=Path(__file__).parent / "out.txt", + signature="signature-b", + ), + ): + raise RuntimeError("should not execute") + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("in.txt").write_text("hello") + tmp_path.joinpath("out.txt").write_text("HELLO") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + entry = lockfile.task[0] + assert "custom_dependency" in entry.depends_on + assert "custom_product" in entry.produces + + +def test_lock_accept_fails_with_provisional_dependencies(runner, tmp_path): + source = """ + from typing import Annotated + from pathlib import Path + + from pytask import DirectoryNode + + def task_example(paths=DirectoryNode(pattern="*.txt")) -> Annotated[str, Path("out.txt")]: + raise RuntimeError("should not execute") + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("a.txt").write_text("a") + + result = runner.invoke(cli, ["lock", "accept", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.FAILED + assert "accepting lockfile state" in result.output + + +def test_lock_reset_does_not_include_ancestors_by_default(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, ["lock", "reset", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == {"task_upstream.py::task_upstream"} + + +def test_lock_reset_can_include_ancestors(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, + [ + "lock", + "reset", + "-k", + "downstream", + "--with-ancestors", + "--yes", + tmp_path.as_posix(), + ], + ) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == set() + + +def test_lock_reset_can_include_descendants(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, + [ + "lock", + "reset", + "-k", + "upstream", + "--with-descendants", + "--yes", + tmp_path.as_posix(), + ], + ) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == set() + + +def test_lock_reset_uses_intersection_of_keyword_and_marker_selection(runner, tmp_path): + _write_marked_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, + [ + "lock", + "reset", + "-k", + "downstream", + "-m", + "try_first", + "--yes", + tmp_path.as_posix(), + ], + ) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == {"task_upstream.py::task_upstream"} + + +def test_lock_reset_no_matching_selection_is_a_no_op(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + before = _lockfile_text(tmp_path) + + result = runner.invoke( + cli, ["lock", "reset", "-k", "missing", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert "No lockfile entries need removing." in result.output + assert _lockfile_text(tmp_path) == before + + +def test_lock_reset_when_selected_task_is_absent_is_a_no_op(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, ["lock", "reset", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + before = _lockfile_text(tmp_path) + result = runner.invoke( + cli, ["lock", "reset", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert "No lockfile entries need removing." in result.output + assert _lockfile_text(tmp_path) == before + + +def test_lock_reset_interactive_only_applies_confirmed_changes(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, + [ + "lock", + "reset", + "-k", + "upstream", + "--with-descendants", + tmp_path.as_posix(), + ], + input="y\nn\n", + ) + + assert result.exit_code == ExitCode.OK + assert len(_task_ids(tmp_path)) == 1 + + +def test_lock_reset_dry_run_does_not_modify_lockfile(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + before = _lockfile_text(tmp_path) + + result = runner.invoke( + cli, ["lock", "reset", "-k", "downstream", "--dry-run", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert _lockfile_text(tmp_path) == before + + +def test_lock_reset_followed_by_build_reconsiders_task(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke( + cli, ["lock", "reset", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert "1 Skipped because unchanged" in result.output + + +def test_lock_clean_removes_stale_entries(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + tmp_path.joinpath("task_downstream.py").unlink() + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == {"task_upstream.py::task_upstream"} + + +def test_lock_clean_dry_run_is_read_only(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + tmp_path.joinpath("task_downstream.py").unlink() + before = _lockfile_text(tmp_path) + + result = runner.invoke(cli, ["lock", "clean", "--dry-run", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert ( + "Would remove recorded state for task_downstream.py::task_downstream." + in result.output + ) + assert _lockfile_text(tmp_path) == before + + +def test_lock_clean_interactive_only_applies_confirmed_changes(runner, tmp_path): + tmp_path.joinpath("task_alpha.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_alpha(produces=Path("alpha.txt")): + produces.write_text("alpha") + """ + ) + ) + tmp_path.joinpath("task_beta.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_beta(produces=Path("beta.txt")): + produces.write_text("beta") + """ + ) + ) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + tmp_path.joinpath("task_alpha.py").unlink() + tmp_path.joinpath("task_beta.py").unlink() + + result = runner.invoke( + cli, + ["lock", "clean", tmp_path.as_posix()], + input="y\nn\n", + ) + + assert result.exit_code == ExitCode.OK + assert len(_task_ids(tmp_path)) == 1 + + +def test_lock_clean_reports_when_no_stale_entries_exist(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "There are no stale lockfile entries." in result.output + + +def test_lock_clean_on_fresh_project_without_lockfile_is_harmless(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "There are no stale lockfile entries." in result.output + assert not (tmp_path / "pytask.lock").exists() + + +def test_lock_clean_removes_multiple_stale_entries_without_adding_new_ones( + runner, tmp_path +): + tmp_path.joinpath("task_alpha.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_alpha(produces=Path("alpha.txt")): + produces.write_text("alpha") + """ + ) + ) + tmp_path.joinpath("task_beta.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_beta(produces=Path("beta.txt")): + produces.write_text("beta") + """ + ) + ) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + tmp_path.joinpath("task_alpha.py").unlink() + tmp_path.joinpath("task_beta.py").unlink() + tmp_path.joinpath("task_gamma.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_gamma(produces=Path("gamma.txt")): + produces.write_text("gamma") + """ + ) + ) + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == set() + + +def test_lock_accept_followed_by_build_skips_changed_task(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + downstream = tmp_path / "task_downstream.py" + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "2 Skipped because unchanged" in result.output + + +@pytest.mark.parametrize( + ("content", "message"), + [ + ("{not toml", "Lockfile has invalid format"), + ('lock-version = "0.9"\ntask = []\n', "Unsupported lock-version"), + ('lock-version = "9.0"\ntask = []\n', "Unsupported lock-version"), + ], +) +def test_lock_commands_fail_for_invalid_lockfiles(runner, tmp_path, content, message): + _write_chain_project(tmp_path) + tmp_path.joinpath("pytask.lock").write_text(content) + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.CONFIGURATION_FAILED + assert message in result.output + + +def test_lock_accept_on_database_only_project_creates_lockfile(runner, tmp_path): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert (tmp_path / ".pytask" / "pytask.sqlite3").exists() + + (tmp_path / "pytask.lock").unlink() + + downstream = tmp_path / "task_downstream.py" + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + + assert result.exit_code == ExitCode.OK + assert (tmp_path / "pytask.lock").exists() + + +@pytest.mark.parametrize("subcommand", ["accept", "reset", "clean"]) +def test_lock_rejects_dry_run_and_yes_together(runner, tmp_path, subcommand): + _write_chain_project(tmp_path) + + args = ["lock", subcommand, "--dry-run", "--yes", tmp_path.as_posix()] + result = runner.invoke(cli, args) + + assert result.exit_code == 2 + assert "mutually exclusive" in result.output + + +@pytest.mark.parametrize("subcommand", ["accept", "reset", "clean"]) +def test_lock_commands_replay_journal_before_applying_changes( + runner, tmp_path, subcommand +): + _write_chain_project(tmp_path) + + session = build(paths=tmp_path) + assert session.exit_code == ExitCode.OK + + lockfile_state = session.config["lockfile_state"] + assert lockfile_state is not None + + downstream = tmp_path / "task_downstream.py" + downstream.write_text(downstream.read_text() + "\n# journal change\n") + downstream_task = _task_by_suffix(session, "task_downstream.py::task_downstream") + lockfile_state.update_task(session, downstream_task) + + journal_path = (tmp_path / "pytask.lock").with_suffix(".lock.journal") + assert journal_path.exists() + + if subcommand == "accept": + downstream.write_text(downstream.read_text() + "\n# current change\n") + args = ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + elif subcommand == "reset": + args = ["lock", "reset", "-k", "downstream", "--yes", tmp_path.as_posix()] + else: + tmp_path.joinpath("task_downstream.py").unlink() + args = ["lock", "clean", "--yes", tmp_path.as_posix()] + + result = runner.invoke(cli, args) + + assert result.exit_code == ExitCode.OK + assert not journal_path.exists() + + +def test_lock_command_fails_for_ambiguous_lockfile_ids(runner, tmp_path): + source = """ + from dataclasses import dataclass, field + from pathlib import Path + from typing import Any + + @dataclass + class CustomNode: + name: str + value: str + signature: str + attributes: dict[Any, Any] = field(default_factory=dict) + + def state(self): + return self.value + + def load(self, is_product=False): + return self.value + + def save(self, value): + self.value = value + + def task_example( + first=CustomNode(name="dup", value="1", signature="signature-a"), + second=CustomNode(name="dup", value="2", signature="signature-b"), + produces=Path("out.txt"), + ): + raise RuntimeError("should not execute") + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.COLLECTION_FAILED + assert "Ambiguous lockfile ids detected" in result.output diff --git a/tests/test_lockfile.py b/tests/test_lockfile.py index 2aebe95b..7dcdbf19 100644 --- a/tests/test_lockfile.py +++ b/tests/test_lockfile.py @@ -226,37 +226,6 @@ def func(path): assert state.hash_ == session.tasks[0].state() -def test_clean_lockfile_removes_stale_entries(tmp_path): - def func_first(path): - path.touch() - - def func_second(path): - path.touch() - - task_first = TaskWithoutPath( - name="task_first", - function=func_first, - produces={"path": PathNode(path=tmp_path / "first.txt")}, - ) - task_second = TaskWithoutPath( - name="task_second", - function=func_second, - produces={"path": PathNode(path=tmp_path / "second.txt")}, - ) - - session = build(tasks=[task_first, task_second], paths=tmp_path) - assert session.exit_code == ExitCode.OK - lockfile = read_lockfile(tmp_path / "pytask.lock") - assert lockfile is not None - assert {entry.id for entry in lockfile.task} == {"task_first", "task_second"} - - session = build(tasks=[task_first], paths=tmp_path, clean_lockfile=True) - assert session.exit_code == ExitCode.OK - lockfile = read_lockfile(tmp_path / "pytask.lock") - assert lockfile is not None - assert {entry.id for entry in lockfile.task} == {"task_first"} - - def test_update_task_skips_write_when_unchanged(tmp_path, monkeypatch): def func(path): path.write_text("data") From 468fdc68b32ee08bcc83c215d5f881decc6f1264 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 Apr 2026 16:41:36 +0200 Subject: [PATCH 02/16] fix --- tests/test_lock_command.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_lock_command.py b/tests/test_lock_command.py index 1c09c555..2dd8e0a2 100644 --- a/tests/test_lock_command.py +++ b/tests/test_lock_command.py @@ -517,7 +517,9 @@ def test_lock_accept_fails_with_provisional_dependencies(runner, tmp_path): from pytask import DirectoryNode - def task_example(paths=DirectoryNode(pattern="*.txt")) -> Annotated[str, Path("out.txt")]: + def task_example( + paths=DirectoryNode(pattern="*.txt") + ) -> Annotated[str, Path("out.txt")]: raise RuntimeError("should not execute") """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) From 6bc0e5d96d8a6432f137e9df91f94c4af0ffce75 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 Apr 2026 20:16:38 +0200 Subject: [PATCH 03/16] Simplify lock selection workflow --- src/_pytask/lock.py | 89 ++++--------- tests/test_lock_command.py | 260 +++++++++++++++++++++++++++++-------- 2 files changed, 225 insertions(+), 124 deletions(-) diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py index d35e9e7c..9beab2c7 100644 --- a/src/_pytask/lock.py +++ b/src/_pytask/lock.py @@ -15,7 +15,6 @@ from _pytask.click import ColoredGroup from _pytask.console import console from _pytask.dag import create_dag -from _pytask.dag_utils import task_and_descending_tasks from _pytask.dag_utils import task_and_preceding_tasks from _pytask.exceptions import CollectionError from _pytask.exceptions import ConfigurationError @@ -25,6 +24,10 @@ from _pytask.lockfile import _build_task_entry from _pytask.lockfile import _TaskEntry from _pytask.lockfile import build_portable_task_id +from _pytask.mark import Expression +from _pytask.mark import KeywordMatcher +from _pytask.mark import MarkMatcher +from _pytask.mark import ParseError from _pytask.node_protocols import PNode from _pytask.node_protocols import PProvisionalNode from _pytask.node_protocols import PTask @@ -37,7 +40,6 @@ if TYPE_CHECKING: from collections.abc import Callable - from _pytask.dag_graph import DAG from _pytask.lockfile import LockfileState @@ -55,10 +57,6 @@ def _validate_confirmation_options(raw_config: dict[str, Any]) -> None: def _keyword_filter(tasks: list[PTask], expression: str) -> set[str]: - from _pytask.mark import Expression # noqa: PLC0415 - from _pytask.mark import KeywordMatcher # noqa: PLC0415 - from _pytask.mark import ParseError # noqa: PLC0415 - try: compiled = Expression.compile_(expression) except ParseError as e: @@ -73,10 +71,6 @@ def _keyword_filter(tasks: list[PTask], expression: str) -> set[str]: def _marker_filter(tasks: list[PTask], expression: str) -> set[str]: - from _pytask.mark import Expression # noqa: PLC0415 - from _pytask.mark import MarkMatcher # noqa: PLC0415 - from _pytask.mark import ParseError # noqa: PLC0415 - try: compiled = Expression.compile_(expression) except ParseError as e: @@ -90,32 +84,7 @@ def _marker_filter(tasks: list[PTask], expression: str) -> set[str]: } -def _expand_task_selection( - task_signatures: set[str], - dag: DAG, - *, - with_ancestors: bool, - with_descendants: bool, -) -> set[str]: - selected = set(task_signatures) - if with_ancestors: - selected |= set( - chain.from_iterable( - task_and_preceding_tasks(signature, dag) - for signature in task_signatures - ) - ) - if with_descendants: - selected |= set( - chain.from_iterable( - task_and_descending_tasks(signature, dag) - for signature in task_signatures - ) - ) - return selected - - -def _select_tasks(session: Session) -> list[PTask]: +def _select_tasks_exact(session: Session) -> list[PTask]: selected = {task.signature for task in session.tasks} expression = session.config.get("expression") @@ -126,11 +95,15 @@ def _select_tasks(session: Session) -> list[PTask]: if marker_expression: selected &= _marker_filter(session.tasks, marker_expression) - selected = _expand_task_selection( - selected, - session.dag, - with_ancestors=session.config.get("with_ancestors", False), - with_descendants=session.config.get("with_descendants", False), + return [task for task in session.tasks if task.signature in selected] + + +def _select_tasks_with_ancestors(session: Session) -> list[PTask]: + selected = {task.signature for task in _select_tasks_exact(session)} + selected |= set( + chain.from_iterable( + task_and_preceding_tasks(signature, session.dag) for signature in selected + ) ) return [task for task in session.tasks if task.signature in selected] @@ -286,7 +259,11 @@ def _run_lock_command( session.dag = create_dag(session=session) if planner_with_tasks is not None: - tasks = _select_tasks(session) + tasks = ( + _select_tasks_with_ancestors(session) + if raw_config["subcommand"] == "accept" + else _select_tasks_exact(session) + ) planned_changes = planner_with_tasks(session, tasks) else: assert planner is not None @@ -328,18 +305,6 @@ def lock() -> None: @lock.command(cls=ColoredCommand) -@click.option( - "--with-ancestors", - is_flag=True, - default=False, - help="Also include preceding tasks of the selected tasks.", -) -@click.option( - "--with-descendants", - is_flag=True, - default=False, - help="Also include descending tasks of the selected tasks.", -) @click.option( "--dry-run", is_flag=True, @@ -354,7 +319,8 @@ def lock() -> None: help="Apply the changes without prompting for confirmation.", ) def accept(**raw_config: Any) -> None: - """Accept the current state for selected tasks without executing them.""" + """Accept the current state for selected tasks and their ancestors.""" + raw_config["subcommand"] = "accept" sys.exit( _run_lock_command( raw_config, @@ -365,18 +331,6 @@ def accept(**raw_config: Any) -> None: @lock.command(cls=ColoredCommand) -@click.option( - "--with-ancestors", - is_flag=True, - default=False, - help="Also include preceding tasks of the selected tasks.", -) -@click.option( - "--with-descendants", - is_flag=True, - default=False, - help="Also include descending tasks of the selected tasks.", -) @click.option( "--dry-run", is_flag=True, @@ -392,6 +346,7 @@ def accept(**raw_config: Any) -> None: ) def reset(**raw_config: Any) -> None: """Remove recorded state for selected tasks.""" + raw_config["subcommand"] = "reset" sys.exit( _run_lock_command( raw_config, diff --git a/tests/test_lock_command.py b/tests/test_lock_command.py index 2dd8e0a2..022e9700 100644 --- a/tests/test_lock_command.py +++ b/tests/test_lock_command.py @@ -63,6 +63,20 @@ def task_downstream(depends_on=Path("up.txt"), produces=Path("down.txt")): ) +def _write_single_task_project(tmp_path): + tmp_path.joinpath("task_example.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_example(produces=Path("out.txt")): + produces.write_text("data") + """ + ) + ) + + def _task_ids(tmp_path): lockfile = read_lockfile(tmp_path / "pytask.lock") assert lockfile is not None @@ -126,7 +140,7 @@ def task_example(depends_on=Path("in.txt"), produces=Path("out.txt")): assert "should not execute" not in result.output -def test_lock_accept_can_include_ancestors(runner, tmp_path): +def test_lock_accept_includes_ancestors_of_selected_tasks(runner, tmp_path): _write_chain_project(tmp_path) result = runner.invoke(cli, [tmp_path.as_posix()]) @@ -149,7 +163,6 @@ def test_lock_accept_can_include_ancestors(runner, tmp_path): "accept", "-k", "downstream", - "--with-ancestors", "--yes", tmp_path.as_posix(), ], @@ -166,7 +179,7 @@ def test_lock_accept_can_include_ancestors(runner, tmp_path): ) -def test_lock_accept_can_include_descendants(runner, tmp_path): +def test_lock_accept_does_not_include_descendants_of_selected_tasks(runner, tmp_path): _write_chain_project(tmp_path) result = runner.invoke(cli, [tmp_path.as_posix()]) @@ -189,7 +202,6 @@ def test_lock_accept_can_include_descendants(runner, tmp_path): "accept", "-k", "upstream", - "--with-descendants", "--yes", tmp_path.as_posix(), ], @@ -202,7 +214,7 @@ def test_lock_accept_can_include_descendants(runner, tmp_path): ) assert ( _task_state_by_suffix(tmp_path, "task_downstream.py::task_downstream") - != downstream_before + == downstream_before ) @@ -315,8 +327,7 @@ def test_lock_accept_interactive_only_applies_confirmed_changes(runner, tmp_path "lock", "accept", "-k", - "upstream", - "--with-descendants", + "downstream", tmp_path.as_posix(), ], input="y\nn\n", @@ -531,7 +542,7 @@ def task_example( assert "accepting lockfile state" in result.output -def test_lock_reset_does_not_include_ancestors_by_default(runner, tmp_path): +def test_lock_reset_only_affects_exact_selection(runner, tmp_path): _write_chain_project(tmp_path) result = runner.invoke(cli, [tmp_path.as_posix()]) @@ -545,52 +556,6 @@ def test_lock_reset_does_not_include_ancestors_by_default(runner, tmp_path): assert _task_ids(tmp_path) == {"task_upstream.py::task_upstream"} -def test_lock_reset_can_include_ancestors(runner, tmp_path): - _write_chain_project(tmp_path) - - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - - result = runner.invoke( - cli, - [ - "lock", - "reset", - "-k", - "downstream", - "--with-ancestors", - "--yes", - tmp_path.as_posix(), - ], - ) - - assert result.exit_code == ExitCode.OK - assert _task_ids(tmp_path) == set() - - -def test_lock_reset_can_include_descendants(runner, tmp_path): - _write_chain_project(tmp_path) - - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - - result = runner.invoke( - cli, - [ - "lock", - "reset", - "-k", - "upstream", - "--with-descendants", - "--yes", - tmp_path.as_posix(), - ], - ) - - assert result.exit_code == ExitCode.OK - assert _task_ids(tmp_path) == set() - - def test_lock_reset_uses_intersection_of_keyword_and_marker_selection(runner, tmp_path): _write_marked_chain_project(tmp_path) @@ -664,9 +629,6 @@ def test_lock_reset_interactive_only_applies_confirmed_changes(runner, tmp_path) [ "lock", "reset", - "-k", - "upstream", - "--with-descendants", tmp_path.as_posix(), ], input="y\nn\n", @@ -874,6 +836,190 @@ def test_lock_accept_followed_by_build_skips_changed_task(runner, tmp_path): assert "2 Skipped because unchanged" in result.output +class TestScenarios: + def test_lock_accept_and_reset_end_to_end_workflow_for_single_task( + self, runner, tmp_path + ): + _write_single_task_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + task = tmp_path / "task_example.py" + task.write_text(task.read_text() + "\n# changed without rerunning\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "example", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Skipped because unchanged" in result.output + + result = runner.invoke( + cli, ["lock", "reset", "-k", "example", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + + def test_lock_accept_downstream_target_then_build_skips_target_and_ancestors( + self, runner, tmp_path + ): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "2 Skipped because unchanged" in result.output + + def test_lock_accept_upstream_target_then_build_only_runs_unaccepted_descendant( + self, runner, tmp_path + ): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "upstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert "1 Skipped because unchanged" in result.output + + def test_lock_accept_with_ancestors_then_exact_reset_reexecutes_only_target( + self, runner, tmp_path + ): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, ["lock", "accept", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "2 Skipped because unchanged" in result.output + + result = runner.invoke( + cli, ["lock", "reset", "-k", "downstream", "--yes", tmp_path.as_posix()] + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert "1 Skipped because unchanged" in result.output + + def test_lock_accept_interactive_partial_workflow_only_reexecutes_unaccepted_tasks( + self, runner, tmp_path + ): + _write_chain_project(tmp_path) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + upstream = tmp_path / "task_upstream.py" + downstream = tmp_path / "task_downstream.py" + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + result = runner.invoke( + cli, + ["lock", "accept", "-k", "downstream", tmp_path.as_posix()], + input="y\nn\n", + ) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert "1 Skipped because unchanged" in result.output + + def test_lock_clean_removes_stale_entries_without_accepting_new_tasks_workflow( + self, runner, tmp_path + ): + tmp_path.joinpath("task_alpha.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_alpha(produces=Path("alpha.txt")): + produces.write_text("alpha") + """ + ) + ) + tmp_path.joinpath("task_beta.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_beta(produces=Path("beta.txt")): + produces.write_text("beta") + """ + ) + ) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + tmp_path.joinpath("task_alpha.py").unlink() + tmp_path.joinpath("task_gamma.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + + def task_gamma(produces=Path("gamma.txt")): + produces.write_text("gamma") + """ + ) + ) + + result = runner.invoke(cli, ["lock", "clean", "--yes", tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert _task_ids(tmp_path) == {"task_beta.py::task_beta"} + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert "1 Skipped because unchanged" in result.output + + @pytest.mark.parametrize( ("content", "message"), [ From fb0d9f8a7b1a45fefb6ea196ff5df4e81e8e39df Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 Apr 2026 20:42:59 +0200 Subject: [PATCH 04/16] fix: simplify --- src/_pytask/click.py | 10 ++++- src/_pytask/lock.py | 75 +++++++++++++++--------------------- src/_pytask/mark/__init__.py | 11 +----- src/_pytask/parameters.py | 13 ++----- 4 files changed, 46 insertions(+), 63 deletions(-) diff --git a/src/_pytask/click.py b/src/_pytask/click.py index ba7abbf8..1ab18fca 100644 --- a/src/_pytask/click.py +++ b/src/_pytask/click.py @@ -33,7 +33,7 @@ from collections.abc import Sequence -__all__ = ["ColoredCommand", "ColoredGroup", "EnumChoice"] +__all__ = ["ColoredCommand", "ColoredGroup", "EnumChoice", "get_command"] if importlib.metadata.version("click") < "8.2": @@ -47,6 +47,14 @@ def split_opt(option: str) -> tuple[str, str]: return cast("Callable[[str], tuple[str, str]]", _split_opt)(option) +def get_command(cli: click.Group, name: str) -> click.Command: + """Get a nested command by name.""" + command: click.Command = cli + for part in name.split(): + command = cast("click.Group", command).commands[part] + return command + + class EnumChoice(Choice): """An enum-based choice type. diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py index 9beab2c7..6951bd10 100644 --- a/src/_pytask/lock.py +++ b/src/_pytask/lock.py @@ -43,6 +43,27 @@ from _pytask.lockfile import LockfileState +def _add_lock_command_options( + *, dry_run_help: str +) -> Callable[[Callable[..., None]], Callable[..., None]]: + def decorator(func: Callable[..., None]) -> Callable[..., None]: + func = click.option( + "--dry-run", + is_flag=True, + default=False, + help=dry_run_help, + )(func) + return click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Apply the changes without prompting for confirmation.", + )(func) + + return decorator + + @dataclass(slots=True) class _PlannedChange: kind: str @@ -237,6 +258,7 @@ def _run_lock_command( planner: Callable[[Session], list[_PlannedChange]] | None = None, planner_with_tasks: Callable[[Session, list[PTask]], list[_PlannedChange]] | None = None, + select_tasks: Callable[[Session], list[PTask]] | None = None, empty_message: str, ) -> int: _validate_confirmation_options(raw_config) @@ -259,11 +281,8 @@ def _run_lock_command( session.dag = create_dag(session=session) if planner_with_tasks is not None: - tasks = ( - _select_tasks_with_ancestors(session) - if raw_config["subcommand"] == "accept" - else _select_tasks_exact(session) - ) + assert select_tasks is not None + tasks = select_tasks(session) planned_changes = planner_with_tasks(session, tasks) else: assert planner is not None @@ -305,70 +324,40 @@ def lock() -> None: @lock.command(cls=ColoredCommand) -@click.option( - "--dry-run", - is_flag=True, - default=False, - help="Show which recorded states would be updated without writing changes.", -) -@click.option( - "-y", - "--yes", - is_flag=True, - default=False, - help="Apply the changes without prompting for confirmation.", +@_add_lock_command_options( + dry_run_help="Show which recorded states would be updated without writing changes." ) def accept(**raw_config: Any) -> None: """Accept the current state for selected tasks and their ancestors.""" - raw_config["subcommand"] = "accept" sys.exit( _run_lock_command( raw_config, planner_with_tasks=_plan_accept_changes, + select_tasks=_select_tasks_with_ancestors, empty_message="No lockfile entries need updating.", ) ) @lock.command(cls=ColoredCommand) -@click.option( - "--dry-run", - is_flag=True, - default=False, - help="Show which recorded states would be removed without writing changes.", -) -@click.option( - "-y", - "--yes", - is_flag=True, - default=False, - help="Apply the changes without prompting for confirmation.", +@_add_lock_command_options( + dry_run_help="Show which recorded states would be removed without writing changes." ) def reset(**raw_config: Any) -> None: """Remove recorded state for selected tasks.""" - raw_config["subcommand"] = "reset" sys.exit( _run_lock_command( raw_config, planner_with_tasks=_plan_reset_changes, + select_tasks=_select_tasks_exact, empty_message="No lockfile entries need removing.", ) ) @lock.command(cls=ColoredCommand) -@click.option( - "--dry-run", - is_flag=True, - default=False, - help="Show which stale entries would be removed without writing changes.", -) -@click.option( - "-y", - "--yes", - is_flag=True, - default=False, - help="Apply the changes without prompting for confirmation.", +@_add_lock_command_options( + dry_run_help="Show which stale entries would be removed without writing changes." ) def clean(**raw_config: Any) -> None: """Remove stale lockfile entries which no longer correspond to collected tasks.""" diff --git a/src/_pytask/mark/__init__.py b/src/_pytask/mark/__init__.py index 8067f31b..b4af1238 100644 --- a/src/_pytask/mark/__init__.py +++ b/src/_pytask/mark/__init__.py @@ -6,12 +6,12 @@ from dataclasses import dataclass from typing import TYPE_CHECKING from typing import Any -from typing import cast import click from rich.table import Table from _pytask.click import ColoredCommand +from _pytask.click import get_command from _pytask.console import console from _pytask.dag_utils import task_and_preceding_tasks from _pytask.exceptions import ConfigurationError @@ -75,13 +75,6 @@ def markers(**raw_config: Any) -> NoReturn: sys.exit(session.exit_code) -def _get_command(cli: click.Group, name: str) -> click.Command: - command: click.Command = cli - for part in name.split(): - command = cast("click.Group", command).commands[part] - return command - - @hookimpl def pytask_extend_command_line_interface(cli: click.Group) -> None: """Add marker related options.""" @@ -110,7 +103,7 @@ def pytask_extend_command_line_interface(cli: click.Group) -> None: ), ] for command in ("build", "clean", "collect", "lock accept", "lock reset"): - target = _get_command(cli, command) + target = get_command(cli, command) target.params.extend(additional_build_parameters) diff --git a/src/_pytask/parameters.py b/src/_pytask/parameters.py index 22bd3af2..3baee8ef 100644 --- a/src/_pytask/parameters.py +++ b/src/_pytask/parameters.py @@ -5,7 +5,6 @@ import importlib.util from pathlib import Path from typing import TYPE_CHECKING -from typing import cast import click from click import Context @@ -13,6 +12,7 @@ from sqlalchemy.engine import make_url from sqlalchemy.exc import ArgumentError +from _pytask.click import get_command from _pytask.config_utils import set_defaults_from_config from _pytask.path import import_path from _pytask.pluginmanager import hookimpl @@ -180,13 +180,6 @@ def pytask_add_hooks(pm: PluginManager) -> None: ) -def _get_command(cli: click.Group, name: str) -> click.Command: - command: click.Command = cli - for part in name.split(): - command = cast("click.Group", command).commands[part] - return command - - @hookimpl(trylast=True) def pytask_extend_command_line_interface(cli: click.Group) -> None: """Register general markers.""" @@ -203,7 +196,7 @@ def pytask_extend_command_line_interface(cli: click.Group) -> None: "markers", "profile", ): - target = _get_command(cli, command) + target = get_command(cli, command) target.params.extend((_CONFIG_OPTION, _HOOK_MODULE_OPTION, _PATH_ARGUMENT)) for command in ( "build", @@ -214,7 +207,7 @@ def pytask_extend_command_line_interface(cli: click.Group) -> None: "lock reset", "profile", ): - target = _get_command(cli, command) + target = get_command(cli, command) target.params.extend([_IGNORE_OPTION, _EDITOR_URL_SCHEME_OPTION]) for command in ("build",): cli.commands[command].params.append(_VERBOSE_OPTION) From 87d9a97cb00595f1532f9547e6b59c55661ba4cf Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 13:23:43 +0200 Subject: [PATCH 05/16] Document lock workflows --- .pre-commit-config.yaml | 7 + docs/source/_static/md/lock-accept-dry-run.md | 16 ++ .../_static/md/lock-accept-interactive.md | 16 ++ docs/source/_static/md/lock-clean.md | 17 ++ docs/source/how_to_guides/index.md | 1 + docs/source/how_to_guides/portability.md | 2 - .../reconciling_lockfile_state.md | 133 ++++++++++ mkdocs.yml | 1 + scripts/check_termynal_line_lengths.py | 74 ++++++ scripts/demo_lock_workflows.py | 244 ++++++++++++++++++ 10 files changed, 509 insertions(+), 2 deletions(-) create mode 100644 docs/source/_static/md/lock-accept-dry-run.md create mode 100644 docs/source/_static/md/lock-accept-interactive.md create mode 100644 docs/source/_static/md/lock-clean.md create mode 100644 docs/source/how_to_guides/reconciling_lockfile_state.md create mode 100644 scripts/check_termynal_line_lengths.py create mode 100644 scripts/demo_lock_workflows.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3e2fa90b..cdb95826 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,6 +33,13 @@ repos: rev: 0.11.6 hooks: - id: uv-lock +- repo: local + hooks: + - id: check-termynal-line-lengths + name: Check Termynal line lengths + entry: uv run python scripts/check_termynal_line_lengths.py + language: system + files: ^docs/source/_static/md/.*\.md$ - repo: https://github.com/executablebooks/mdformat rev: 1.0.0 hooks: diff --git a/docs/source/_static/md/lock-accept-dry-run.md b/docs/source/_static/md/lock-accept-dry-run.md new file mode 100644 index 00000000..49d39596 --- /dev/null +++ b/docs/source/_static/md/lock-accept-dry-run.md @@ -0,0 +1,16 @@ +
+ +```console +$ pytask lock accept -k train --dry-run + +────────────────────────── Start pytask session ────────────────────────── +Platform: <platform> -- Python <version>, pytask <version> +Root: <path> +Collected 2 tasks. +Would accept recorded state for task_train.py::task_train. +Would accept recorded state for task_evaluate.py::task_evaluate. + +───────────────────────────────────────────────────────────────────────── +``` + +
diff --git a/docs/source/_static/md/lock-accept-interactive.md b/docs/source/_static/md/lock-accept-interactive.md new file mode 100644 index 00000000..ad30944f --- /dev/null +++ b/docs/source/_static/md/lock-accept-interactive.md @@ -0,0 +1,16 @@ +
+ +```console +$ pytask lock accept -k train +────────────────────────── Start pytask session ────────────────────────── +Platform: <platform> -- Python <version>, pytask <version> +Root: <path> +Collected 2 tasks. +# Accept recorded state for task_train.py::task_train? [y/N]: $ y +# Accept recorded state for task_evaluate.py::task_evaluate? [y/N]: $ n +Accept recorded state for task_train.py::task_train. + +───────────────────────────────────────────────────────────────────────── +``` + +
diff --git a/docs/source/_static/md/lock-clean.md b/docs/source/_static/md/lock-clean.md new file mode 100644 index 00000000..86d76263 --- /dev/null +++ b/docs/source/_static/md/lock-clean.md @@ -0,0 +1,17 @@ +
+ +```console +$ pytask lock clean +────────────────────────── Start pytask session ────────────────────────── +Platform: <platform> -- Python <version>, pytask <version> +Root: <path> +Collected 2 tasks. +# Remove recorded state for task_old.py::task_train? [y/N]: $ y +# Remove recorded state for task_old.py::task_evaluate? [y/N]: $ y +Remove recorded state for task_old.py::task_train. +Remove recorded state for task_old.py::task_evaluate. + +───────────────────────────────────────────────────────────────────────── +``` + +
diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md index ce4084f1..990e2ad5 100644 --- a/docs/source/how_to_guides/index.md +++ b/docs/source/how_to_guides/index.md @@ -10,6 +10,7 @@ specific tasks with pytask. - [Migrating From Scripts To Pytask](migrating_from_scripts_to_pytask.md) - [Interfaces For Dependencies Products](interfaces_for_dependencies_products.md) - [Portability](portability.md) +- [Reconciling Lockfile State](reconciling_lockfile_state.md) - [Remote Files](remote_files.md) - [Functional Interface](functional_interface.md) - [Capture Warnings](capture_warnings.md) diff --git a/docs/source/how_to_guides/portability.md b/docs/source/how_to_guides/portability.md index b3b099fa..af9cb7c7 100644 --- a/docs/source/how_to_guides/portability.md +++ b/docs/source/how_to_guides/portability.md @@ -17,13 +17,11 @@ Use this checklist when you move a project to another machine or environment. Run a normal build with [`pytask build`](../reference_guides/commands.md#pytask-build) so `pytask.lock` is up to date: -```` ```console $ pytask build ``` If you already have a recent lockfile and up-to-date outputs, you can skip this step. -```` 1. **Ship the right files.** diff --git a/docs/source/how_to_guides/reconciling_lockfile_state.md b/docs/source/how_to_guides/reconciling_lockfile_state.md new file mode 100644 index 00000000..6cd8ed8f --- /dev/null +++ b/docs/source/how_to_guides/reconciling_lockfile_state.md @@ -0,0 +1,133 @@ +# Reconciling Lockfile State + +Use [`pytask lock`](../reference_guides/commands.md#pytask-lock) when the current files +in the project are already correct and only the recorded state in `pytask.lock` needs to +catch up. + +This is an advanced workflow. Most of the time, +[`pytask build`](../reference_guides/commands.md#pytask-build) is the right command. +Reach for `pytask lock` when you want to change the lockfile without executing tasks. + +!!! warning + + `pytask lock` is a sharp tool. It updates recorded state without proving that the files + were produced by the current task definitions. + +## When is this useful? + +Typical situations are: + +- You reformatted or reorganized a task file and do not want to rerun an expensive task. +- You renamed or moved a task and want to accept the current outputs for the new task. +- You produced outputs manually or elsewhere and now want to register them in the + lockfile. +- You deleted or renamed tasks and want to remove their stale lockfile entries. + +## Preview changes first + +By default, `pytask lock` runs interactively. It shows the planned changes and then asks +for confirmation one by one. Only entries which would actually change appear in the +prompt sequence. + +To preview changes without writing them, use `--dry-run`: + +--8<-- "docs/source/_static/md/lock-accept-dry-run.md" + +To apply all planned changes without prompting, use `--yes`: + +```console +$ pytask lock accept -k train --yes +``` + +## Accept the current state + +Use [`pytask lock accept`](../reference_guides/commands.md#pytask-lock-accept) when the +current dependencies, products, and task definition are already correct and should +become the new recorded state. + +--8<-- "docs/source/_static/md/lock-accept-interactive.md" + +If no selectors are provided, `pytask lock accept` applies to all collected tasks in the +provided paths. + +If selectors are provided with `-k` or `-m`, `accept` automatically includes the +ancestors of the selected tasks. This is useful when you target a downstream task and +want the accepted state to stay consistent with its upstream dependencies. + +```console +$ pytask lock accept -k evaluate +``` + +In this example, `pytask` accepts `evaluate` and its ancestors. It does not +automatically include descendants. If you want to accept a wider part of the DAG, widen +the task selection yourself. + +```console +$ pytask lock accept -k "train or evaluate" +``` + +If a selected task is missing a required dependency or product, the command fails +instead of accepting incomplete state. + +## Reset recorded state + +Use [`pytask lock reset`](../reference_guides/commands.md#pytask-lock-reset) to remove +recorded state for selected tasks. + +```console +$ pytask lock reset -k train +``` + +Unlike `accept`, `reset` works on the exact selected tasks. It does not automatically +include ancestors. + +On the next build, `pytask` determines again whether these tasks require execution. This +is useful when state was accepted too broadly or when you want a specific task to be +reconsidered from scratch. + +## Remove stale lockfile entries + +Use [`pytask lock clean`](../reference_guides/commands.md#pytask-lock-clean) to remove +entries from the lockfile which no longer correspond to collected tasks in the current +project. + +--8<-- "docs/source/_static/md/lock-clean.md" + +This is useful after deleting, renaming, or moving tasks when old entries should no +longer remain in the lockfile. + +## Example workflow + +One common workflow looks like this: + +1. Run a normal build once. +1. Change a task file in a way that should not force a rerun. +1. Accept the current state. +1. Verify that a later build skips the task. +1. Reset the task if you want `pytask` to reconsider it again. + +```console +$ pytask build +$ pytask lock accept -k train --yes +$ pytask build +$ pytask lock reset -k train --yes +$ pytask build +``` + +After `accept`, the next build skips unchanged tasks according to the updated lockfile. +After `reset`, the selected tasks are reconsidered on the next build. + +## Be explicit about scope + +Start with narrow task selections, preview changes with `--dry-run`, and widen the +selection only when needed. + +This is especially important for `accept`: it is often better to accept a small part of +the DAG first and then inspect the result than to update the whole project at once. + +## Related + +- [`pytask lock`](../reference_guides/commands.md#pytask-lock) +- [`pytask build`](../reference_guides/commands.md#pytask-build) +- [Portability](portability.md) +- [The lockfile](../reference_guides/lockfile.md) diff --git a/mkdocs.yml b/mkdocs.yml index 4711b71a..9b95ffe9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,6 +38,7 @@ nav: - Migrating From Scripts To pytask: how_to_guides/migrating_from_scripts_to_pytask.md - Interfaces For Dependencies And Products: how_to_guides/interfaces_for_dependencies_products.md - Portability: how_to_guides/portability.md + - Reconciling Lockfile State: how_to_guides/reconciling_lockfile_state.md - Remote Files: how_to_guides/remote_files.md - Functional Interface: how_to_guides/functional_interface.md - Capture Warnings: how_to_guides/capture_warnings.md diff --git a/scripts/check_termynal_line_lengths.py b/scripts/check_termynal_line_lengths.py new file mode 100644 index 00000000..df42782a --- /dev/null +++ b/scripts/check_termynal_line_lengths.py @@ -0,0 +1,74 @@ +"""Check visible line lengths in Termynal documentation snippets.""" + +from __future__ import annotations + +import argparse +import re +import sys +from html import unescape +from pathlib import Path + +# Existing Termynal snippets top out at 78 visible characters. +MAX_TERMYNAL_LINE_LENGTH = 78 + +_CONSOLE_BLOCK_PATTERN = re.compile(r"```console\n(.*?)\n```", re.DOTALL) +_HTML_TAG_PATTERN = re.compile(r"<[^>]+>") + + +def _visible_text(line: str) -> str: + return unescape(_HTML_TAG_PATTERN.sub("", line)) + + +def _iter_violations(path: Path, max_length: int) -> list[str]: + text = path.read_text(encoding="utf-8") + if 'class="termy"' not in text: + return [] + + violations = [] + for match in _CONSOLE_BLOCK_PATTERN.finditer(text): + start_line = text.count("\n", 0, match.start(1)) + 1 + for offset, raw_line in enumerate(match.group(1).splitlines()): + rendered_line = _visible_text(raw_line) + line_length = len(rendered_line) + if line_length > max_length: + violations.append( + f"{path}:{start_line + offset}: rendered line has " + f"{line_length} characters, maximum is {max_length}:\n" + f" {rendered_line}" + ) + + return violations + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument( + "paths", + nargs="*", + type=Path, + default=sorted(Path("docs/source/_static/md").rglob("*.md")), + help="Markdown files with Termynal snippets.", + ) + parser.add_argument( + "--max-length", + type=int, + default=MAX_TERMYNAL_LINE_LENGTH, + help="Maximum visible line length for rendered terminal lines.", + ) + args = parser.parse_args() + + violations = [ + violation + for path in args.paths + if path.is_file() + for violation in _iter_violations(path, args.max_length) + ] + if violations: + sys.stderr.write("\n\n".join(violations)) + sys.stderr.write("\n") + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/demo_lock_workflows.py b/scripts/demo_lock_workflows.py new file mode 100644 index 00000000..8f671821 --- /dev/null +++ b/scripts/demo_lock_workflows.py @@ -0,0 +1,244 @@ +"""Demonstrate end-to-end lock workflows with small temporary projects. + +Run with + + uv run python scripts/demo_lock_workflows.py + +or select one scenario with + + uv run python scripts/demo_lock_workflows.py --scenario golden-path + +Use ``--keep`` to keep the temporary project directories for inspection. +""" + +from __future__ import annotations + +import argparse +import shutil +import subprocess +import sys +import tempfile +import textwrap +from dataclasses import dataclass +from pathlib import Path + +import click + + +@dataclass(frozen=True) +class Scenario: + name: str + description: str + callback_name: str + + +SCENARIOS = ( + Scenario( + name="golden-path", + description=( + "Build a task, modify it, accept the change, verify that build skips it, " + "reset it, and verify that build executes it again." + ), + callback_name="scenario_golden_path", + ), + Scenario( + name="accept-ancestors", + description=( + "Target a downstream task, accept it, and show that ancestors are accepted " + "implicitly while descendants are not." + ), + callback_name="scenario_accept_ancestors", + ), + Scenario( + name="clean-subtractive", + description=( + "Remove a stale task, add a new one, run lock clean, and show that clean " + "removes stale entries without accepting new tasks." + ), + callback_name="scenario_clean_subtractive", + ), +) + + +def _write(path: Path, content: str) -> None: + path.write_text(textwrap.dedent(content).lstrip()) + + +def _run(project: Path, *args: str) -> None: + cmd = [sys.executable, "-m", "pytask", *args, project.as_posix()] + click.echo() + click.echo(f"$ {' '.join(cmd)}") + result = subprocess.run( + cmd, cwd=project, capture_output=True, text=True, check=False + ) + if result.stdout: + click.echo(result.stdout.rstrip()) + if result.stderr: + click.echo(result.stderr.rstrip()) + click.echo(f"[exit code: {result.returncode}]") + if result.returncode != 0: + msg = "The demo command failed. Inspect the output above." + raise SystemExit(msg) + + +def _scenario_header(scenario: Scenario, project: Path) -> None: + click.echo() + click.echo("=" * 80) + click.echo(f"{scenario.name}: {scenario.description}") + click.echo(f"project: {project}") + click.echo("=" * 80) + + +def _write_single_task_project(project: Path) -> None: + _write( + project / "task_example.py", + """ + from pathlib import Path + + + def task_example(produces=Path("out.txt")): + produces.write_text("data") + """, + ) + + +def _write_chain_project(project: Path) -> None: + _write( + project / "task_upstream.py", + """ + from pathlib import Path + + + def task_upstream(produces=Path("up.txt")): + produces.write_text("up") + """, + ) + _write( + project / "task_downstream.py", + """ + from pathlib import Path + + + def task_downstream(depends_on=Path("up.txt"), produces=Path("down.txt")): + produces.write_text(depends_on.read_text() + "down") + """, + ) + + +def _write_alpha_beta_project(project: Path) -> None: + _write( + project / "task_alpha.py", + """ + from pathlib import Path + + + def task_alpha(produces=Path("alpha.txt")): + produces.write_text("alpha") + """, + ) + _write( + project / "task_beta.py", + """ + from pathlib import Path + + + def task_beta(produces=Path("beta.txt")): + produces.write_text("beta") + """, + ) + + +def scenario_golden_path(project: Path) -> None: + _write_single_task_project(project) + + _run(project) + + task = project / "task_example.py" + task.write_text(task.read_text() + "\n# changed without rerunning\n") + + _run(project, "lock", "accept", "-k", "example", "--yes") + _run(project) + _run(project, "lock", "reset", "-k", "example", "--yes") + _run(project) + + +def scenario_accept_ancestors(project: Path) -> None: + _write_chain_project(project) + + _run(project) + + upstream = project / "task_upstream.py" + downstream = project / "task_downstream.py" + upstream.write_text(upstream.read_text() + "\n# changed upstream\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream\n") + + _run(project, "lock", "accept", "-k", "downstream", "--yes") + _run(project) + + upstream.write_text(upstream.read_text() + "\n# changed upstream again\n") + downstream.write_text(downstream.read_text() + "\n# changed downstream again\n") + + _run(project, "lock", "accept", "-k", "upstream", "--yes") + _run(project) + + +def scenario_clean_subtractive(project: Path) -> None: + _write_alpha_beta_project(project) + + _run(project) + + (project / "task_alpha.py").unlink() + _write( + project / "task_gamma.py", + """ + from pathlib import Path + + + def task_gamma(produces=Path("gamma.txt")): + produces.write_text("gamma") + """, + ) + + _run(project, "lock", "clean", "--yes") + _run(project) + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "--scenario", + choices=[scenario.name for scenario in SCENARIOS] + ["all"], + default="all", + help="Select one scenario or run them all.", + ) + parser.add_argument( + "--keep", + action="store_true", + help="Keep temporary project directories instead of deleting them.", + ) + return parser.parse_args() + + +def main() -> int: + args = _parse_args() + selected = [ + scenario for scenario in SCENARIOS if args.scenario in ("all", scenario.name) + ] + + for scenario in selected: + temp_dir = Path(tempfile.mkdtemp(prefix=f"pytask-{scenario.name}-")) + _scenario_header(scenario, temp_dir) + try: + globals()[scenario.callback_name](temp_dir) + finally: + if args.keep: + click.echo() + click.echo(f"kept project at {temp_dir}") + else: + shutil.rmtree(temp_dir) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From f124e9187cadba47e7d515f20635bb80306597ca Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 13:26:48 +0200 Subject: [PATCH 06/16] fix --- docs/source/_static/md/lock-accept-dry-run.md | 4 ++-- docs/source/_static/md/lock-accept-interactive.md | 4 ++-- docs/source/_static/md/lock-clean.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/_static/md/lock-accept-dry-run.md b/docs/source/_static/md/lock-accept-dry-run.md index 49d39596..acf5ee72 100644 --- a/docs/source/_static/md/lock-accept-dry-run.md +++ b/docs/source/_static/md/lock-accept-dry-run.md @@ -4,8 +4,8 @@ $ pytask lock accept -k train --dry-run ────────────────────────── Start pytask session ────────────────────────── -Platform: <platform> -- Python <version>, pytask <version> -Root: <path> +Platform: win32 -- Python 3.12.0, pytask 0.5.3 +Root: C:\Users\pytask-dev\git\my_project Collected 2 tasks. Would accept recorded state for task_train.py::task_train. Would accept recorded state for task_evaluate.py::task_evaluate. diff --git a/docs/source/_static/md/lock-accept-interactive.md b/docs/source/_static/md/lock-accept-interactive.md index ad30944f..67615034 100644 --- a/docs/source/_static/md/lock-accept-interactive.md +++ b/docs/source/_static/md/lock-accept-interactive.md @@ -3,8 +3,8 @@ ```console $ pytask lock accept -k train ────────────────────────── Start pytask session ────────────────────────── -Platform: <platform> -- Python <version>, pytask <version> -Root: <path> +Platform: win32 -- Python 3.12.0, pytask 0.5.3 +Root: C:\Users\pytask-dev\git\my_project Collected 2 tasks. # Accept recorded state for task_train.py::task_train? [y/N]: $ y # Accept recorded state for task_evaluate.py::task_evaluate? [y/N]: $ n diff --git a/docs/source/_static/md/lock-clean.md b/docs/source/_static/md/lock-clean.md index 86d76263..b60b30ed 100644 --- a/docs/source/_static/md/lock-clean.md +++ b/docs/source/_static/md/lock-clean.md @@ -3,8 +3,8 @@ ```console $ pytask lock clean ────────────────────────── Start pytask session ────────────────────────── -Platform: <platform> -- Python <version>, pytask <version> -Root: <path> +Platform: win32 -- Python 3.12.0, pytask 0.5.3 +Root: C:\Users\pytask-dev\git\my_project Collected 2 tasks. # Remove recorded state for task_old.py::task_train? [y/N]: $ y # Remove recorded state for task_old.py::task_evaluate? [y/N]: $ y From b359818ef42278e4eb5cdf93083465b9241f230e Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 15:48:40 +0200 Subject: [PATCH 07/16] fix --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cdb95826..37c93a3e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,12 +25,12 @@ repos: - id: python-no-log-warn - id: text-unicode-replacement-char - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.10 + rev: v0.15.12 hooks: - id: ruff-format - id: ruff-check - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.11.6 + rev: 0.11.7 hooks: - id: uv-lock - repo: local @@ -67,7 +67,7 @@ repos: - id: nbstripout exclude: (docs) - repo: https://github.com/crate-ci/typos - rev: v1 + rev: v1.45.1 hooks: - id: typos exclude: (\.ipynb) From 31f42749880af93694f6af4e8287cc8654c70cb5 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 15:50:44 +0200 Subject: [PATCH 08/16] Make termynal hook independent of uv --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 37c93a3e..68364c31 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,8 +37,8 @@ repos: hooks: - id: check-termynal-line-lengths name: Check Termynal line lengths - entry: uv run python scripts/check_termynal_line_lengths.py - language: system + entry: python scripts/check_termynal_line_lengths.py + language: python files: ^docs/source/_static/md/.*\.md$ - repo: https://github.com/executablebooks/mdformat rev: 1.0.0 From a410e9b4967d30a9c3f984d8791dcff84bca97d3 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 16:05:58 +0200 Subject: [PATCH 09/16] fix --- .../reconciling_lockfile_state.md | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/docs/source/how_to_guides/reconciling_lockfile_state.md b/docs/source/how_to_guides/reconciling_lockfile_state.md index 6cd8ed8f..40508104 100644 --- a/docs/source/how_to_guides/reconciling_lockfile_state.md +++ b/docs/source/how_to_guides/reconciling_lockfile_state.md @@ -4,23 +4,14 @@ Use [`pytask lock`](../reference_guides/commands.md#pytask-lock) when the curren in the project are already correct and only the recorded state in `pytask.lock` needs to catch up. -This is an advanced workflow. Most of the time, -[`pytask build`](../reference_guides/commands.md#pytask-build) is the right command. -Reach for `pytask lock` when you want to change the lockfile without executing tasks. - -!!! warning - - `pytask lock` is a sharp tool. It updates recorded state without proving that the files - were produced by the current task definitions. - ## When is this useful? Typical situations are: - You reformatted or reorganized a task file and do not want to rerun an expensive task. - You renamed or moved a task and want to accept the current outputs for the new task. -- You produced outputs manually or elsewhere and now want to register them in the - lockfile. +- You produced outputs outside of pytask and now want to register the task along the + outputs in the lockfile. - You deleted or renamed tasks and want to remove their stale lockfile entries. ## Preview changes first @@ -72,14 +63,19 @@ instead of accepting incomplete state. ## Reset recorded state Use [`pytask lock reset`](../reference_guides/commands.md#pytask-lock-reset) to remove -recorded state for selected tasks. +recorded state for selected tasks. The following command removes the recorded state for +all tasks. ```console -$ pytask lock reset -k train +$ pytask lock reset ``` -Unlike `accept`, `reset` works on the exact selected tasks. It does not automatically -include ancestors. +Unlike `accept`, `reset` with a selector works on the exact selected tasks. It does not +automatically include ancestors. + +```console +$ pytask lock reset -k train +``` On the next build, `pytask` determines again whether these tasks require execution. This is useful when state was accepted too broadly or when you want a specific task to be From 2b36df7a050c7b51906a48dd1b381faec51698f4 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 16:27:00 +0200 Subject: [PATCH 10/16] Fix lock docs build path --- .readthedocs.yaml | 2 +- docs/source/how_to_guides/portability.md | 3 +-- docs/source/how_to_guides/reconciling_lockfile_state.md | 8 -------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 519e8b04..2aa8017f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -19,7 +19,7 @@ build: - UV_PROJECT_ENVIRONMENT="${READTHEDOCS_VIRTUALENV_PATH}" uv sync --frozen --group docs build: html: - - UV_PROJECT_ENVIRONMENT="${READTHEDOCS_VIRTUALENV_PATH}" uv run --group docs zensical build + - UV_PROJECT_ENVIRONMENT="${READTHEDOCS_VIRTUALENV_PATH}" uvx --from rust-just just docs post_build: - mkdir -p "${READTHEDOCS_OUTPUT}/html" - cp -a docs/build/. "${READTHEDOCS_OUTPUT}/html/" diff --git a/docs/source/how_to_guides/portability.md b/docs/source/how_to_guides/portability.md index af9cb7c7..ac38cf92 100644 --- a/docs/source/how_to_guides/portability.md +++ b/docs/source/how_to_guides/portability.md @@ -89,5 +89,4 @@ To clean up stale entries without deleting the file, run $ pytask lock clean ``` -This rewrites the lockfile with only the currently collected tasks and their current -state values. +This removes lockfile entries for tasks which are no longer collected. diff --git a/docs/source/how_to_guides/reconciling_lockfile_state.md b/docs/source/how_to_guides/reconciling_lockfile_state.md index 40508104..85491984 100644 --- a/docs/source/how_to_guides/reconciling_lockfile_state.md +++ b/docs/source/how_to_guides/reconciling_lockfile_state.md @@ -113,14 +113,6 @@ $ pytask build After `accept`, the next build skips unchanged tasks according to the updated lockfile. After `reset`, the selected tasks are reconsidered on the next build. -## Be explicit about scope - -Start with narrow task selections, preview changes with `--dry-run`, and widen the -selection only when needed. - -This is especially important for `accept`: it is often better to accept a small part of -the DAG first and then inspect the result than to update the whole project at once. - ## Related - [`pytask lock`](../reference_guides/commands.md#pytask-lock) From 904364df5306ee5fc7bd51c19d932f6b0c2c9d81 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 16:38:19 +0200 Subject: [PATCH 11/16] Simplify lock command helpers --- src/_pytask/lock.py | 109 +++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py index 6951bd10..4b0f66ba 100644 --- a/src/_pytask/lock.py +++ b/src/_pytask/lock.py @@ -43,33 +43,18 @@ from _pytask.lockfile import LockfileState -def _add_lock_command_options( - *, dry_run_help: str -) -> Callable[[Callable[..., None]], Callable[..., None]]: - def decorator(func: Callable[..., None]) -> Callable[..., None]: - func = click.option( - "--dry-run", - is_flag=True, - default=False, - help=dry_run_help, - )(func) - return click.option( - "-y", - "--yes", - is_flag=True, - default=False, - help="Apply the changes without prompting for confirmation.", - )(func) - - return decorator - - @dataclass(slots=True) class _PlannedChange: - kind: str task_id: str entry: _TaskEntry | None = None + @property + def is_accept(self) -> bool: + return self.entry is not None + + +# Task selection. + def _validate_confirmation_options(raw_config: dict[str, Any]) -> None: if raw_config["dry_run"] and raw_config["yes"]: @@ -77,31 +62,20 @@ def _validate_confirmation_options(raw_config: dict[str, Any]) -> None: raise click.UsageError(msg) -def _keyword_filter(tasks: list[PTask], expression: str) -> set[str]: - try: - compiled = Expression.compile_(expression) - except ParseError as e: - msg = f"Wrong expression passed to '-k': {expression}: {e}" - raise ValueError(msg) from None - - return { - task.signature - for task in tasks - if compiled.evaluate(KeywordMatcher.from_task(task)) - } - - -def _marker_filter(tasks: list[PTask], expression: str) -> set[str]: +def _expression_filter( + tasks: list[PTask], + expression: str, + option: str, + matcher_from_task: Callable[[PTask], Any], +) -> set[str]: try: compiled = Expression.compile_(expression) except ParseError as e: - msg = f"Wrong expression passed to '-m': {expression}: {e}" + msg = f"Wrong expression passed to {option!r}: {expression}: {e}" raise ValueError(msg) from None return { - task.signature - for task in tasks - if compiled.evaluate(MarkMatcher.from_task(task)) + task.signature for task in tasks if compiled.evaluate(matcher_from_task(task)) } @@ -110,11 +84,15 @@ def _select_tasks_exact(session: Session) -> list[PTask]: expression = session.config.get("expression") if expression: - selected &= _keyword_filter(session.tasks, expression) + selected &= _expression_filter( + session.tasks, expression, "-k", KeywordMatcher.from_task + ) marker_expression = session.config.get("marker_expression") if marker_expression: - selected &= _marker_filter(session.tasks, marker_expression) + selected &= _expression_filter( + session.tasks, marker_expression, "-m", MarkMatcher.from_task + ) return [task for task in session.tasks if task.signature in selected] @@ -129,6 +107,9 @@ def _select_tasks_with_ancestors(session: Session) -> list[PTask]: return [task for task in session.tasks if task.signature in selected] +# Change planning. + + def _validate_task_for_accept(session: Session, task: PTask) -> None: predecessors = set(session.dag.predecessors(task.signature)) @@ -180,9 +161,7 @@ def _plan_accept_changes(session: Session, tasks: list[PTask]) -> list[_PlannedC existing = session.config["lockfile_state"].get_task_entry(entry.id) if existing != entry: - planned_changes.append( - _PlannedChange(kind="accept", task_id=entry.id, entry=entry) - ) + planned_changes.append(_PlannedChange(task_id=entry.id, entry=entry)) return planned_changes @@ -194,7 +173,7 @@ def _plan_reset_changes(session: Session, tasks: list[PTask]) -> list[_PlannedCh for task in tasks: task_id = build_portable_task_id(task, root) if session.config["lockfile_state"].get_task_entry(task_id) is not None: - planned_changes.append(_PlannedChange(kind="reset", task_id=task_id)) + planned_changes.append(_PlannedChange(task_id=task_id)) return planned_changes @@ -205,13 +184,14 @@ def _plan_clean_changes(session: Session) -> list[_PlannedChange]: build_portable_task_id(task, session.config["root"]) for task in session.tasks } stale_ids = state.task_ids() - current_task_ids - return [ - _PlannedChange(kind="clean", task_id=task_id) for task_id in sorted(stale_ids) - ] + return [_PlannedChange(task_id=task_id) for task_id in sorted(stale_ids)] + + +# Change application. def _describe_change(change: _PlannedChange) -> str: - if change.kind == "accept": + if change.is_accept: return f"Accept recorded state for {change.task_id}" return f"Remove recorded state for {change.task_id}" @@ -252,6 +232,9 @@ def _apply_changes( return accepted +# Command execution. + + def _run_lock_command( raw_config: dict[str, Any], *, @@ -312,6 +295,30 @@ def _run_lock_command( return session.exit_code +# Command line interface. + + +def _add_lock_command_options( + *, dry_run_help: str +) -> Callable[[Callable[..., None]], Callable[..., None]]: + def decorator(func: Callable[..., None]) -> Callable[..., None]: + func = click.option( + "--dry-run", + is_flag=True, + default=False, + help=dry_run_help, + )(func) + return click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Apply the changes without prompting for confirmation.", + )(func) + + return decorator + + @hookimpl(tryfirst=True) def pytask_extend_command_line_interface(cli: click.Group) -> None: """Extend the command line interface.""" From 09be30fd4695fddda89b31c35f9d871c307012d1 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 16:45:55 +0200 Subject: [PATCH 12/16] Simplify lock command planning --- src/_pytask/lock.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py index 4b0f66ba..e3e995cc 100644 --- a/src/_pytask/lock.py +++ b/src/_pytask/lock.py @@ -147,11 +147,11 @@ def _validate_task_for_accept(session: Session, task: PTask) -> None: raise NodeNotFoundError(msg) -def _plan_accept_changes(session: Session, tasks: list[PTask]) -> list[_PlannedChange]: +def _plan_accept_changes(session: Session) -> list[_PlannedChange]: root = session.config["root"] planned_changes = [] - for task in tasks: + for task in _select_tasks_with_ancestors(session): _validate_task_for_accept(session, task) entry = _build_task_entry(session, task, root) if entry is None: @@ -166,11 +166,11 @@ def _plan_accept_changes(session: Session, tasks: list[PTask]) -> list[_PlannedC return planned_changes -def _plan_reset_changes(session: Session, tasks: list[PTask]) -> list[_PlannedChange]: +def _plan_reset_changes(session: Session) -> list[_PlannedChange]: root = session.config["root"] planned_changes = [] - for task in tasks: + for task in _select_tasks_exact(session): task_id = build_portable_task_id(task, root) if session.config["lockfile_state"].get_task_entry(task_id) is not None: planned_changes.append(_PlannedChange(task_id=task_id)) @@ -238,10 +238,7 @@ def _apply_changes( def _run_lock_command( raw_config: dict[str, Any], *, - planner: Callable[[Session], list[_PlannedChange]] | None = None, - planner_with_tasks: Callable[[Session, list[PTask]], list[_PlannedChange]] - | None = None, - select_tasks: Callable[[Session], list[PTask]] | None = None, + planner: Callable[[Session], list[_PlannedChange]], empty_message: str, ) -> int: _validate_confirmation_options(raw_config) @@ -263,13 +260,7 @@ def _run_lock_command( session.hook.pytask_collect(session=session) session.dag = create_dag(session=session) - if planner_with_tasks is not None: - assert select_tasks is not None - tasks = select_tasks(session) - planned_changes = planner_with_tasks(session, tasks) - else: - assert planner is not None - planned_changes = planner(session) + planned_changes = planner(session) if planned_changes: _apply_changes(session, planned_changes) @@ -339,8 +330,7 @@ def accept(**raw_config: Any) -> None: sys.exit( _run_lock_command( raw_config, - planner_with_tasks=_plan_accept_changes, - select_tasks=_select_tasks_with_ancestors, + planner=_plan_accept_changes, empty_message="No lockfile entries need updating.", ) ) @@ -355,8 +345,7 @@ def reset(**raw_config: Any) -> None: sys.exit( _run_lock_command( raw_config, - planner_with_tasks=_plan_reset_changes, - select_tasks=_select_tasks_exact, + planner=_plan_reset_changes, empty_message="No lockfile entries need removing.", ) ) From 02e775f8e30d1054b8b00751dc6977f723a3f852 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 16:50:19 +0200 Subject: [PATCH 13/16] delete demo script --- scripts/demo_lock_workflows.py | 244 --------------------------------- 1 file changed, 244 deletions(-) delete mode 100644 scripts/demo_lock_workflows.py diff --git a/scripts/demo_lock_workflows.py b/scripts/demo_lock_workflows.py deleted file mode 100644 index 8f671821..00000000 --- a/scripts/demo_lock_workflows.py +++ /dev/null @@ -1,244 +0,0 @@ -"""Demonstrate end-to-end lock workflows with small temporary projects. - -Run with - - uv run python scripts/demo_lock_workflows.py - -or select one scenario with - - uv run python scripts/demo_lock_workflows.py --scenario golden-path - -Use ``--keep`` to keep the temporary project directories for inspection. -""" - -from __future__ import annotations - -import argparse -import shutil -import subprocess -import sys -import tempfile -import textwrap -from dataclasses import dataclass -from pathlib import Path - -import click - - -@dataclass(frozen=True) -class Scenario: - name: str - description: str - callback_name: str - - -SCENARIOS = ( - Scenario( - name="golden-path", - description=( - "Build a task, modify it, accept the change, verify that build skips it, " - "reset it, and verify that build executes it again." - ), - callback_name="scenario_golden_path", - ), - Scenario( - name="accept-ancestors", - description=( - "Target a downstream task, accept it, and show that ancestors are accepted " - "implicitly while descendants are not." - ), - callback_name="scenario_accept_ancestors", - ), - Scenario( - name="clean-subtractive", - description=( - "Remove a stale task, add a new one, run lock clean, and show that clean " - "removes stale entries without accepting new tasks." - ), - callback_name="scenario_clean_subtractive", - ), -) - - -def _write(path: Path, content: str) -> None: - path.write_text(textwrap.dedent(content).lstrip()) - - -def _run(project: Path, *args: str) -> None: - cmd = [sys.executable, "-m", "pytask", *args, project.as_posix()] - click.echo() - click.echo(f"$ {' '.join(cmd)}") - result = subprocess.run( - cmd, cwd=project, capture_output=True, text=True, check=False - ) - if result.stdout: - click.echo(result.stdout.rstrip()) - if result.stderr: - click.echo(result.stderr.rstrip()) - click.echo(f"[exit code: {result.returncode}]") - if result.returncode != 0: - msg = "The demo command failed. Inspect the output above." - raise SystemExit(msg) - - -def _scenario_header(scenario: Scenario, project: Path) -> None: - click.echo() - click.echo("=" * 80) - click.echo(f"{scenario.name}: {scenario.description}") - click.echo(f"project: {project}") - click.echo("=" * 80) - - -def _write_single_task_project(project: Path) -> None: - _write( - project / "task_example.py", - """ - from pathlib import Path - - - def task_example(produces=Path("out.txt")): - produces.write_text("data") - """, - ) - - -def _write_chain_project(project: Path) -> None: - _write( - project / "task_upstream.py", - """ - from pathlib import Path - - - def task_upstream(produces=Path("up.txt")): - produces.write_text("up") - """, - ) - _write( - project / "task_downstream.py", - """ - from pathlib import Path - - - def task_downstream(depends_on=Path("up.txt"), produces=Path("down.txt")): - produces.write_text(depends_on.read_text() + "down") - """, - ) - - -def _write_alpha_beta_project(project: Path) -> None: - _write( - project / "task_alpha.py", - """ - from pathlib import Path - - - def task_alpha(produces=Path("alpha.txt")): - produces.write_text("alpha") - """, - ) - _write( - project / "task_beta.py", - """ - from pathlib import Path - - - def task_beta(produces=Path("beta.txt")): - produces.write_text("beta") - """, - ) - - -def scenario_golden_path(project: Path) -> None: - _write_single_task_project(project) - - _run(project) - - task = project / "task_example.py" - task.write_text(task.read_text() + "\n# changed without rerunning\n") - - _run(project, "lock", "accept", "-k", "example", "--yes") - _run(project) - _run(project, "lock", "reset", "-k", "example", "--yes") - _run(project) - - -def scenario_accept_ancestors(project: Path) -> None: - _write_chain_project(project) - - _run(project) - - upstream = project / "task_upstream.py" - downstream = project / "task_downstream.py" - upstream.write_text(upstream.read_text() + "\n# changed upstream\n") - downstream.write_text(downstream.read_text() + "\n# changed downstream\n") - - _run(project, "lock", "accept", "-k", "downstream", "--yes") - _run(project) - - upstream.write_text(upstream.read_text() + "\n# changed upstream again\n") - downstream.write_text(downstream.read_text() + "\n# changed downstream again\n") - - _run(project, "lock", "accept", "-k", "upstream", "--yes") - _run(project) - - -def scenario_clean_subtractive(project: Path) -> None: - _write_alpha_beta_project(project) - - _run(project) - - (project / "task_alpha.py").unlink() - _write( - project / "task_gamma.py", - """ - from pathlib import Path - - - def task_gamma(produces=Path("gamma.txt")): - produces.write_text("gamma") - """, - ) - - _run(project, "lock", "clean", "--yes") - _run(project) - - -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser() - parser.add_argument( - "--scenario", - choices=[scenario.name for scenario in SCENARIOS] + ["all"], - default="all", - help="Select one scenario or run them all.", - ) - parser.add_argument( - "--keep", - action="store_true", - help="Keep temporary project directories instead of deleting them.", - ) - return parser.parse_args() - - -def main() -> int: - args = _parse_args() - selected = [ - scenario for scenario in SCENARIOS if args.scenario in ("all", scenario.name) - ] - - for scenario in selected: - temp_dir = Path(tempfile.mkdtemp(prefix=f"pytask-{scenario.name}-")) - _scenario_header(scenario, temp_dir) - try: - globals()[scenario.callback_name](temp_dir) - finally: - if args.keep: - click.echo() - click.echo(f"kept project at {temp_dir}") - else: - shutil.rmtree(temp_dir) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) From bb1743b29eee071f2565bbc9c8e09c7ff3b8934d Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 26 Apr 2026 17:11:08 +0200 Subject: [PATCH 14/16] refactor methdo --- src/_pytask/lock.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py index e3e995cc..4723f36b 100644 --- a/src/_pytask/lock.py +++ b/src/_pytask/lock.py @@ -52,6 +52,11 @@ class _PlannedChange: def is_accept(self) -> bool: return self.entry is not None + def describe(self) -> str: + if self.is_accept: + return f"Accept recorded state for {self.task_id}" + return f"Remove recorded state for {self.task_id}" + # Task selection. @@ -190,25 +195,19 @@ def _plan_clean_changes(session: Session) -> list[_PlannedChange]: # Change application. -def _describe_change(change: _PlannedChange) -> str: - if change.is_accept: - return f"Accept recorded state for {change.task_id}" - return f"Remove recorded state for {change.task_id}" - - def _apply_changes( session: Session, planned_changes: list[_PlannedChange] ) -> list[_PlannedChange]: if session.config["dry_run"]: for change in planned_changes: - console.print(f"Would {_describe_change(change).lower()}.") + console.print(f"Would {change.describe().lower()}.") return planned_changes accepted = planned_changes if not session.config["yes"]: accepted = [] for change in planned_changes: - prompt = f"{_describe_change(change)}?" + prompt = f"{change.describe()}?" if click.confirm(prompt, default=False): accepted.append(change) @@ -227,7 +226,7 @@ def _apply_changes( state.flush() for change in accepted: - console.print(f"{_describe_change(change)}.") + console.print(f"{change.describe()}.") return accepted From bc0fd823125c006e12c2d1d44c85bdc53e7714ad Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 27 Apr 2026 08:29:00 +0200 Subject: [PATCH 15/16] add comments --- src/_pytask/lock.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/_pytask/lock.py b/src/_pytask/lock.py index 4723f36b..e3dbf33d 100644 --- a/src/_pytask/lock.py +++ b/src/_pytask/lock.py @@ -267,6 +267,12 @@ def _run_lock_command( console.print() console.print(empty_message) + # Journal replay can make the lockfile state dirty even if this lock command + # has no net changes. Flush it so replayed entries are persisted and the + # journal is removed. + if not session.config["dry_run"]: + session.config["lockfile_state"].flush() + console.print() console.rule(style="default") except CollectionError: @@ -280,6 +286,8 @@ def _run_lock_command( console.rule(style="failed") session.exit_code = ExitCode.FAILED + # Configuration can fail before the session receives the plugin manager's hook + # relay. A fallback session only has a bare HookRelay without this hook. if hasattr(session.hook, "pytask_unconfigure"): session.hook.pytask_unconfigure(session=session) return session.exit_code From 255db5838367a4b63df763b1a1464d9d87618828 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Fri, 1 May 2026 09:33:38 +0200 Subject: [PATCH 16/16] Restructure lockfile state guide --- docs/source/how_to_guides/index.md | 2 +- .../reconciling_lockfile_state.md | 108 +++++++++--------- 2 files changed, 56 insertions(+), 54 deletions(-) diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md index 990e2ad5..c18c0c4d 100644 --- a/docs/source/how_to_guides/index.md +++ b/docs/source/how_to_guides/index.md @@ -10,7 +10,7 @@ specific tasks with pytask. - [Migrating From Scripts To Pytask](migrating_from_scripts_to_pytask.md) - [Interfaces For Dependencies Products](interfaces_for_dependencies_products.md) - [Portability](portability.md) -- [Reconciling Lockfile State](reconciling_lockfile_state.md) +- [Update the Lockfile to Match Project State](reconciling_lockfile_state.md) - [Remote Files](remote_files.md) - [Functional Interface](functional_interface.md) - [Capture Warnings](capture_warnings.md) diff --git a/docs/source/how_to_guides/reconciling_lockfile_state.md b/docs/source/how_to_guides/reconciling_lockfile_state.md index 85491984..135c2496 100644 --- a/docs/source/how_to_guides/reconciling_lockfile_state.md +++ b/docs/source/how_to_guides/reconciling_lockfile_state.md @@ -1,43 +1,30 @@ -# Reconciling Lockfile State +# Update the Lockfile to Match Project State Use [`pytask lock`](../reference_guides/commands.md#pytask-lock) when the current files -in the project are already correct and only the recorded state in `pytask.lock` needs to -catch up. +and outputs in the project are already correct, but the recorded state in `pytask.lock` +needs to catch up. This can happen after refactoring task files, moving or renaming +tasks, producing outputs outside of pytask, or deleting tasks. -## When is this useful? +## Accept current files and outputs -Typical situations are: - -- You reformatted or reorganized a task file and do not want to rerun an expensive task. -- You renamed or moved a task and want to accept the current outputs for the new task. -- You produced outputs outside of pytask and now want to register the task along the - outputs in the lockfile. -- You deleted or renamed tasks and want to remove their stale lockfile entries. +Use [`pytask lock accept`](../reference_guides/commands.md#pytask-lock-accept) when the +current dependencies, products, and task definition are already correct and should +become the new recorded state. -## Preview changes first +Preview the changes without writing them with `--dry-run`: -By default, `pytask lock` runs interactively. It shows the planned changes and then asks -for confirmation one by one. Only entries which would actually change appear in the -prompt sequence. +--8<-- "docs/source/_static/md/lock-accept-dry-run.md" -To preview changes without writing them, use `--dry-run`: +Then accept the planned changes interactively: ---8<-- "docs/source/_static/md/lock-accept-dry-run.md" +--8<-- "docs/source/_static/md/lock-accept-interactive.md" -To apply all planned changes without prompting, use `--yes`: +Add `--yes` to apply all planned changes without prompting: ```console $ pytask lock accept -k train --yes ``` -## Accept the current state - -Use [`pytask lock accept`](../reference_guides/commands.md#pytask-lock-accept) when the -current dependencies, products, and task definition are already correct and should -become the new recorded state. - ---8<-- "docs/source/_static/md/lock-accept-interactive.md" - If no selectors are provided, `pytask lock accept` applies to all collected tasks in the provided paths. @@ -60,58 +47,73 @@ $ pytask lock accept -k "train or evaluate" If a selected task is missing a required dependency or product, the command fails instead of accepting incomplete state. -## Reset recorded state +Run a build afterwards to check that unchanged tasks are skipped according to the +updated lockfile. + +```console +$ pytask build +``` + +## Reset state for selected tasks Use [`pytask lock reset`](../reference_guides/commands.md#pytask-lock-reset) to remove -recorded state for selected tasks. The following command removes the recorded state for -all tasks. +recorded state for selected tasks when state was accepted too broadly or when specific +tasks should be reconsidered from scratch. ```console -$ pytask lock reset +$ pytask lock reset -k train ``` Unlike `accept`, `reset` with a selector works on the exact selected tasks. It does not automatically include ancestors. +Preview the reset with `--dry-run` if you want to check the affected tasks first: + ```console -$ pytask lock reset -k train +$ pytask lock reset -k train --dry-run +``` + +Add `--yes` to remove all planned entries without prompting: + +```console +$ pytask lock reset -k train --yes ``` -On the next build, `pytask` determines again whether these tasks require execution. This -is useful when state was accepted too broadly or when you want a specific task to be -reconsidered from scratch. +If no selectors are provided, `pytask lock reset` removes the recorded state for all +collected tasks in the provided paths. + +Run a build afterwards so `pytask` determines again whether the selected tasks require +execution. + +```console +$ pytask build +``` -## Remove stale lockfile entries +## Remove stale entries for deleted or moved tasks Use [`pytask lock clean`](../reference_guides/commands.md#pytask-lock-clean) to remove entries from the lockfile which no longer correspond to collected tasks in the current -project. +project. This is useful after deleting, renaming, or moving tasks when old entries +should no longer remain in the lockfile. ---8<-- "docs/source/_static/md/lock-clean.md" +Preview stale entries without writing them with `--dry-run`: -This is useful after deleting, renaming, or moving tasks when old entries should no -longer remain in the lockfile. +```console +$ pytask lock clean --dry-run +``` -## Example workflow +Then remove stale entries interactively: -One common workflow looks like this: +--8<-- "docs/source/_static/md/lock-clean.md" -1. Run a normal build once. -1. Change a task file in a way that should not force a rerun. -1. Accept the current state. -1. Verify that a later build skips the task. -1. Reset the task if you want `pytask` to reconsider it again. +Add `--yes` to remove all stale entries without prompting: ```console -$ pytask build -$ pytask lock accept -k train --yes -$ pytask build -$ pytask lock reset -k train --yes -$ pytask build +$ pytask lock clean --yes ``` -After `accept`, the next build skips unchanged tasks according to the updated lockfile. -After `reset`, the selected tasks are reconsidered on the next build. +`clean` only removes entries for tasks which are no longer collected. It does not accept +or update the current state of collected tasks. ## Related