From 19325a1d461a936ff748ec33bbab6de01c0c8b48 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 13 Mar 2024 16:22:09 +0100 Subject: [PATCH 01/11] Skip collection of `MarkGenerator` when using `from pytask import mark`. (#576) --- docs/source/changes.md | 5 +++++ docs/source/reference_guides/api.md | 6 ++---- .../tutorials/repeating_tasks_with_different_inputs.md | 6 ++---- docs/source/tutorials/selecting_tasks.md | 3 +-- docs/source/tutorials/skipping_tasks.md | 5 +++-- docs/source/tutorials/write_a_task.md | 6 ++---- pyproject.toml | 2 +- src/_pytask/collect.py | 5 +++++ tests/test_collect.py | 1 + 9 files changed, 22 insertions(+), 17 deletions(-) diff --git a/docs/source/changes.md b/docs/source/changes.md index 0556fdc2d..3a0a82700 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -5,6 +5,11 @@ chronological order. Releases follow [semantic versioning](https://semver.org/) releases are available on [PyPI](https://pypi.org/project/pytask) and [Anaconda.org](https://anaconda.org/conda-forge/pytask). +## 0.4.6 - 2024-03-13 + +- {pull}`576` fixes accidentally collecting `pytask.MarkGenerator` when using + `from pytask import mark`. + ## 0.4.5 - 2024-01-09 - {pull}`515` enables tests with graphviz in CI. Thanks to {user}`NickCrews`. diff --git a/docs/source/reference_guides/api.md b/docs/source/reference_guides/api.md index b7efd0894..284d0f792 100644 --- a/docs/source/reference_guides/api.md +++ b/docs/source/reference_guides/api.md @@ -177,8 +177,7 @@ For example: ```python @pytask.mark.timeout(10, "slow", method="thread") -def task_function(): - ... +def task_function(): ... ``` Will create and attach a {class}`Mark ` object to the collected @@ -195,8 +194,7 @@ Example for using multiple custom markers: ```python @pytask.mark.timeout(10, "slow", method="thread") @pytask.mark.slow -def task_function(): - ... +def task_function(): ... ``` ### Classes diff --git a/docs/source/tutorials/repeating_tasks_with_different_inputs.md b/docs/source/tutorials/repeating_tasks_with_different_inputs.md index 5fed02c73..aca24cb18 100644 --- a/docs/source/tutorials/repeating_tasks_with_different_inputs.md +++ b/docs/source/tutorials/repeating_tasks_with_different_inputs.md @@ -328,8 +328,7 @@ the {func}`@task ` decorator to pass keyword arguments to the task. for id_, kwargs in ID_TO_KWARGS.items(): @task(id=id_, kwargs=kwargs) - def task_create_random_data(seed, produces): - ... + def task_create_random_data(seed, produces): ... ``` Writing a function that creates `ID_TO_KWARGS` would be even more pythonic. @@ -349,8 +348,7 @@ ID_TO_KWARGS = create_parametrization() for id_, kwargs in ID_TO_KWARGS.items(): @task(id=id_, kwargs=kwargs) - def task_create_random_data(i, produces): - ... + def task_create_random_data(i, produces): ... ``` The {doc}`best-practices guide on parametrizations <../how_to_guides/bp_scaling_tasks>` diff --git a/docs/source/tutorials/selecting_tasks.md b/docs/source/tutorials/selecting_tasks.md index 266b47bf8..23b069034 100644 --- a/docs/source/tutorials/selecting_tasks.md +++ b/docs/source/tutorials/selecting_tasks.md @@ -91,8 +91,7 @@ from pytask import task for i in range(2): @task - def task_parametrized(i=i): - ... + def task_parametrized(i=i): ... ``` To run the task where `i = 1`, run this command. diff --git a/docs/source/tutorials/skipping_tasks.md b/docs/source/tutorials/skipping_tasks.md index 7278ee3e4..e223b1cdd 100644 --- a/docs/source/tutorials/skipping_tasks.md +++ b/docs/source/tutorials/skipping_tasks.md @@ -42,8 +42,9 @@ from config import NO_LONG_RUNNING_TASKS @pytask.mark.skipif(NO_LONG_RUNNING_TASKS, reason="Skip long-running tasks.") -def task_that_takes_really_long_to_run(path: Path = Path("time_intensive_product.pkl")): - ... +def task_that_takes_really_long_to_run( + path: Path = Path("time_intensive_product.pkl"), +): ... ``` ## Further reading diff --git a/docs/source/tutorials/write_a_task.md b/docs/source/tutorials/write_a_task.md index 878f37cf0..f2815449b 100644 --- a/docs/source/tutorials/write_a_task.md +++ b/docs/source/tutorials/write_a_task.md @@ -136,16 +136,14 @@ from pytask import task @task -def create_random_data(): - ... +def create_random_data(): ... # The id will be ".../task_data_preparation.py::create_data". @task(name="create_data") -def create_random_data(): - ... +def create_random_data(): ... ``` ```{warning} diff --git a/pyproject.toml b/pyproject.toml index 4a42ab3d3..6ca9d6a5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ name = "Tobias Raabe" email = "raabe@posteo.de" [project.optional-dependencies] -all = ['universal-pathlib; python_version<"3.12"'] +all = ['universal-pathlib<0.2; python_version<"3.12"'] docs = [ "furo", "ipython", diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index 3b2625684..afa1c9bc3 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -21,6 +21,7 @@ from _pytask.console import get_file from _pytask.exceptions import CollectionError from _pytask.exceptions import NodeNotCollectedError +from _pytask.mark import MarkGenerator from _pytask.mark_utils import get_all_marks from _pytask.mark_utils import has_mark from _pytask.node_protocols import PNode @@ -235,6 +236,10 @@ def _is_filtered_object(obj: Any) -> bool: See :issue:`507`. """ + # Filter :class:`pytask.mark.MarkGenerator` which can raise errors on some marks. + if isinstance(obj, MarkGenerator): + return True + # Filter :class:`pytask.Task` and :class:`pytask.TaskWithoutPath` objects. if isinstance(obj, PTask) and inspect.isclass(obj): return True diff --git a/tests/test_collect.py b/tests/test_collect.py index 6d01eec64..58bd99dfe 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -692,6 +692,7 @@ def __getattr__(self, name): result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.OK + assert "attr_that_definitely_does_not_exist" not in result.output @pytest.mark.end_to_end() From 72f7289fc8a0d17f23dca0f9548fbc5d0962adbc Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 19 Mar 2024 20:20:16 +0100 Subject: [PATCH 02/11] Backport #580 to v0.4.7. (#580) --- docs/source/changes.md | 4 ++++ src/_pytask/debugging.py | 6 +++-- tests/test_debugging.py | 47 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/docs/source/changes.md b/docs/source/changes.md index 3a0a82700..3528db2f2 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -5,6 +5,10 @@ chronological order. Releases follow [semantic versioning](https://semver.org/) releases are available on [PyPI](https://pypi.org/project/pytask) and [Anaconda.org](https://anaconda.org/conda-forge/pytask). +## 0.4.7 - 2024-03-19 + +- {pull}`580` is a backport of {pull}`579`. + ## 0.4.6 - 2024-03-13 - {pull}`576` fixes accidentally collecting `pytask.MarkGenerator` when using diff --git a/src/_pytask/debugging.py b/src/_pytask/debugging.py index c95f79d32..5d5c9e1b5 100644 --- a/src/_pytask/debugging.py +++ b/src/_pytask/debugging.py @@ -328,7 +328,7 @@ def wrapper(*args: Any, **kwargs: Any) -> None: capman = session.config["pm"].get_plugin("capturemanager") live_manager = session.config["pm"].get_plugin("live_manager") try: - task_function(*args, **kwargs) + return task_function(*args, **kwargs) except Exception: # Order is important! Pausing the live object before the capturemanager @@ -409,11 +409,13 @@ def wrapper(*args: Any, **kwargs: Any) -> None: console.rule("Captured stderr", style="default") console.print(err) - _pdb.runcall(task_function, *args, **kwargs) + out = _pdb.runcall(task_function, *args, **kwargs) live_manager.resume() capman.resume() + return out + task.function = wrapper diff --git a/tests/test_debugging.py b/tests/test_debugging.py index 77802a623..3dd49599a 100644 --- a/tests/test_debugging.py +++ b/tests/test_debugging.py @@ -21,6 +21,12 @@ IS_PEXPECT_INSTALLED = True +def _escape_ansi(line): + """Escape ANSI sequences produced by rich.""" + ansi_escape = re.compile(r"(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]") + return ansi_escape.sub("", line) + + @pytest.mark.unit() @pytest.mark.parametrize( ("value", "expected", "expectation"), @@ -487,7 +493,40 @@ def test_function(): _flush(child) -def _escape_ansi(line): - """Escape ANSI sequences produced by rich.""" - ansi_escape = re.compile(r"(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]") - return ansi_escape.sub("", line) +@pytest.mark.end_to_end() +@pytest.mark.skipif(not IS_PEXPECT_INSTALLED, reason="pexpect is not installed.") +@pytest.mark.skipif(sys.platform == "win32", reason="pexpect cannot spawn on Windows.") +def test_pdb_with_task_that_returns(tmp_path, runner): + source = """ + from typing_extensions import Annotated + from pathlib import Path + + def task_example() -> Annotated[str, Path("data.txt")]: + return "1" + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix(), "--pdb"]) + assert result.exit_code == ExitCode.OK + assert tmp_path.joinpath("data.txt").read_text() == "1" + + +@pytest.mark.end_to_end() +@pytest.mark.skipif(not IS_PEXPECT_INSTALLED, reason="pexpect is not installed.") +@pytest.mark.skipif(sys.platform == "win32", reason="pexpect cannot spawn on Windows.") +def test_trace_with_task_that_returns(tmp_path): + source = """ + from typing_extensions import Annotated + from pathlib import Path + + def task_example() -> Annotated[str, Path("data.txt")]: + return "1" + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + child = pexpect.spawn(f"pytask {tmp_path.as_posix()}") + child.sendline("c") + rest = child.read().decode("utf8") + assert "1 Succeeded" in _escape_ansi(rest) + assert tmp_path.joinpath("data.txt").read_text() == "1" + _flush(child) From c158f0a363ef7918f97d2a06cf77436515be0c6a Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Fri, 29 Mar 2024 21:47:58 +0100 Subject: [PATCH 03/11] Add fix for imports. --- src/_pytask/path.py | 128 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 86968b8c0..ee032ad3c 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -4,6 +4,7 @@ import contextlib import functools import importlib.util +import itertools import os import sys from pathlib import Path @@ -122,7 +123,9 @@ def find_case_sensitive_path(path: Path, platform: str) -> Path: return path.resolve() if platform == "win32" else path -def import_path(path: Path, root: Path) -> ModuleType: +def import_path( + path: Path, root: Path, consider_namespace_packages: bool = False +) -> ModuleType: """Import and return a module from the given path. The function is taken from pytest when the import mode is set to ``importlib``. It @@ -130,6 +133,23 @@ def import_path(path: Path, root: Path) -> ModuleType: ``prepend``. More discussion and information can be found in :issue:`373`. """ + try: + pkg_root, module_name = resolve_pkg_root_and_module_name( + path, consider_namespace_packages=consider_namespace_packages + ) + except CouldNotResolvePathError: + pass + else: + # If the given module name is already in sys.modules, do not import it again. + with contextlib.suppress(KeyError): + return sys.modules[module_name] + + mod = _import_module_using_spec( + module_name, path, pkg_root, insert_modules=False + ) + if mod is not None: + return mod + module_name = _module_name_from_path(path, root) with contextlib.suppress(KeyError): return sys.modules[module_name] @@ -147,6 +167,112 @@ def import_path(path: Path, root: Path) -> ModuleType: return mod +def resolve_package_path(path: Path) -> Path | None: + """Return the Python package path by looking for the last + directory upwards which still contains an __init__.py. + + Returns None if it can not be determined. + """ + result = None + for parent in itertools.chain((path,), path.parents): + if parent.is_dir(): + if not (parent / "__init__.py").is_file(): + break + if not parent.name.isidentifier(): + break + result = parent + return result + + +def resolve_pkg_root_and_module_name( + path: Path, *, consider_namespace_packages: bool = False +) -> tuple[Path, str]: + """ + Return the path to the directory of the root package that contains the + given Python file, and its module name: + + src/ + app/ + __init__.py + core/ + __init__.py + models.py + + Passing the full path to `models.py` will yield Path("src") and "app.core.models". + + If consider_namespace_packages is True, then we additionally check upwards in the hierarchy + until we find a directory that is reachable from sys.path, which marks it as a namespace package: + + https://packaging.python.org/en/latest/guides/packaging-namespace-packages + + Raises CouldNotResolvePathError if the given path does not belong to a package (missing any __init__.py files). + """ + pkg_path = resolve_package_path(path) + if pkg_path is not None: + pkg_root = pkg_path.parent + # https://packaging.python.org/en/latest/guides/packaging-namespace-packages/ + if consider_namespace_packages: + # Go upwards in the hierarchy, if we find a parent path included + # in sys.path, it means the package found by resolve_package_path() + # actually belongs to a namespace package. + for parent in pkg_root.parents: + # If any of the parent paths has a __init__.py, it means it is not + # a namespace package (see the docs linked above). + if (parent / "__init__.py").is_file(): + break + if str(parent) in sys.path: + # Point the pkg_root to the root of the namespace package. + pkg_root = parent + break + + names = list(path.with_suffix("").relative_to(pkg_root).parts) + if names[-1] == "__init__": + names.pop() + module_name = ".".join(names) + return pkg_root, module_name + + msg = f"Could not resolve for {path}" + raise CouldNotResolvePathError(msg) + + +class CouldNotResolvePathError(Exception): + """Custom exception raised by resolve_pkg_root_and_module_name.""" + + +def _import_module_using_spec( + module_name: str, module_path: Path, module_location: Path, *, insert_modules: bool +) -> ModuleType | None: + """ + Tries to import a module by its canonical name, path to the .py file, and its + parent location. + + :param insert_modules: + If True, will call insert_missing_modules to create empty intermediate modules + for made-up module names (when importing test files not reachable from sys.path). + Note: we can probably drop insert_missing_modules altogether: instead of + generating module names such as "src.tests.test_foo", which require intermediate + empty modules, we might just as well generate unique module names like + "src_tests_test_foo". + """ + # Checking with sys.meta_path first in case one of its hooks can import this module, + # such as our own assertion-rewrite hook. + for meta_importer in sys.meta_path: + spec = meta_importer.find_spec(module_name, [str(module_location)]) + if spec is not None: + break + else: + spec = importlib.util.spec_from_file_location(module_name, str(module_path)) + if spec is not None: + mod = importlib.util.module_from_spec(spec) + sys.modules[module_name] = mod + spec.loader.exec_module(mod) # type: ignore[union-attr] + if insert_modules: + _insert_missing_modules(sys.modules, module_name) + return mod + + return None + + def _module_name_from_path(path: Path, root: Path) -> str: """Return a dotted module name based on the given path, anchored on root. From 17a34d291bb47e6e60a8b7223545f7e617ddf374 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 00:03:02 +0100 Subject: [PATCH 04/11] make integrations. --- environment.yml | 1 - src/_pytask/path.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/environment.yml b/environment.yml index 1cd3b6fa6..45ca67a46 100644 --- a/environment.yml +++ b/environment.yml @@ -50,5 +50,4 @@ dependencies: - sphinxext-opengraph - pip: - - sphinx-toolbox - -e . diff --git a/src/_pytask/path.py b/src/_pytask/path.py index f4e4c0761..5771d3093 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -134,7 +134,7 @@ def import_path( """ try: - pkg_root, module_name = resolve_pkg_root_and_module_name( + pkg_root, module_name = _resolve_pkg_root_and_module_name( path, consider_namespace_packages=consider_namespace_packages ) except CouldNotResolvePathError: @@ -167,7 +167,7 @@ def import_path( return mod -def resolve_package_path(path: Path) -> Path | None: +def _resolve_package_path(path: Path) -> Path | None: """Resolve package path. Return the Python package path by looking for the last directory upwards which still @@ -187,7 +187,7 @@ def resolve_package_path(path: Path) -> Path | None: return result -def resolve_pkg_root_and_module_name( +def _resolve_pkg_root_and_module_name( path: Path, *, consider_namespace_packages: bool = False ) -> tuple[Path, str]: """Resolve the root package directory and module name for the given Python file. @@ -213,13 +213,13 @@ def resolve_pkg_root_and_module_name( Raises CouldNotResolvePathError if the given path does not belong to a package (missing any __init__.py files). """ - pkg_path = resolve_package_path(path) + pkg_path = _resolve_package_path(path) if pkg_path is not None: pkg_root = pkg_path.parent # https://packaging.python.org/en/latest/guides/packaging-namespace-packages/ if consider_namespace_packages: # Go upwards in the hierarchy, if we find a parent path included in - # sys.path, it means the package found by resolve_package_path() actually + # sys.path, it means the package found by _resolve_package_path() actually # belongs to a namespace package. for parent in pkg_root.parents: # If any of the parent paths has a __init__.py, it means it is not a @@ -242,7 +242,7 @@ def resolve_pkg_root_and_module_name( class CouldNotResolvePathError(Exception): - """Custom exception raised by resolve_pkg_root_and_module_name.""" + """Custom exception raised by _resolve_pkg_root_and_module_name.""" def _import_module_using_spec( From 5f27c25b53ba2573b80a676e228d192fa8b52c1b Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 00:20:05 +0100 Subject: [PATCH 05/11] FIx. --- environment.yml | 1 - src/_pytask/path.py | 15 +- tests/test_path.py | 504 ++++++++++++++++++++++++++++++-------------- 3 files changed, 359 insertions(+), 161 deletions(-) diff --git a/environment.yml b/environment.yml index 45ca67a46..e773cd340 100644 --- a/environment.yml +++ b/environment.yml @@ -29,7 +29,6 @@ dependencies: - jupyterlab - matplotlib - nbmake - - pre-commit - pygraphviz - pytest - pytest-cov diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 5771d3093..665cc51c5 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -8,6 +8,7 @@ import itertools import os import sys +from enum import Enum from pathlib import Path from types import ModuleType from typing import Sequence @@ -123,8 +124,18 @@ def find_case_sensitive_path(path: Path, platform: str) -> Path: return path.resolve() if platform == "win32" else path +class ImportMode(Enum): + """Enum to specify the import mode.""" + + importlib = "importlib" + + def import_path( - path: Path, root: Path, consider_namespace_packages: bool = False + path: Path, + root: Path, + *, + consider_namespace_packages: bool = False, + mode: ImportMode = ImportMode.importlib, ) -> ModuleType: """Import and return a module from the given path. @@ -133,6 +144,8 @@ def import_path( ``prepend``. More discussion and information can be found in :issue:`373`. """ + mode = ImportMode(mode) + try: pkg_root, module_name = _resolve_pkg_root_and_module_name( path, consider_namespace_packages=consider_namespace_packages diff --git a/tests/test_path.py b/tests/test_path.py index d79f4ffad..36d39e9ab 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -10,8 +10,11 @@ from typing import Any import pytest +from _pytask.path import CouldNotResolvePathError +from _pytask.path import ImportMode from _pytask.path import _insert_missing_modules from _pytask.path import _module_name_from_path +from _pytask.path import _resolve_pkg_root_and_module_name from _pytask.path import find_case_sensitive_path from _pytask.path import find_closest_ancestor from _pytask.path import find_common_ancestor @@ -178,183 +181,366 @@ def test_no_meta_path_found( import_path(simple_module, root=tmp_path) -@pytest.mark.unit() -def test_importmode_importlib_with_dataclass(tmp_path: Path) -> None: +@pytest.fixture(params=[True, False]) +def ns_param(request: pytest.FixtureRequest) -> bool: """ - Ensure that importlib mode works with a module containing dataclasses (#373, - pytest#7856). + Simple parametrized fixture for tests which call import_path() with + consider_namespace_packages using True and False. """ - fn = tmp_path.joinpath("_src/project/task_dataclass.py") - fn.parent.mkdir(parents=True) - fn.write_text( - textwrap.dedent( - """ - from dataclasses import dataclass - - @dataclass - class Data: - value: str - """ - ) - ) - - module = import_path(fn, root=tmp_path) - Data: Any = module.Data # noqa: N806 - data = Data(value="foo") - assert data.value == "foo" - assert data.__module__ == "_src.project.task_dataclass" + return bool(request.param) @pytest.mark.unit() -def test_importmode_importlib_with_pickle(tmp_path: Path) -> None: - """Ensure that importlib mode works with pickle (#373, pytest#7859).""" - fn = tmp_path.joinpath("_src/project/task_pickle.py") - fn.parent.mkdir(parents=True) - fn.write_text( - textwrap.dedent( - """ - import pickle - - def _action(): - return 42 - - def round_trip(): - s = pickle.dumps(_action) - return pickle.loads(s) - """ +class TestImportLibMode: + def test_importmode_importlib_with_dataclass( + self, tmp_path: Path, ns_param: bool + ) -> None: + """ + Ensure that importlib mode works with a module containing dataclasses (#373, + pytest#7856). + """ + fn = tmp_path.joinpath("_src/project/task_dataclass.py") + fn.parent.mkdir(parents=True) + fn.write_text( + textwrap.dedent( + """ + from dataclasses import dataclass + + @dataclass + class Data: + value: str + """ + ) ) - ) - module = import_path(fn, root=tmp_path) - round_trip = module.round_trip - action = round_trip() - assert action() == 42 + module = import_path(fn, mode="importlib", root=tmp_path) + Data: Any = module.Data # noqa: N806 + data = Data(value="foo") + assert data.value == "foo" + assert data.__module__ == "_src.project.task_dataclass" - -@pytest.mark.unit() -def test_importmode_importlib_with_pickle_separate_modules(tmp_path: Path) -> None: - """ - Ensure that importlib mode works can load pickles that look similar but are - defined in separate modules. - """ - fn1 = tmp_path.joinpath("_src/m1/project/task.py") - fn1.parent.mkdir(parents=True) - fn1.write_text( - textwrap.dedent( - """ - import dataclasses - import pickle - - @dataclasses.dataclass - class Data: - x: int = 42 - """ + # Ensure we do not import the same module again (pytest#11475). + module2 = import_path( + fn, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param ) - ) - - fn2 = tmp_path.joinpath("_src/m2/project/task.py") - fn2.parent.mkdir(parents=True) - fn2.write_text( - textwrap.dedent( - """ - import dataclasses - import pickle - - @dataclasses.dataclass - class Data: - x: str = "" - """ + assert module is module2 + + def test_importmode_importlib_with_pickle( + self, tmp_path: Path, ns_param: bool + ) -> None: + """Ensure that importlib mode works with pickle (#373, pytest#7859).""" + fn = tmp_path.joinpath("_src/project/task_pickle.py") + fn.parent.mkdir(parents=True) + fn.write_text( + textwrap.dedent( + """ + import pickle + + def _action(): + return 42 + + def round_trip(): + s = pickle.dumps(_action) + return pickle.loads(s) + """ + ) ) - ) - - import pickle - - def round_trip(obj): - s = pickle.dumps(obj) - return pickle.loads(s) # noqa: S301 - - module = import_path(fn1, root=tmp_path) - Data1 = module.Data # noqa: N806 - - module = import_path(fn2, root=tmp_path) - Data2 = module.Data # noqa: N806 - - assert round_trip(Data1(20)) == Data1(20) - assert round_trip(Data2("hello")) == Data2("hello") - assert Data1.__module__ == "_src.m1.project.task" - assert Data2.__module__ == "_src.m2.project.task" + module = import_path( + fn, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param + ) + round_trip = module.round_trip + action = round_trip() + assert action() == 42 + + def test_importmode_importlib_with_pickle_separate_modules( + self, tmp_path: Path, ns_param: bool + ) -> None: + """ + Ensure that importlib mode works can load pickles that look similar but are + defined in separate modules. + """ + fn1 = tmp_path.joinpath("_src/m1/project/task.py") + fn1.parent.mkdir(parents=True) + fn1.write_text( + textwrap.dedent( + """ + import dataclasses + import pickle + + @dataclasses.dataclass + class Data: + x: int = 42 + """ + ) + ) -@pytest.mark.unit() -def test_module_name_from_path(tmp_path: Path) -> None: - result = _module_name_from_path(tmp_path / "src/project/task_foo.py", tmp_path) - assert result == "src.project.task_foo" + fn2 = tmp_path.joinpath("_src/m2/project/task.py") + fn2.parent.mkdir(parents=True) + fn2.write_text( + textwrap.dedent( + """ + import dataclasses + import pickle + + @dataclasses.dataclass + class Data: + x: str = "" + """ + ) + ) - # Path is not relative to root dir: use the full path to obtain the module name. - result = _module_name_from_path(Path("/home/foo/task_foo.py"), Path("/bar")) - assert result == "home.foo.task_foo" + import pickle - # Importing __init__.py files should return the package as module name. - result = _module_name_from_path(tmp_path / "src/app/__init__.py", tmp_path) - assert result == "src.app" + def round_trip(obj): + s = pickle.dumps(obj) + return pickle.loads(s) # noqa: S301 + module = import_path( + fn1, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param + ) + Data1 = module.Data # noqa: N806 -@pytest.mark.unit() -def test_insert_missing_modules( - monkeypatch: pytest.MonkeyPatch, tmp_path: Path -) -> None: - monkeypatch.chdir(tmp_path) - # Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and - # don't end up being imported. - modules = {"xxx.project.foo": ModuleType("xxx.project.foo")} - _insert_missing_modules(modules, "xxx.project.foo") - assert sorted(modules) == ["xxx", "xxx.project", "xxx.project.foo"] + module = import_path( + fn2, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param + ) + Data2 = module.Data # noqa: N806 + + assert round_trip(Data1(20)) == Data1(20) + assert round_trip(Data2("hello")) == Data2("hello") + assert Data1.__module__ == "_src.m1.project.task" + assert Data2.__module__ == "_src.m2.project.task" + + def test_module_name_from_path(self, tmp_path: Path) -> None: + result = _module_name_from_path(tmp_path / "src/project/task_foo.py", tmp_path) + assert result == "src.project.task_foo" + + # Path is not relative to root dir: use the full path to obtain the module name. + result = _module_name_from_path(Path("/home/foo/task_foo.py"), Path("/bar")) + assert result == "home.foo.task_foo" + + # Importing __init__.py files should return the package as module name. + result = _module_name_from_path(tmp_path / "src/app/__init__.py", tmp_path) + assert result == "src.app" + + # Unless __init__.py file is at the root, in which case we cannot have an empty + # module name. + result = _module_name_from_path(tmp_path / "__init__.py", tmp_path) + assert result == "__init__" + + # Modules which start with "." are considered relative and will not be imported + # unless part of a package, so we replace it with a "_" when generating the fake + # module name. + result = _module_name_from_path(tmp_path / ".env/tasks/task_foo.py", tmp_path) + assert result == "_env.tasks.task_foo" + + # We want to avoid generating extra intermediate modules if some directory just + # happens to contain a "." in the name. + result = _module_name_from_path( + tmp_path / ".env.310/tasks/task_foo.py", tmp_path + ) + assert result == "_env_310.tasks.task_foo" + + def test_resolve_pkg_root_and_module_name( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + # Create a directory structure first without __init__.py files. + (tmp_path / "src/app/core").mkdir(parents=True) + models_py = tmp_path / "src/app/core/models.py" + models_py.touch() + with pytest.raises(CouldNotResolvePathError): + _ = _resolve_pkg_root_and_module_name(models_py) + + # Create the __init__.py files, it should now resolve to a proper module name. + (tmp_path / "src/app/__init__.py").touch() + (tmp_path / "src/app/core/__init__.py").touch() + assert _resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=True + ) == (tmp_path / "src", "app.core.models") + + # If we add tmp_path to sys.path, src becomes a namespace package. + monkeypatch.syspath_prepend(tmp_path) + assert _resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=True + ) == (tmp_path, "src.app.core.models") + assert _resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=False + ) == (tmp_path / "src", "app.core.models") + + def test_insert_missing_modules( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ) -> None: + monkeypatch.chdir(tmp_path) + # Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and + # don't end up being imported. + modules = {"xxx.project.foo": ModuleType("xxx.project.foo")} + _insert_missing_modules(modules, "xxx.project.foo") + assert sorted(modules) == ["xxx", "xxx.project", "xxx.project.foo"] + + mod = ModuleType("mod", doc="My Module") + modules = {"xxy": mod} + _insert_missing_modules(modules, "xxy") + assert modules == {"xxy": mod} + + modules = {} + _insert_missing_modules(modules, "") + assert not modules + + def test_parent_contains_child_module_attribute( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ): + monkeypatch.chdir(tmp_path) + # Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and + # don't end up being imported. + modules = {"xxx.tasks.foo": ModuleType("xxx.tasks.foo")} + _insert_missing_modules(modules, "xxx.tasks.foo") + assert sorted(modules) == ["xxx", "xxx.tasks", "xxx.tasks.foo"] + assert modules["xxx"].tasks is modules["xxx.tasks"] + assert modules["xxx.tasks"].foo is modules["xxx.tasks.foo"] + + def test_importlib_package( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, ns_param: bool + ) -> None: + """ + Importing a package using --importmode=importlib should not import the + package's __init__.py file more than once (#11306). + """ + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(tmp_path) + + package_name = "importlib_import_package" + tmp_path.joinpath(package_name).mkdir() + init = tmp_path.joinpath(f"{package_name}/__init__.py") + init.write_text( + textwrap.dedent( + """ + from .singleton import Singleton + instance = Singleton() + """ + ), + encoding="ascii", + ) + singleton = tmp_path.joinpath(f"{package_name}/singleton.py") + singleton.write_text( + textwrap.dedent( + """ + class Singleton: + INSTANCES = [] + def __init__(self) -> None: + self.INSTANCES.append(self) + if len(self.INSTANCES) > 1: + raise RuntimeError("Already initialized") + """ + ), + encoding="ascii", + ) - mod = ModuleType("mod", doc="My Module") - modules = {"xxy": mod} - _insert_missing_modules(modules, "xxy") - assert modules == {"xxy": mod} + mod = import_path( + init, + root=tmp_path, + mode=ImportMode.importlib, + consider_namespace_packages=ns_param, + ) + assert len(mod.instance.INSTANCES) == 1 + # Ensure we do not import the same module again (#11475). + mod2 = import_path( + init, + root=tmp_path, + mode=ImportMode.importlib, + consider_namespace_packages=ns_param, + ) + assert mod is mod2 + + +class TestNamespacePackages: + """Test import_path support when importing from properly namespace packages.""" + + def setup_directories( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> tuple[Path, Path]: + # Set up a namespace package "com.company", containing + # two subpackages, "app" and "calc". + (tmp_path / "src/dist1/com/company/app/core").mkdir(parents=True) + (tmp_path / "src/dist1/com/company/app/__init__.py").touch() + (tmp_path / "src/dist1/com/company/app/core/__init__.py").touch() + models_py = tmp_path / "src/dist1/com/company/app/core/models.py" + models_py.touch() + + (tmp_path / "src/dist2/com/company/calc/algo").mkdir(parents=True) + (tmp_path / "src/dist2/com/company/calc/__init__.py").touch() + (tmp_path / "src/dist2/com/company/calc/algo/__init__.py").touch() + algorithms_py = tmp_path / "src/dist2/com/company/calc/algo/algorithms.py" + algorithms_py.touch() + + monkeypatch.syspath_prepend(tmp_path / "src/dist1") + monkeypatch.syspath_prepend(tmp_path / "src/dist2") + return models_py, algorithms_py + + @pytest.mark.parametrize("import_mode", ImportMode) + def test_resolve_pkg_root_and_module_name_ns_multiple_levels( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, import_mode: ImportMode + ) -> None: + models_py, algorithms_py = self.setup_directories(tmp_path, monkeypatch) + + pkg_root, module_name = _resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=True + ) + assert (pkg_root, module_name) == ( + tmp_path / "src/dist1", + "com.company.app.core.models", + ) - modules = {} - _insert_missing_modules(modules, "") - assert not modules + mod = import_path( + models_py, mode=import_mode, root=tmp_path, consider_namespace_packages=True + ) + assert mod.__name__ == "com.company.app.core.models" + assert mod.__file__ == str(models_py) + # Ensure we do not import the same module again (#11475). + mod2 = import_path( + models_py, mode=import_mode, root=tmp_path, consider_namespace_packages=True + ) + assert mod is mod2 -@pytest.mark.unit() -def test_importlib_package(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): - """ - Importing a package using --importmode=importlib should not import the - package's __init__.py file more than once (#11306). - """ - monkeypatch.chdir(tmp_path) - monkeypatch.syspath_prepend(tmp_path) - - package_name = "importlib_import_package" - tmp_path.joinpath(package_name).mkdir() - init = tmp_path.joinpath(f"{package_name}/__init__.py") - init.write_text( - textwrap.dedent( - """ - from .singleton import Singleton - instance = Singleton() - """ - ), - encoding="ascii", - ) - singleton = tmp_path.joinpath(f"{package_name}/singleton.py") - singleton.write_text( - textwrap.dedent( - """ - class Singleton: - INSTANCES = [] - def __init__(self) -> None: - self.INSTANCES.append(self) - if len(self.INSTANCES) > 1: - raise RuntimeError("Already initialized") - """ - ), - encoding="ascii", - ) + pkg_root, module_name = _resolve_pkg_root_and_module_name( + algorithms_py, consider_namespace_packages=True + ) + assert (pkg_root, module_name) == ( + tmp_path / "src/dist2", + "com.company.calc.algo.algorithms", + ) - mod = import_path(init, root=tmp_path) - assert len(mod.instance.INSTANCES) == 1 + mod = import_path( + algorithms_py, + mode=import_mode, + root=tmp_path, + consider_namespace_packages=True, + ) + assert mod.__name__ == "com.company.calc.algo.algorithms" + assert mod.__file__ == str(algorithms_py) + + # Ensure we do not import the same module again (#11475). + mod2 = import_path( + algorithms_py, + mode=import_mode, + root=tmp_path, + consider_namespace_packages=True, + ) + assert mod is mod2 + + def test_incorrect_namespace_package( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + models_py, algorithms_py = self.setup_directories(tmp_path, monkeypatch) + # Namespace packages must not have an __init__.py at any of its + # directories; if it does, we then fall back to importing just the + # part of the package containing the __init__.py files. + (tmp_path / "src/dist1/com/__init__.py").touch() + + pkg_root, module_name = _resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=True + ) + assert (pkg_root, module_name) == ( + tmp_path / "src/dist1/com/company", + "app.core.models", + ) From a69afd4e0b276348d82b5852a71bdb4ec98a3d84 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 00:37:07 +0100 Subject: [PATCH 06/11] Fix. --- src/_pytask/path.py | 44 ++++++++++++++++++++++++++++++-------------- tests/test_path.py | 2 +- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 665cc51c5..98f277b12 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -297,39 +297,48 @@ def _import_module_using_spec( def _module_name_from_path(path: Path, root: Path) -> str: """Return a dotted module name based on the given path, anchored on root. - For example: path="projects/src/project/task_foo.py" and root="/projects", the - resulting module name will be "src.project.task_foo". + For example: path="projects/src/tests/test_foo.py" and root="/projects", the + resulting module name will be "src.tests.test_foo". """ path = path.with_suffix("") try: relative_path = path.relative_to(root) except ValueError: - # If we can't get a relative path to root, use the full path, except for the - # first part ("d:\\" or "/" depending on the platform, for example). + # If we can't get a relative path to root, use the full path, except + # for the first part ("d:\\" or "/" depending on the platform, for example). path_parts = path.parts[1:] else: # Use the parts for the relative path to the root path. path_parts = relative_path.parts - # Module name for packages do not contain the __init__ file, unless the - # `__init__.py` file is at the root. + # Module name for packages do not contain the __init__ file, unless + # the `__init__.py` file is at the root. if len(path_parts) >= 2 and path_parts[-1] == "__init__": # noqa: PLR2004 path_parts = path_parts[:-1] + # Module names cannot contain ".", normalize them to "_". This prevents a directory + # having a "." in the name (".env.310" for example) causing extra intermediate + # modules. Also, important to replace "." at the start of paths, as those are + # considered relative imports. + path_parts = tuple(x.replace(".", "_") for x in path_parts) + return ".".join(path_parts) def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) -> None: - """Insert missing modules when importing modules with :func:`import_path`. + """Insert missing modules in sys.modules. - When we want to import a module as ``src.project.task_foo`` for example, we need to - create empty modules ``src`` and ``src.project`` after inserting - ``src.project.task_foo``, otherwise ``src.project.task_foo`` is not importable by - ``__import__``. + Used by ``import_path`` to create intermediate modules when using mode=importlib. + When we want to import a module as "src.tests.test_foo" for example, we need to + create empty modules "src" and "src.tests" after inserting "src.tests.test_foo", + otherwise "src.tests.test_foo" is not importable by ``__import__``. """ module_parts = module_name.split(".") + child_module: ModuleType | None = None + module: ModuleType | None = None + child_name: str = "" while module_name: if module_name not in modules: try: @@ -339,13 +348,20 @@ def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) -> # creating a dummy module. if not sys.meta_path: raise ModuleNotFoundError # noqa: TRY301 - importlib.import_module(module_name) + module = importlib.import_module(module_name) except ModuleNotFoundError: module = ModuleType( module_name, - doc="Empty module created by pytask.", + doc="Empty module created by pytest's importmode=importlib.", ) - modules[module_name] = module + else: + module = modules[module_name] + # Add child attribute to the parent that can reference the child modules. + if child_module and not hasattr(module, child_name): + setattr(module, child_name, child_module) + modules[module_name] = module + # Keep track of the child module while moving up the tree. + child_module, child_name = module, module_name.rpartition(".")[-1] module_parts.pop(-1) module_name = ".".join(module_parts) diff --git a/tests/test_path.py b/tests/test_path.py index 36d39e9ab..6a6331ae1 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -181,7 +181,7 @@ def test_no_meta_path_found( import_path(simple_module, root=tmp_path) -@pytest.fixture(params=[True, False]) +@pytest.fixture(params=[False]) def ns_param(request: pytest.FixtureRequest) -> bool: """ Simple parametrized fixture for tests which call import_path() with From 3f94d99b9748e7b33fe81fc025681264a0275af3 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 00:39:38 +0100 Subject: [PATCH 07/11] to changes. --- docs/source/changes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/changes.md b/docs/source/changes.md index ba7efaf5b..b348b8ec8 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -26,6 +26,8 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and - {pull}`579` fixes an interaction with `--pdb` and `--trace` and task that return. The debugging modes swallowed the return and `None` was returned. Closes {issue}`574`. - {pull}`581` simplifies the code for tracebacks and unpublishes some utility functions. +- {pull}`589` enables `import_path` to resolve the root path and module name of an + imported file. ## 0.4.7 - 2024-03-19 From 29ffd928bcd5e8c47283e734fa7137adb85390f6 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 00:43:05 +0100 Subject: [PATCH 08/11] Fix. --- environment.yml | 1 + src/_pytask/path.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index e773cd340..04062711b 100644 --- a/environment.yml +++ b/environment.yml @@ -46,6 +46,7 @@ dependencies: - sphinx-click - sphinx-copybutton - sphinx-design >=0.3.0 + - sphinx-toolbox - sphinxext-opengraph - pip: diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 98f277b12..8385c68e5 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -266,7 +266,9 @@ def _import_module_using_spec( Tries to import a module by its canonical name, path to the .py file, and its parent location. - :param insert_modules: + Parameters + ---------- + insert_modules If True, will call insert_missing_modules to create empty intermediate modules for made-up module names (when importing test files not reachable from sys.path). Note: we can probably drop insert_missing_modules altogether: instead From 2f9361e49cf6b1e770b461fdb3ca8e8dcd0b6e97 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 01:06:11 +0100 Subject: [PATCH 09/11] remove loose ends. --- src/_pytask/path.py | 38 +++++++++++++------------------------- tests/test_path.py | 34 ++++++++-------------------------- 2 files changed, 21 insertions(+), 51 deletions(-) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 8385c68e5..4897cb179 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -8,7 +8,6 @@ import itertools import os import sys -from enum import Enum from pathlib import Path from types import ModuleType from typing import Sequence @@ -124,28 +123,17 @@ def find_case_sensitive_path(path: Path, platform: str) -> Path: return path.resolve() if platform == "win32" else path -class ImportMode(Enum): - """Enum to specify the import mode.""" - - importlib = "importlib" - - def import_path( - path: Path, - root: Path, - *, - consider_namespace_packages: bool = False, - mode: ImportMode = ImportMode.importlib, + path: Path, root: Path, *, consider_namespace_packages: bool = False ) -> ModuleType: """Import and return a module from the given path. - The function is taken from pytest when the import mode is set to ``importlib``. It - pytest's recommended import mode for new projects although the default is set to - ``prepend``. More discussion and information can be found in :issue:`373`. + The functions are taken from pytest when the import mode is set to ``importlib``. It + was assumed to be the new default import mode but insurmountable tradeoffs caused + the default to be set to ``prepend``. More discussion and information can be found + in :issue:`373`. """ - mode = ImportMode(mode) - try: pkg_root, module_name = _resolve_pkg_root_and_module_name( path, consider_namespace_packages=consider_namespace_packages @@ -270,11 +258,11 @@ def _import_module_using_spec( ---------- insert_modules If True, will call insert_missing_modules to create empty intermediate modules - for made-up module names (when importing test files not reachable from + for made-up module names (when importing task files not reachable from sys.path). Note: we can probably drop insert_missing_modules altogether: instead - of generating module names such as "src.tests.test_foo", which require + of generating module names such as "src.tasks.task_foo", which require intermediate empty modules, we might just as well generate unique module names - like "src_tests_test_foo". + like "src_tasks_task_foo". """ # Checking with sys.meta_path first in case one of its hooks can import this module, @@ -299,8 +287,8 @@ def _import_module_using_spec( def _module_name_from_path(path: Path, root: Path) -> str: """Return a dotted module name based on the given path, anchored on root. - For example: path="projects/src/tests/test_foo.py" and root="/projects", the - resulting module name will be "src.tests.test_foo". + For example: path="projects/src/tasks/task_foo.py" and root="/projects", the + resulting module name will be "src.tasks.task_foo". """ path = path.with_suffix("") @@ -332,9 +320,9 @@ def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) -> """Insert missing modules in sys.modules. Used by ``import_path`` to create intermediate modules when using mode=importlib. - When we want to import a module as "src.tests.test_foo" for example, we need to - create empty modules "src" and "src.tests" after inserting "src.tests.test_foo", - otherwise "src.tests.test_foo" is not importable by ``__import__``. + When we want to import a module as "src.tasks.task_foo" for example, we need to + create empty modules "src" and "src.tasks" after inserting "src.tasks.task_foo", + otherwise "src.tasks.task_foo" is not importable by ``__import__``. """ module_parts = module_name.split(".") diff --git a/tests/test_path.py b/tests/test_path.py index 6a6331ae1..f5b4c6634 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -11,7 +11,6 @@ import pytest from _pytask.path import CouldNotResolvePathError -from _pytask.path import ImportMode from _pytask.path import _insert_missing_modules from _pytask.path import _module_name_from_path from _pytask.path import _resolve_pkg_root_and_module_name @@ -213,16 +212,14 @@ class Data: ) ) - module = import_path(fn, mode="importlib", root=tmp_path) + module = import_path(fn, root=tmp_path) Data: Any = module.Data # noqa: N806 data = Data(value="foo") assert data.value == "foo" assert data.__module__ == "_src.project.task_dataclass" # Ensure we do not import the same module again (pytest#11475). - module2 = import_path( - fn, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param - ) + module2 = import_path(fn, root=tmp_path, consider_namespace_packages=ns_param) assert module is module2 def test_importmode_importlib_with_pickle( @@ -246,9 +243,7 @@ def round_trip(): ) ) - module = import_path( - fn, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param - ) + module = import_path(fn, root=tmp_path, consider_namespace_packages=ns_param) round_trip = module.round_trip action = round_trip() assert action() == 42 @@ -296,14 +291,10 @@ def round_trip(obj): s = pickle.dumps(obj) return pickle.loads(s) # noqa: S301 - module = import_path( - fn1, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param - ) + module = import_path(fn1, root=tmp_path, consider_namespace_packages=ns_param) Data1 = module.Data # noqa: N806 - module = import_path( - fn2, mode="importlib", root=tmp_path, consider_namespace_packages=ns_param - ) + module = import_path(fn2, root=tmp_path, consider_namespace_packages=ns_param) Data2 = module.Data # noqa: N806 assert round_trip(Data1(20)) == Data1(20) @@ -438,7 +429,6 @@ def __init__(self) -> None: mod = import_path( init, root=tmp_path, - mode=ImportMode.importlib, consider_namespace_packages=ns_param, ) assert len(mod.instance.INSTANCES) == 1 @@ -446,7 +436,6 @@ def __init__(self) -> None: mod2 = import_path( init, root=tmp_path, - mode=ImportMode.importlib, consider_namespace_packages=ns_param, ) assert mod is mod2 @@ -476,9 +465,8 @@ def setup_directories( monkeypatch.syspath_prepend(tmp_path / "src/dist2") return models_py, algorithms_py - @pytest.mark.parametrize("import_mode", ImportMode) def test_resolve_pkg_root_and_module_name_ns_multiple_levels( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, import_mode: ImportMode + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: models_py, algorithms_py = self.setup_directories(tmp_path, monkeypatch) @@ -490,16 +478,12 @@ def test_resolve_pkg_root_and_module_name_ns_multiple_levels( "com.company.app.core.models", ) - mod = import_path( - models_py, mode=import_mode, root=tmp_path, consider_namespace_packages=True - ) + mod = import_path(models_py, root=tmp_path, consider_namespace_packages=True) assert mod.__name__ == "com.company.app.core.models" assert mod.__file__ == str(models_py) # Ensure we do not import the same module again (#11475). - mod2 = import_path( - models_py, mode=import_mode, root=tmp_path, consider_namespace_packages=True - ) + mod2 = import_path(models_py, root=tmp_path, consider_namespace_packages=True) assert mod is mod2 pkg_root, module_name = _resolve_pkg_root_and_module_name( @@ -512,7 +496,6 @@ def test_resolve_pkg_root_and_module_name_ns_multiple_levels( mod = import_path( algorithms_py, - mode=import_mode, root=tmp_path, consider_namespace_packages=True, ) @@ -522,7 +505,6 @@ def test_resolve_pkg_root_and_module_name_ns_multiple_levels( # Ensure we do not import the same module again (#11475). mod2 = import_path( algorithms_py, - mode=import_mode, root=tmp_path, consider_namespace_packages=True, ) From 71e4eedc1ca431fbe6b6873c7c248afb779ea7eb Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 10:56:16 +0200 Subject: [PATCH 10/11] Remove more unused code paths --- src/_pytask/path.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 4897cb179..6af28b19b 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -145,9 +145,7 @@ def import_path( with contextlib.suppress(KeyError): return sys.modules[module_name] - mod = _import_module_using_spec( - module_name, path, pkg_root, insert_modules=False - ) + mod = _import_module_using_spec(module_name, path, pkg_root) if mod is not None: return mod @@ -247,23 +245,13 @@ class CouldNotResolvePathError(Exception): def _import_module_using_spec( - module_name: str, module_path: Path, module_location: Path, *, insert_modules: bool + module_name: str, module_path: Path, module_location: Path ) -> ModuleType | None: """Import a module using its specification. Tries to import a module by its canonical name, path to the .py file, and its parent location. - Parameters - ---------- - insert_modules - If True, will call insert_missing_modules to create empty intermediate modules - for made-up module names (when importing task files not reachable from - sys.path). Note: we can probably drop insert_missing_modules altogether: instead - of generating module names such as "src.tasks.task_foo", which require - intermediate empty modules, we might just as well generate unique module names - like "src_tasks_task_foo". - """ # Checking with sys.meta_path first in case one of its hooks can import this module, # such as our own assertion-rewrite hook. @@ -277,8 +265,6 @@ def _import_module_using_spec( mod = importlib.util.module_from_spec(spec) sys.modules[module_name] = mod spec.loader.exec_module(mod) # type: ignore[union-attr] - if insert_modules: - _insert_missing_modules(sys.modules, module_name) return mod return None From 6f685390ce68e54a46b2bd30476f25175a4e8c04 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 31 Mar 2024 11:02:07 +0200 Subject: [PATCH 11/11] Remove consider_namespace_packages. --- src/_pytask/path.py | 33 ++-------- tests/test_path.py | 156 +++----------------------------------------- 2 files changed, 14 insertions(+), 175 deletions(-) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 6af28b19b..aa4dc5753 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -123,9 +123,7 @@ def find_case_sensitive_path(path: Path, platform: str) -> Path: return path.resolve() if platform == "win32" else path -def import_path( - path: Path, root: Path, *, consider_namespace_packages: bool = False -) -> ModuleType: +def import_path(path: Path, root: Path) -> ModuleType: """Import and return a module from the given path. The functions are taken from pytest when the import mode is set to ``importlib``. It @@ -135,9 +133,7 @@ def import_path( """ try: - pkg_root, module_name = _resolve_pkg_root_and_module_name( - path, consider_namespace_packages=consider_namespace_packages - ) + pkg_root, module_name = _resolve_pkg_root_and_module_name(path) except CouldNotResolvePathError: pass else: @@ -186,9 +182,7 @@ def _resolve_package_path(path: Path) -> Path | None: return result -def _resolve_pkg_root_and_module_name( - path: Path, *, consider_namespace_packages: bool = False -) -> tuple[Path, str]: +def _resolve_pkg_root_and_module_name(path: Path) -> tuple[Path, str]: """Resolve the root package directory and module name for the given Python file. Return the path to the directory of the root package that contains the given Python @@ -203,32 +197,13 @@ def _resolve_pkg_root_and_module_name( Passing the full path to `models.py` will yield Path("src") and "app.core.models". - If consider_namespace_packages is True, then we additionally check upwards in the - hierarchy until we find a directory that is reachable from sys.path, which marks it - as a namespace package: - - https://packaging.python.org/en/latest/guides/packaging-namespace-packages - Raises CouldNotResolvePathError if the given path does not belong to a package (missing any __init__.py files). + """ pkg_path = _resolve_package_path(path) if pkg_path is not None: pkg_root = pkg_path.parent - # https://packaging.python.org/en/latest/guides/packaging-namespace-packages/ - if consider_namespace_packages: - # Go upwards in the hierarchy, if we find a parent path included in - # sys.path, it means the package found by _resolve_package_path() actually - # belongs to a namespace package. - for parent in pkg_root.parents: - # If any of the parent paths has a __init__.py, it means it is not a - # namespace package (see the docs linked above). - if (parent / "__init__.py").is_file(): - break - if str(parent) in sys.path: - # Point the pkg_root to the root of the namespace package. - pkg_root = parent - break names = list(path.with_suffix("").relative_to(pkg_root).parts) if names[-1] == "__init__": diff --git a/tests/test_path.py b/tests/test_path.py index f5b4c6634..286b512d5 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -10,10 +10,8 @@ from typing import Any import pytest -from _pytask.path import CouldNotResolvePathError from _pytask.path import _insert_missing_modules from _pytask.path import _module_name_from_path -from _pytask.path import _resolve_pkg_root_and_module_name from _pytask.path import find_case_sensitive_path from _pytask.path import find_closest_ancestor from _pytask.path import find_common_ancestor @@ -180,20 +178,9 @@ def test_no_meta_path_found( import_path(simple_module, root=tmp_path) -@pytest.fixture(params=[False]) -def ns_param(request: pytest.FixtureRequest) -> bool: - """ - Simple parametrized fixture for tests which call import_path() with - consider_namespace_packages using True and False. - """ - return bool(request.param) - - @pytest.mark.unit() class TestImportLibMode: - def test_importmode_importlib_with_dataclass( - self, tmp_path: Path, ns_param: bool - ) -> None: + def test_importmode_importlib_with_dataclass(self, tmp_path: Path) -> None: """ Ensure that importlib mode works with a module containing dataclasses (#373, pytest#7856). @@ -219,12 +206,10 @@ class Data: assert data.__module__ == "_src.project.task_dataclass" # Ensure we do not import the same module again (pytest#11475). - module2 = import_path(fn, root=tmp_path, consider_namespace_packages=ns_param) + module2 = import_path(fn, root=tmp_path) assert module is module2 - def test_importmode_importlib_with_pickle( - self, tmp_path: Path, ns_param: bool - ) -> None: + def test_importmode_importlib_with_pickle(self, tmp_path: Path) -> None: """Ensure that importlib mode works with pickle (#373, pytest#7859).""" fn = tmp_path.joinpath("_src/project/task_pickle.py") fn.parent.mkdir(parents=True) @@ -243,13 +228,13 @@ def round_trip(): ) ) - module = import_path(fn, root=tmp_path, consider_namespace_packages=ns_param) + module = import_path(fn, root=tmp_path) round_trip = module.round_trip action = round_trip() assert action() == 42 def test_importmode_importlib_with_pickle_separate_modules( - self, tmp_path: Path, ns_param: bool + self, tmp_path: Path ) -> None: """ Ensure that importlib mode works can load pickles that look similar but are @@ -291,10 +276,10 @@ def round_trip(obj): s = pickle.dumps(obj) return pickle.loads(s) # noqa: S301 - module = import_path(fn1, root=tmp_path, consider_namespace_packages=ns_param) + module = import_path(fn1, root=tmp_path) Data1 = module.Data # noqa: N806 - module = import_path(fn2, root=tmp_path, consider_namespace_packages=ns_param) + module = import_path(fn2, root=tmp_path) Data2 = module.Data # noqa: N806 assert round_trip(Data1(20)) == Data1(20) @@ -332,32 +317,6 @@ def test_module_name_from_path(self, tmp_path: Path) -> None: ) assert result == "_env_310.tasks.task_foo" - def test_resolve_pkg_root_and_module_name( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch - ) -> None: - # Create a directory structure first without __init__.py files. - (tmp_path / "src/app/core").mkdir(parents=True) - models_py = tmp_path / "src/app/core/models.py" - models_py.touch() - with pytest.raises(CouldNotResolvePathError): - _ = _resolve_pkg_root_and_module_name(models_py) - - # Create the __init__.py files, it should now resolve to a proper module name. - (tmp_path / "src/app/__init__.py").touch() - (tmp_path / "src/app/core/__init__.py").touch() - assert _resolve_pkg_root_and_module_name( - models_py, consider_namespace_packages=True - ) == (tmp_path / "src", "app.core.models") - - # If we add tmp_path to sys.path, src becomes a namespace package. - monkeypatch.syspath_prepend(tmp_path) - assert _resolve_pkg_root_and_module_name( - models_py, consider_namespace_packages=True - ) == (tmp_path, "src.app.core.models") - assert _resolve_pkg_root_and_module_name( - models_py, consider_namespace_packages=False - ) == (tmp_path / "src", "app.core.models") - def test_insert_missing_modules( self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: @@ -390,7 +349,7 @@ def test_parent_contains_child_module_attribute( assert modules["xxx.tasks"].foo is modules["xxx.tasks.foo"] def test_importlib_package( - self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, ns_param: bool + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: """ Importing a package using --importmode=importlib should not import the @@ -426,103 +385,8 @@ def __init__(self) -> None: encoding="ascii", ) - mod = import_path( - init, - root=tmp_path, - consider_namespace_packages=ns_param, - ) + mod = import_path(init, root=tmp_path) assert len(mod.instance.INSTANCES) == 1 # Ensure we do not import the same module again (#11475). - mod2 = import_path( - init, - root=tmp_path, - consider_namespace_packages=ns_param, - ) - assert mod is mod2 - - -class TestNamespacePackages: - """Test import_path support when importing from properly namespace packages.""" - - def setup_directories( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch - ) -> tuple[Path, Path]: - # Set up a namespace package "com.company", containing - # two subpackages, "app" and "calc". - (tmp_path / "src/dist1/com/company/app/core").mkdir(parents=True) - (tmp_path / "src/dist1/com/company/app/__init__.py").touch() - (tmp_path / "src/dist1/com/company/app/core/__init__.py").touch() - models_py = tmp_path / "src/dist1/com/company/app/core/models.py" - models_py.touch() - - (tmp_path / "src/dist2/com/company/calc/algo").mkdir(parents=True) - (tmp_path / "src/dist2/com/company/calc/__init__.py").touch() - (tmp_path / "src/dist2/com/company/calc/algo/__init__.py").touch() - algorithms_py = tmp_path / "src/dist2/com/company/calc/algo/algorithms.py" - algorithms_py.touch() - - monkeypatch.syspath_prepend(tmp_path / "src/dist1") - monkeypatch.syspath_prepend(tmp_path / "src/dist2") - return models_py, algorithms_py - - def test_resolve_pkg_root_and_module_name_ns_multiple_levels( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch - ) -> None: - models_py, algorithms_py = self.setup_directories(tmp_path, monkeypatch) - - pkg_root, module_name = _resolve_pkg_root_and_module_name( - models_py, consider_namespace_packages=True - ) - assert (pkg_root, module_name) == ( - tmp_path / "src/dist1", - "com.company.app.core.models", - ) - - mod = import_path(models_py, root=tmp_path, consider_namespace_packages=True) - assert mod.__name__ == "com.company.app.core.models" - assert mod.__file__ == str(models_py) - - # Ensure we do not import the same module again (#11475). - mod2 = import_path(models_py, root=tmp_path, consider_namespace_packages=True) + mod2 = import_path(init, root=tmp_path) assert mod is mod2 - - pkg_root, module_name = _resolve_pkg_root_and_module_name( - algorithms_py, consider_namespace_packages=True - ) - assert (pkg_root, module_name) == ( - tmp_path / "src/dist2", - "com.company.calc.algo.algorithms", - ) - - mod = import_path( - algorithms_py, - root=tmp_path, - consider_namespace_packages=True, - ) - assert mod.__name__ == "com.company.calc.algo.algorithms" - assert mod.__file__ == str(algorithms_py) - - # Ensure we do not import the same module again (#11475). - mod2 = import_path( - algorithms_py, - root=tmp_path, - consider_namespace_packages=True, - ) - assert mod is mod2 - - def test_incorrect_namespace_package( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch - ) -> None: - models_py, algorithms_py = self.setup_directories(tmp_path, monkeypatch) - # Namespace packages must not have an __init__.py at any of its - # directories; if it does, we then fall back to importing just the - # part of the package containing the __init__.py files. - (tmp_path / "src/dist1/com/__init__.py").touch() - - pkg_root, module_name = _resolve_pkg_root_and_module_name( - models_py, consider_namespace_packages=True - ) - assert (pkg_root, module_name) == ( - tmp_path / "src/dist1/com/company", - "app.core.models", - )