diff --git a/.agents/plans/pypi_hub_proxy_feature.md b/.agents/plans/pypi_hub_proxy_feature.md deleted file mode 100644 index b3360d9dbd..0000000000 --- a/.agents/plans/pypi_hub_proxy_feature.md +++ /dev/null @@ -1,296 +0,0 @@ -# Implementation Plan: Canonical Automatic PyPI Proxy Hub - -This document defines the locked, production-ready architectural, Starlark API, -and testing specifications for implementing dynamic PyPI dependency resolution in -`rules_python` using the `venv` flag. - ---- - -## 1. Architectural Strategy: The Canonical `@pypi` Proxy - -The `pip` bzlmod extension will automatically synthesize a canonical `@pypi` -proxy repository rule that orchestrates routing to underlying concrete hubs. - -### Bzlmod-Exclusive Scope - -The Unified PyPI Hub Proxy is an **exclusive feature of `bzlmod`**. Legacy -`WORKSPACE` evaluations using independent `pip_parse` repository macros are not -supported, as bzlmod's module extension architecture provides the required -centralized coordination to inspect and interlink cross-module hubs. - -### Automatic Proxy Construction & Collision Logic - -During the evaluation of the `pip` extension across the dependency graph: -1. **Unconditional Creation**: The extension will **always** synthesize a - proxy repository rule with the apparent name `pypi`, even if zero - `pip.parse` concrete hubs are defined in the dependency graph (in which - case the proxy is completely valid but empty). -2. **Collision Prevention**: If a user explicitly defines a concrete hub - named `pypi` (`pip.parse(hub_name = "pypi")`), the automatic proxy - synthesis is skipped so the user maintains absolute control over that - repository name. - -In `MODULE.bazel`: -```starlark -pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") - -# Concrete hubs defined for different execution contexts -pip.parse(hub_name = "pypi_a", ...) -pip.parse(hub_name = "pypi_b", ...) - -# Designate 'pypi_b' as the default hub for the unified '@pypi' repository -pip.default(default_hub = "pypi_b") - -# The canonical proxy is automatically created unconditionally: -use_repo(pip, "pypi") -``` - -### Unified PyPI Hub - -The canonical `@pypi` proxy repository matches exactly how concrete hubs create -their directory structure: a root package for shared configuration settings, and -a dedicated subdirectory (subpackage) for each PyPI package. - -Here is a complete, representative code example of what the generated files in -`@pypi` will look like when resolving packages between `pypi_a` and `pypi_b`: - -#### 1. `@pypi//BUILD.bazel` (Root Package) -The root package contains the shared `config_setting` targets following the -`_is_venv_` private naming convention. Leading underscores are strictly -applied because these configuration settings are an internal implementation -detail of the proxy repository and are not intended to be a public API. - -```starlark -package(default_visibility = ["//visibility:public"]) - -config_setting( - name = "_is_venv_pypi_a", - flag_values = { - "@rules_python//python/config_settings:venv": "pypi_a", - }, -) - -config_setting( - name = "_is_venv_pypi_b", - flag_values = { - "@rules_python//python/config_settings:venv": "pypi_b", - }, -) -``` - -#### 2. `@pypi//foo/BUILD.bazel` (PyPI Package Subpackage) -Each PyPI package subpackage defines the standard aliases (`pkg`, `whl`, `data`, -`dist_info`, `extracted_wheel_files`), plus a complete **union of all custom -`extra_hub_aliases`** defined across all concrete hubs. - -Each alias resolves dynamically to the active concrete hub based on the root -private configuration settings: - -```starlark -package(default_visibility = ["//visibility:public"]) - -alias( - name = "foo", - actual = ":pkg", -) - -alias( - name = "pkg", - actual = select({ - "//:_is_venv_pypi_a": "@pypi_a//foo:pkg", - "//:_is_venv_pypi_b": "@pypi_b//foo:pkg", - # When venv is "auto" (unset), it defaults to the designated fallback - # (or first defined concrete hub). - "//conditions:default": "@pypi_b//foo:pkg", - }), -) - -alias( - name = "whl", - actual = select({ - "//:_is_venv_pypi_a": "@pypi_a//foo:whl", - "//:_is_venv_pypi_b": "@pypi_b//foo:whl", - "//conditions:default": "@pypi_b//foo:whl", - }), -) - -# ... standard aliases for data, dist_info, extracted_wheel_files ... - -# 3. Unionized custom extra alias (defined in pypi_a but missing in pypi_b): -alias( - name = "my_custom_tool", - actual = select({ - "//:_is_venv_pypi_a": "@pypi_a//foo:my_custom_tool", - # Unrepresented branch routes to execution failure target: - "//:_is_venv_pypi_b": "//:_missing_package_error_pypi_b_foo", - "//conditions:default": "@pypi_a//foo:my_custom_tool", - }), -) -``` - -### Disjoint Hub Packages & Execution-Phase Failure - -If a package exists in one concrete hub but is missing in another (e.g., `scipy` -is in `pypi_b` but not `pypi_a`), our proxy synthesizes a package subpackage for -the union of all packages. - -To ensure that `bazel cquery` and `bazel query` successfully analyze over the -entire transitive build graph without failing, unrepresented select branches -must route to a dedicated **execution-phase error rule**. - -```starlark -# In @pypi//scipy/BUILD.bazel -alias( - name = "pkg", - actual = select({ - # Routes to execution-phase action failure target: - "//:_is_venv_pypi_a": "//:_missing_package_error_pypi_a_scipy", - "//:_is_venv_pypi_b": "@pypi_b//scipy:pkg", - "//conditions:default": "@pypi_b//scipy:pkg", - }), -) -``` - -The synthesized `//:_missing_package_error_XX` rule in `@pypi//BUILD.bazel` -returns standard Starlark Python providers so analysis/cquery passes, but -registers a build action that fails when executed: - -``` -Dependency Error: Third-party package 'scipy' is not available when building under PyPI hub 'pypi_a'. -``` - -### Fallback Hub Precedence (`"auto"`) - -When a target depends on `@pypi//foo` and the active build setting is `"auto"`, -the proxy resolves to a concrete hub using the following precedence: -1. **Designated Fallback**: If the user has explicitly designated a fallback - concrete hub via `pip.default(default_hub = "...")` in their root - `MODULE.bazel`, the proxy routes to it. -2. **First Defined Hub**: If no fallback is explicitly designated via - `pip.default()`, the proxy **automatically routes to the first defined - concrete hub** parsed during extension evaluation (e.g., `pypi_a`). - -```starlark -# Explicitly override the "auto" fallback hub -pip.default( - default_hub = "pypi_b", -) -``` - ---- - -## 2. Core Rule Integration: `config_settings` Transitions - -Users will switch active hubs using the standard, highly generic -`config_settings` transition attribute on executable targets. - -### Build Setting Definition - -In `python/config_settings/BUILD.bazel`: - -```starlark -string_flag( - name = "venv", - build_setting_default = "auto", # Default value is "auto" - visibility = ["//visibility:public"], -) -``` - -In `python/private/common_labels.bzl`: -```starlark - VENV = str(Label("//python/config_settings:venv")), -``` - -In `python/private/transition_labels.bzl`: -```starlark -_BASE_TRANSITION_LABELS = [ - # ... existing transition labels ... - labels.VENV, -] -``` - -Because `py_binary` and `py_test` implement an incoming transition -(`_transition_executable_impl`) that automatically processes any -`config_settings` keys matching `TRANSITION_LABELS`, **this provides complete -transition capabilities with zero changes to our core rule definitions**. - -### Usage in BUILD.bazel - -Libraries consume packages through the canonical proxy: - -```starlark -py_library( - name = "common", - deps = ["@pypi//foo"], # Apparent proxy repository -) -``` - -Binaries change the active hub by transitioning the build setting: - -```starlark -# Resolves @pypi -> pypi_b (default hub / designated fallback) -py_binary( - name = "bin_default", - deps = [":common"], -) - -# Resolves @pypi -> pypi_a via transition -py_binary( - name = "bin_a", - deps = [":common"], - config_settings = { - "//python/config_settings:venv": "pypi_a", - }, -) -``` - -### Analysis Cache & Memory Best Practices - -Because transitions fork the Bazel configuration, building targets with highly -diversified `config_settings` across large build graphs will result in -re-analysis and re-compilation of shared dependencies. - -We will include explicit documentation guidelines advising users to keep their -`venv` transition configurations localized and minimized to preserve Bazel -caching and memory efficiency. - ---- - -## 3. Integration Testing Specification - -We will construct a comprehensive Bazel-in-Bazel integration test suite in -`tests/integration/unified_pypi/` to guarantee correctness and verify -transitions. - -The integration test suite will assert: -1. **`"auto"` Precedence**: Author a test asserting `bazel run //:bin_default` - correctly inherits `"auto"` and resolves dependencies from the designated fallback. -2. **Transitional Resolution**: Author a test asserting two binary targets in - the same package with different `config_settings` successfully resolve - dependencies and execute against their respective concrete hubs (`pypi_a` - vs `pypi_b`). -3. **Command Line Override**: Author a test asserting - `bazel run --//python/config_settings:venv=pypi_a //:bin_default` - successfully forces the executable to run using imports resolved from - `pypi_a`. -4. **Disjoint Execution Failure**: Author a test asserting `bazel cquery` over - a target depending on an unrepresented missing package succeeds, while - `bazel run` on that target gracefully fails during execution with the exact - synthesized error message. -5. **Unionized Extra Hub Aliases**: Author a test asserting that a binary - successfully runs using a custom `extra_hub_aliases` target resolved - through the `@pypi proxy`. - ---- - -## 4. Execution Steps - -1. **Phase 1**: Define `venv` `string_flag` and register it in - `common_labels.bzl` and `transition_labels.bzl`. -2. **Phase 2**: Update `python/private/pypi/extension.bzl` to synthesize the - canonical `pypi` proxy repository rule. -3. **Phase 3**: Implement `missing_package_error` execution failure rule and - the `proxy_hub_repository` generation logic. -4. **Phase 4**: Author the Bazel-in-Bazel integration test suite in - `tests/integration/unified_pypi/`. -5. **Phase 5**: Run all tests and verify full pass before PR submission. diff --git a/python/private/internal_config_repo.bzl b/python/private/internal_config_repo.bzl index 72970cf100..f26556bce1 100644 --- a/python/private/internal_config_repo.bzl +++ b/python/private/internal_config_repo.bzl @@ -30,6 +30,7 @@ config = struct( supports_whl_extraction = {supports_whl_extraction}, enable_pystar = True, enable_deprecation_warnings = {enable_deprecation_warnings}, + extract_needs_chmod = {extract_needs_chmod}, bazel_8_or_later = {bazel_8_or_later}, bazel_9_or_later = {bazel_9_or_later}, bazel_10_or_later = {bazel_10_or_later}, @@ -86,6 +87,7 @@ def _internal_config_repo_impl(rctx): bazel_minor_version = 99999 supports_whl_extraction = False + extract_needs_chmod = bazel_major_version < 8 or (bazel_major_version == 8 and bazel_minor_version < 6) if bazel_major_version >= 8: # Extracting .whl files requires Bazel 8.3.0 or later. if bazel_major_version > 8 or bazel_minor_version >= 3: @@ -105,6 +107,7 @@ def _internal_config_repo_impl(rctx): builtin_py_info_symbol = builtin_py_info_symbol, builtin_py_runtime_info_symbol = builtin_py_runtime_info_symbol, supports_whl_extraction = str(supports_whl_extraction), + extract_needs_chmod = str(extract_needs_chmod), builtin_py_cc_link_params_provider = builtin_py_cc_link_params_provider, bazel_8_or_later = str(bazel_major_version >= 8), bazel_9_or_later = str(bazel_major_version >= 9), diff --git a/python/private/pypi/patch_whl.bzl b/python/private/pypi/patch_whl.bzl index 98a5ad49c8..e8d76eeba5 100644 --- a/python/private/pypi/patch_whl.bzl +++ b/python/private/pypi/patch_whl.bzl @@ -80,6 +80,7 @@ def patch_whl(rctx, *, whl_path, patches): rctx, archive = whl_input, supports_whl_extraction = rp_config.supports_whl_extraction, + extract_needs_chmod = rp_config.extract_needs_chmod, ) if not patches: diff --git a/python/private/pypi/whl_extract.bzl b/python/private/pypi/whl_extract.bzl index 2ebb61a83a..0d61b9a07b 100644 --- a/python/private/pypi/whl_extract.bzl +++ b/python/private/pypi/whl_extract.bzl @@ -18,10 +18,9 @@ def whl_extract(rctx, *, whl_path, logger): archive = whl_path, output = install_dir_path, supports_whl_extraction = rp_config.supports_whl_extraction, + extract_needs_chmod = rp_config.extract_needs_chmod, ) - _maybe_fix_permissions(rctx, whl_path = whl_path, logger = logger) - metadata_file = find_whl_metadata( install_dir = install_dir_path, logger = logger, @@ -65,27 +64,6 @@ def whl_extract(rctx, *, whl_path, logger): # Ensure that there is no data dir left rctx.delete(data_dir) -# TODO: This can be removed when Bazel 8.6+ is the minimum supported version. -def _maybe_fix_permissions(rctx, *, whl_path, logger): - # Fix permissions on extracted files. Some wheels have files without read permissions set, - # which causes errors when trying to read them later. - # We apply this to the root directory to ensure that everything in bin/, site-packages/, - # etc. is readable and executable where appropriate. - os_name = repo_utils.get_platforms_os_name(rctx) - if os_name != "windows": - # On Unix-like systems, recursively add read permissions to all files - # and ensure directories are traversable (need execute permission) - result = repo_utils.execute_unchecked( - rctx, - op = "Fixing wheel permissions {}".format(whl_path), - arguments = ["chmod", "-R", "a+rX", "."], - logger = logger, - ) - if result.return_code != 0: - # It's possible chmod is not available or the filesystem doesn't support it. - # This is fine, we just want to try to fix permissions if possible. - logger.warn(lambda: "Failed to fix file permissions: {}".format(result.stderr)) - def merge_trees(src, dest): """Merge src into the destination path. diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl index 6fa851b7b3..0e83fda28c 100644 --- a/python/private/repo_utils.bzl +++ b/python/private/repo_utils.bzl @@ -202,7 +202,7 @@ def _execute_internal( output = _outputs_to_str(result, log_stdout = log_stdout, log_stderr = log_stderr), )) - result_kwargs = {k: getattr(result, k) for k in dir(result)} + result_kwargs = {k: getattr(result, k) for k in dir(result) if k not in ["to_json", "to_proto"]} return struct( describe_failure = lambda: _execute_describe_failure( op = op, @@ -511,7 +511,7 @@ def _get_platforms_cpu_name(mrctx): return "riscv64" return arch -def _extract(mrctx, *, archive, supports_whl_extraction = False, **kwargs): +def _extract(mrctx, *, archive, supports_whl_extraction = False, extract_needs_chmod = False, **kwargs): """Extract an archive TODO: remove when the earliest supported bazel version is at least 8.3. @@ -533,6 +533,26 @@ def _extract(mrctx, *, archive, supports_whl_extraction = False, **kwargs): if not mrctx.delete(archive): fail("Failed to remove the symlink after extracting") + if extract_needs_chmod: + _maybe_fix_permissions(mrctx, whl_path = archive) + +def _maybe_fix_permissions(mrctx, *, whl_path, logger = None): + if not logger and hasattr(mrctx, "attr"): + logger = _logger(mrctx) + elif not logger: + fail("logger must be specified when using 'module_ctx'") + + os_name = _get_platforms_os_name(mrctx) + if os_name != "windows": + result = _execute_unchecked( + mrctx, + op = "Fixing wheel permissions {}".format(whl_path), + arguments = ["chmod", "-R", "a+rX", "."], + logger = logger, + ) + if result.return_code != 0: + logger.warn(lambda: "Failed to fix file permissions: {}".format(result.stderr)) + def _rename(mrctx, src, dest): """Rename a file or directory. @@ -575,6 +595,7 @@ repo_utils = struct( get_platforms_os_name = _get_platforms_os_name, is_repo_debug_enabled = _is_repo_debug_enabled, logger = _logger, + maybe_fix_permissions = _maybe_fix_permissions, mkdir = _mkdir, norm_path = _norm_path, relative_to = _relative_to, diff --git a/tests/repo_utils/repo_utils_test.bzl b/tests/repo_utils/repo_utils_test.bzl index ce9e48b5a6..56d125724d 100644 --- a/tests/repo_utils/repo_utils_test.bzl +++ b/tests/repo_utils/repo_utils_test.bzl @@ -2,10 +2,21 @@ load("@rules_testing//lib:test_suite.bzl", "test_suite") load("//python/private:repo_utils.bzl", "repo_utils") # buildifier: disable=bzl-visibility -load("//tests/support:mocks.bzl", "mocks") +load("//tests/support/mocks:mocks.bzl", "mocks") _tests = [] +def _make_rctx(*, os_name, mock_extracts = None, mock_files = None): + return mocks.rctx( + attr = { + "name": "unit", + "_rule_name": "test_rule", + }, + mock_files = mock_files, + mock_extracts = mock_extracts, + os_name = os_name, + ) + def _test_get_platforms_os_name(env): mock_mrctx = mocks.rctx(os_name = "Mac OS X") got = repo_utils.get_platforms_os_name(mock_mrctx) @@ -50,6 +61,68 @@ def _test_is_relative_to(env): _tests.append(_test_is_relative_to) +def _test_extract_calls_chmod_when_enabled(env): + mock_rctx = _make_rctx( + os_name = "linux", + mock_extracts = {"test.whl": {"f1": "c1"}}, + ) + + repo_utils.extract( + mock_rctx, + archive = mock_rctx.path("test.whl"), + output = "out", + supports_whl_extraction = True, + extract_needs_chmod = True, + ) + + env.expect.that_bool(len(mock_rctx.execute_calls) > 0).equals(True) + env.expect.that_str(mock_rctx.execute_calls[0][0]).equals("chmod") + +_tests.append(_test_extract_calls_chmod_when_enabled) + +def _test_extract_skips_chmod_when_disabled(env): + mock_rctx = _make_rctx( + os_name = "linux", + mock_extracts = {"test.whl": {"f1": "c1"}}, + ) + + repo_utils.extract( + mock_rctx, + archive = mock_rctx.path("test.whl"), + output = "out", + supports_whl_extraction = True, + extract_needs_chmod = False, + ) + + env.expect.that_collection(mock_rctx.execute_calls).contains_exactly([]) + +_tests.append(_test_extract_skips_chmod_when_disabled) + +def _test_maybe_fix_permissions_calls_chmod_on_linux(env): + mock_rctx = _make_rctx(os_name = "linux") + + repo_utils.maybe_fix_permissions( + mock_rctx, + whl_path = mock_rctx.path("test.whl"), + ) + + env.expect.that_bool(len(mock_rctx.execute_calls) > 0).equals(True) + env.expect.that_str(mock_rctx.execute_calls[0][0]).equals("chmod") + +_tests.append(_test_maybe_fix_permissions_calls_chmod_on_linux) + +def _test_maybe_fix_permissions_skips_on_windows(env): + mock_rctx = _make_rctx(os_name = "windows") + + repo_utils.maybe_fix_permissions( + mock_rctx, + whl_path = mock_rctx.path("test.whl"), + ) + + env.expect.that_collection(mock_rctx.execute_calls).contains_exactly([]) + +_tests.append(_test_maybe_fix_permissions_skips_on_windows) + def repo_utils_test_suite(name): """Create the test suite. diff --git a/tests/support/mocks.bzl b/tests/support/mocks.bzl deleted file mode 100644 index 2a4ccd0fc4..0000000000 --- a/tests/support/mocks.bzl +++ /dev/null @@ -1,31 +0,0 @@ -"""Mocks for testing.""" - -def _rctx(os_name = "linux", os_arch = "x86_64", environ = None, **kwargs): - """Creates a mock of repository_ctx or module_ctx. - - Args: - os_name: The OS name to mock (e.g., "linux", "Mac OS X", "windows"). - os_arch: The OS architecture to mock (e.g., "x86_64", "aarch64"). - environ: A dictionary representing the environment variables. - **kwargs: Additional attributes to add to the mock struct. - - Returns: - A struct mocking repository_ctx. - """ - if environ == None: - environ = {} - - attrs = { - "getenv": environ.get, - "os": struct( - name = os_name, - arch = os_arch, - ), - } - attrs.update(kwargs) - - return struct(**attrs) - -mocks = struct( - rctx = _rctx, -) diff --git a/tests/support/mocks/mocks.bzl b/tests/support/mocks/mocks.bzl index 84d39fa6e2..6c79b1f4ff 100644 --- a/tests/support/mocks/mocks.bzl +++ b/tests/support/mocks/mocks.bzl @@ -185,7 +185,7 @@ def _mctx_download( return struct(success = True, wait = lambda: struct(success = True)) return struct(success = False, wait = lambda: struct(success = False)) -def _mctx_report_progress(self, message): +def _mrctx_report_progress(self, message): self.report_progress_calls.append(message) return None @@ -259,7 +259,7 @@ def _mctx_new( path = lambda *a, **k: _mctx_path(self, *a, **k), read = lambda *a, **k: _mctx_read(self, *a, **k), download = lambda *a, **k: _mctx_download(self, *a, **k), - report_progress = lambda *a, **k: _mctx_report_progress(self, *a, **k), + report_progress = lambda *a, **k: _mrctx_report_progress(self, *a, **k), add_module = lambda **k: _mctx_add_module(self, **k), ) return self @@ -439,6 +439,7 @@ def _rctx_execute( environment, custom_reporter, ) # @unused + self.execute_calls.append(arguments) return struct(return_code = 0, stdout = "", stderr = "") def _rctx_symlink(self, target, link_name): @@ -485,7 +486,10 @@ def _rctx_new( mock_which = mock_which, mock_downloads = mock_downloads, mock_extracts = mock_extracts, + execute_calls = [], + report_progress_calls = [], attr = struct(**attr), + name = attr.get("name", "mock"), os = struct( name = os_name, arch = arch_name, @@ -494,10 +498,12 @@ def _rctx_new( path = lambda *a, **k: _rctx_path(self, *a, **k), read = lambda *a, **k: _rctx_read(self, *a, **k), file = lambda *a, **k: _rctx_file(self, *a, **k), + getenv = environ.get, template = lambda *a, **k: _rctx_template(self, *a, **k), which = lambda *a, **k: _rctx_which(self, *a, **k), download = lambda *a, **k: _rctx_download(self, *a, **k), extract = lambda *a, **k: _rctx_extract(self, *a, **k), + delete = lambda x: True, download_and_extract = lambda *a, **k: _rctx_download_and_extract( self, *a, @@ -505,6 +511,7 @@ def _rctx_new( ), execute = lambda *a, **k: _rctx_execute(self, *a, **k), symlink = lambda *a, **k: _rctx_symlink(self, *a, **k), + report_progress = lambda *a, **k: _mrctx_report_progress(self, *a, **k), ) return self