From 23c95ab9412b8c3961c978c1db73c82a61f13e66 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 28 Jun 2026 23:05:41 +0900 Subject: [PATCH] wip - implement single repo per wheel per hub remove incorrect code written by AI --- .agents/plans/single_dep_whl_library.md | 137 ++++++++++++++++++ .agents/skills/accessing-sandbox/SKILL.md | 11 ++ docs/readthedocs_build.sh | 3 + python/private/internal_config_repo.bzl | 5 + .../pypi/generate_whl_library_build_bazel.bzl | 1 + python/private/pypi/hub_builder.bzl | 37 +++-- python/private/pypi/hub_repository.bzl | 2 + python/private/pypi/whl_config_setting.bzl | 5 +- python/private/pypi/whl_library_targets.bzl | 120 ++++++++++++++- python/private/pypi/whl_repo_name.bzl | 8 +- 10 files changed, 308 insertions(+), 21 deletions(-) create mode 100644 .agents/plans/single_dep_whl_library.md create mode 100644 .agents/skills/accessing-sandbox/SKILL.md diff --git a/.agents/plans/single_dep_whl_library.md b/.agents/plans/single_dep_whl_library.md new file mode 100644 index 0000000000..cb96520e15 --- /dev/null +++ b/.agents/plans/single_dep_whl_library.md @@ -0,0 +1,137 @@ +# Single dep for spoke repos per `whl_name`. + +The goal is to have `whl_library` instances from wheels (where we pass `url` for a URL or we pass +a `whl_file` label which points to an actual wheel) to be reused across different python versions +and across different configurations where different extras are requested. This means that +if the user is using the same wheel in 2 different configurations, the wheel is reused, because +the extracted contents should not differ. This could be important for: +* platform-specific wheels for custom platforms defined by the user. Otherwise the + platform-specific wheel behaviour should remain unchanged. +* cross-platform wheels for are the main affected item here. + +The code should be gated by a feature flag, which can be flipped to be enabled using an +environmental variable via + +The file to modify `python/private/internal_config_repo.bzl`. Use the +`RULES_PYTHON_WHL_LIBRARY_OPTIMIZED=1` to enable the feature. Add it to the +`docs/readthedocs_build.sh` file so that we are exercising the code. And document this in +the `docs/environment-variables.md` file. +If the flag is off, then it is equivalent to the current +`main` branch behaviour, if it is on, then it is equivalent to turning all of the behaviour +implemented here on. + +Since this is a refactor for users not using private APIs, the env variable will be switched/removed +later together with old code. + +All of the code should be written in a TDD style where we add a failing test for a feature and then +we implement the new code. + +## Change the naming of the spoke repositories: + +> The hub repositories should be named `_. + +The names should be changed: +* from `+pip+dev_pip_314_requests_py3_none_any_2a0d60c1_linux_x86_64_linux_x86_64_freethreaded` + to `+pip+dev_pip_requests_py3_none_any_2a0d60c1` +* from `+pip+dev_pip_314_roman_numerals_py3_none_any_647ba99c` + to `+pip+dev_pip_roman_numerals_py3_none_any_647ba99c` +* from `+pip+dev_pip_314_markupsafe_cp314_cp314t_manylinux_2_17_x86_64_fed51ac4` + to `+pip+dev_pip_markupsafe_cp314_cp314t_manylinux_2_17_x86_64_fed51ac4` + +So the naming convention after the change is: +`_____` + +Where `` is the hub repository name and the rest of the segments come from the wheel name +itself. + +The sdist building `whl_library` instances should change the naming to: +`___` + +The file to modify `python/private/pypi/whl_repo_name.bzl` + +## Change the `whl_library_targets` + +Using the `METADATA` file that is parsed from `whl_metadata` function where the `Provides-Extra` is +retrieved from the file. Right now we generate a single target which includes all of the extras that +are required. Instead do the following target generation: +* `pkg` (no extras) - target with no extras, this is the main target that includes the python sources, + the other targets just include extra dependencies. +* `pkg__extra` - target for each extra in the provides-extra list. If the target depends on + `self[another_extra]`, then explode the nodes so that the target only depends on `pkg` target + + extra dependencies. Use `py_library` for this. Also be smart about generating targets: + - If the only extra is in the env marker, but the dependency is not in the hub repo, then skip + generating the whole `pkg__extra` target. + - Propose any extra ideas here. + +Related files: +* `python/private/pypi/whl_library.bzl` +* `python/private/pypi/whl_metadata.bzl` +* `python/private/pypi/whl_library_targets.bzl` + +## Change `hub_builder` and `hub_repository` + +Instead of passing the `requirement[extra1,extra2]` to the `whl_library`, pass it to the +`hub_repository` via `whl_config_setting` so that the `render_pkg_aliases` is using the information to alias to the right +extra target based on what is requested. This should retain the behaviour where different target +platforms are allowed to target `foo[baz]` and `foo[bar]` and we select the +`@dev_pip_spoke_repo//:pkg__baz` and `@dev_pip_spoke_repo//:pkg__bar` appropriately. + +We should also generate extra targets here: +* `pkg` should point to the target with all specified extras as passed to the `hub_repository`. This is to keep backwards compatibility with how it used to be done + previously. This also keeps compatibility with `rules_pycross`. Add this as a comment + when doing this. The spoke repos should not be used directly, so this difference in + behaviour is OK. + This reuses the targets declared in the hub repository described below. + It is backed by a `py_library` target if there are multiple extras, or an `alias` if it + is only a single extra. It is the same target as described below. +* `pkg[]` should point to the target in the spoke without extras +* `pkg[extra]` should point to the target in the spoke with particular extras. Do this for each + provided extra. So for `requirements[extra1,extra2]` passed to the hub repo, 2 extra targets will + be created. If the extras have not been specified for certain platforms via the + `whl_config_setting`, then the `select` statement should raise an `error` for no match. + Document the mapping explicitly: hub `pkg[]` → spoke `pkg`, hub `pkg[extra]` → spoke `pkg__extra`. + The fact that some of the targets in the spoke repos are unreachable is intentional - this is + because the hub repository contents are created before the `whl_library` downloads the whl and + inspects the METADATA. + The reverse direction is not a concern because the requirements file locking results in + consistent file. If something like this happens, then we should provide a message to the + user that something went wrong and that they should create a ticket in rules_python bug + tracker. +* If there is `requirement[extra1,extra2]` passed, that means that we should create a special alias + that includes both of the targets at once. For this use `py_library` instead of `alias` and pass + `:pkg[extra1]` and `:pkg[extra2]` to the `deps` of the `py_library`. This won't cause + a failure because for a particular platform that is requested this should work by + construction, because the dependency targets will be also defined for the particular platform. + +* If the `requirement` is passed without any extras, then hub `pkg` should alias to `pkg[]`. +* If the target is `py_library`, then we should name it without `[]` to avoid any aspects traversing + `py_library` targets changing behaviour. Only alias targets can have `[]`. + +Related files: +* `python/private/pypi/render_pkg_aliases.bzl` +* `python/private/pypi/hub_builder.bzl` +* `python/private/pypi/hub_repository.bzl` +* `python/private/pypi/whl_config_setting.bzl` - consider extending this struct to specify which + extras are requested for which platform. + +## Modify `pip_repository` + +Do similar changes to the `WORKSPACE` code to ensure easier maintenance of `whl_library` so that all +code paths to `whl_library` remain consistent. + +Implement this by using the information that we retrieve by parsing the `requirements.txt` files +using the `parse_requirements` function. Parse the requirement itself using the existing parser in +the referenced file `pep508_requirement.bzl`. + +Related files: +* `python/private/pypi/pip_repository.bzl` +* `python/private/pypi/requirements.bzl.tmpl.workspace` +* `python/private/pypi/pep508_requirement.bzl` + +## Modify `unified_hub_repo` + +Pass the extras to the `unified_hub_repo` as well so that the extra targets are created. The target +topology in the unified hub repo should correspond to the hub_repository. + +File: +* `python/private/pypi/unified_hub_repo.bzl` diff --git a/.agents/skills/accessing-sandbox/SKILL.md b/.agents/skills/accessing-sandbox/SKILL.md new file mode 100644 index 0000000000..cdc1778eb8 --- /dev/null +++ b/.agents/skills/accessing-sandbox/SKILL.md @@ -0,0 +1,11 @@ +# Accessing build failures + +When looking at the test results and the build sandbox, do not ask permission to access the paths +via the absolute path, instead access them via the convenience symlinks in the root of the git +repository. + +Patterns: +* Use `bazel-bin/tests/pypi/hub_builder` to access the test results for the `//tests/pypi/hub_builder` +* Use `bazel-rules_python/external/` to access the external repos fetch or materialized by the + extensions +* Use `bazel-testlogs/tests/pypi/hub_builder/test_simple` to access the logs for the `//tests/pypi/hub_builder:test_simple` test. diff --git a/docs/readthedocs_build.sh b/docs/readthedocs_build.sh index cd60792a3f..d830ec4be8 100755 --- a/docs/readthedocs_build.sh +++ b/docs/readthedocs_build.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +# Exercising the whl_library optimized mode to ensure it is working +export RULES_PYTHON_WHL_LIBRARY_OPTIMIZED=1 + set -eou pipefail declare -a extra_env diff --git a/python/private/internal_config_repo.bzl b/python/private/internal_config_repo.bzl index 8ee6a2d017..35393b28b5 100644 --- a/python/private/internal_config_repo.bzl +++ b/python/private/internal_config_repo.bzl @@ -24,6 +24,9 @@ load(":repo_utils.bzl", "repo_utils") _ENABLE_DEPRECATION_WARNINGS_ENVVAR_NAME = "RULES_PYTHON_DEPRECATION_WARNINGS" _ENABLE_DEPRECATION_WARNINGS_DEFAULT = "0" +_WHL_LIBRARY_OPTIMIZED_ENVVAR_NAME = "RULES_PYTHON_WHL_LIBRARY_OPTIMIZED" +_WHL_LIBRARY_OPTIMIZED_DEFAULT = "0" + _CONFIG_TEMPLATE = """ config = struct( build_python_zip_default = {build_python_zip_default}, @@ -31,6 +34,7 @@ config = struct( enable_pystar = True, enable_deprecation_warnings = {enable_deprecation_warnings}, extract_needs_chmod = {extract_needs_chmod}, + whl_library_optimized = {whl_library_optimized}, bazel_8_or_later = {bazel_8_or_later}, bazel_9_or_later = {bazel_9_or_later}, bazel_10_or_later = {bazel_10_or_later}, @@ -104,6 +108,7 @@ def _internal_config_repo_impl(rctx): rctx.file("rules_python_config.bzl", _CONFIG_TEMPLATE.format( build_python_zip_default = repo_utils.get_platforms_os_name(rctx) == "windows", enable_deprecation_warnings = _bool_from_environ(rctx, _ENABLE_DEPRECATION_WARNINGS_ENVVAR_NAME, _ENABLE_DEPRECATION_WARNINGS_DEFAULT), + whl_library_optimized = _bool_from_environ(rctx, _WHL_LIBRARY_OPTIMIZED_ENVVAR_NAME, _WHL_LIBRARY_OPTIMIZED_DEFAULT), builtin_py_info_symbol = builtin_py_info_symbol, builtin_py_runtime_info_symbol = builtin_py_runtime_info_symbol, supports_whl_extraction = str(supports_whl_extraction), diff --git a/python/private/pypi/generate_whl_library_build_bazel.bzl b/python/private/pypi/generate_whl_library_build_bazel.bzl index a9a29081f7..3c8c096d24 100644 --- a/python/private/pypi/generate_whl_library_build_bazel.bzl +++ b/python/private/pypi/generate_whl_library_build_bazel.bzl @@ -27,6 +27,7 @@ _RENDER = { "extras": render.list, "group_deps": render.list, "include": str, + "provides_extra": render.list, "requires_dist": render.list, "srcs_exclude": render.list, "tags": render.list, diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index 7717f36731..391ebcffb5 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -11,6 +11,7 @@ load(":attrs.bzl", "use_isolated") load(":parse_requirements.bzl", "parse_requirements") load(":pep508_env.bzl", "env") load(":pep508_evaluate.bzl", "evaluate") +load(":pep508_requirement.bzl", "requirement") load(":python_tag.bzl", "python_tag") load(":requirements_files_by_platform.bzl", "requirements_files_by_platform") load(":whl_config_setting.bzl", "whl_config_setting") @@ -308,13 +309,10 @@ def _add_whl_library(self, *, python_version, whl, repo): # disallow building from sdist. return - # TODO @aignas 2025-06-29: we should not need the version in the repo_name if - # we are using pipstar and we are downloading the wheel using the downloader - # - # However, for that we should first have a different way to reference closures with - # extras. For example, if some package depends on `foo[extra]` and another depends on - # `foo`, we should have 2 py_library targets. - repo_name = "{}_{}_{}".format(self.name, version_label(python_version), repo.repo_name) + if self._config.whl_library_optimized: + repo_name = "{}_{}".format(self.name, repo.repo_name) + else: + repo_name = "{}_{}_{}".format(self.name, version_label(python_version), repo.repo_name) if repo_name in self._whl_libraries: diff = _diff_dict(self._whl_libraries[repo_name], repo.args) @@ -538,6 +536,7 @@ def _create_whl_repos( ) for src in whl.srcs: repo = _whl_repo( + self, src = src, index_url = whl.index_url, whl_library_args = whl_library_args, @@ -549,6 +548,7 @@ def _create_whl_repos( is_multiple_versions = whl.is_multiple_versions, interpreter = interpreter, enable_pipstar_extract = enable_pipstar_extract, + optimized = self._config.whl_library_optimized, ) _add_whl_library( self, @@ -612,6 +612,7 @@ def _whl_library_args(self, *, whl, whl_modifications): return whl_library_args def _whl_repo( + self, *, src, whl_library_args, @@ -623,9 +624,21 @@ def _whl_repo( python_version, use_downloader, interpreter, - enable_pipstar_extract = False): + enable_pipstar_extract = False, + optimized = False): args = dict(whl_library_args) - args["requirement"] = src.requirement_line + if self._config.whl_library_optimized: + line = requirement(src.requirement_line) + extras = line.extras + + # Strip extras from the requirement for whls in optimized mode so that + # the wheel can be reused across different configurations requesting + # different extras. + args["requirement"] = line.name + else: + args["requirement"] = src.requirement_line + extras = [] + is_whl = src.filename.endswith(".whl") if src.extra_pip_args and not is_whl: @@ -658,6 +671,7 @@ def _whl_repo( config_setting = whl_config_setting( version = python_version, target_platforms = target_platforms or None, + extras = extras, ), ) @@ -678,14 +692,15 @@ def _whl_repo( # TODO @aignas 2025-11-02: once we have pipstar enabled we can add extra # targets to each hub for each extra combination and solve this more cleanly as opposed to # duplicating whl_library repositories. - target_platforms = src.target_platforms if is_multiple_versions else [] + target_platforms = src.target_platforms if (is_multiple_versions and not optimized) else [] return struct( - repo_name = whl_repo_name(src.filename, src.sha256, *target_platforms), + repo_name = whl_repo_name(src.filename, src.sha256, target_platforms = target_platforms, optimized = optimized), args = args, config_setting = whl_config_setting( version = python_version, target_platforms = src.target_platforms, + extras = extras, ), ) diff --git a/python/private/pypi/hub_repository.bzl b/python/private/pypi/hub_repository.bzl index f915aa1c77..c8447da305 100644 --- a/python/private/pypi/hub_repository.bzl +++ b/python/private/pypi/hub_repository.bzl @@ -146,4 +146,6 @@ def _whl_config_setting_dict(a): ret["target_platforms"] = a.target_platforms if a.version: ret["version"] = a.version + if a.extras: + ret["extras"] = a.extras return ret diff --git a/python/private/pypi/whl_config_setting.bzl b/python/private/pypi/whl_config_setting.bzl index 1d868b1b65..e514220aa7 100644 --- a/python/private/pypi/whl_config_setting.bzl +++ b/python/private/pypi/whl_config_setting.bzl @@ -14,7 +14,7 @@ "A small function to create an alias for a whl distribution" -def whl_config_setting(*, version = None, target_platforms = None): +def whl_config_setting(*, version = None, target_platforms = None, extras = None): """The bzl_packages value used by by the render_pkg_aliases function. This contains the minimum amount of information required to generate correct @@ -27,6 +27,8 @@ def whl_config_setting(*, version = None, target_platforms = None): is no match found during a select. target_platforms: {type}`list[str] | None` the list of target_platforms for this distribution. + extras: {type}`list[str] | None` the list of extras for this particular target platform to + enable. Returns: a struct with the validated and parsed values. @@ -55,4 +57,5 @@ def whl_config_setting(*, version = None, target_platforms = None): # Make the struct hashable target_platforms = tuple(target_platforms) if target_platforms else None, version = version, + extras = extras or None, ) diff --git a/python/private/pypi/whl_library_targets.bzl b/python/private/pypi/whl_library_targets.bzl index 01a89aadcc..93e46ad0a7 100644 --- a/python/private/pypi/whl_library_targets.bzl +++ b/python/private/pypi/whl_library_targets.bzl @@ -15,8 +15,9 @@ """Macro to generate all of the targets present in a {obj}`whl_library`.""" load("@bazel_skylib//rules:copy_file.bzl", "copy_file") +load("@rules_python//python:defs.bzl", "py_library") load("//python:py_binary.bzl", "py_binary") -load("//python:py_library.bzl", "py_library") +load("//python:py_library.bzl", py_library_rule = "py_library") load("//python/private:normalize_name.bzl", "normalize_name") load(":env_marker_setting.bzl", "env_marker_setting") load( @@ -54,6 +55,7 @@ def whl_library_targets_from_requires( metadata_version = "", requires_dist = [], extras = [], + provides_extra = [], entry_points = {}, include = [], group_deps = [], @@ -71,23 +73,87 @@ def whl_library_targets_from_requires( requires_dist: {type}`list[str]` The list of `Requires-Dist` values from the whl `METADATA`. extras: {type}`list[str]` The list of requested extras. This essentially includes extra transitive dependencies in the final targets depending on the wheel `METADATA`. + provides_extra: {type}`list[str]` The list of extras that this package provides. entry_points: {type}`list[dict]` A list of parsed entry point definitions. include: {type}`list[str]` The list of packages to include. **kwargs: Extra args passed to the {obj}`whl_library_targets` """ - package_deps = _parse_requires_dist( + if provides_extra: + _whl_library_targets_from_requires_optimized( + name = name, + metadata_name = metadata_name, + metadata_version = metadata_version, + requires_dist = requires_dist, + provides_extra = provides_extra, + entry_points = entry_points, + include = include, + group_deps = group_deps, + **kwargs + ) + else: + package_deps = _parse_requires_dist( + name = metadata_name, + requires_dist = requires_dist, + excludes = group_deps, + extras = extras, + include = include, + ) + + whl_library_targets( + name = name, + dependencies = package_deps.deps, + dependencies_with_markers = package_deps.deps_select, + entry_points = entry_points, + tags = [ + "pypi_name={}".format(metadata_name), + "pypi_version={}".format(metadata_version), + ], + **kwargs + ) + +def _whl_library_targets_from_requires_optimized( + *, + name, + metadata_name, + metadata_version, + requires_dist, + provides_extra, + entry_points, + include, + group_deps, + **kwargs): + dep_template = kwargs.pop("dep_template", "") + dep_tmpl = dep_template.format(name = "{}", target = PY_LIBRARY_PUBLIC_LABEL) + + base_deps = _parse_requires_dist( name = metadata_name, requires_dist = requires_dist, excludes = group_deps, - extras = extras, + extras = [], include = include, ) + extra_deps_map = {} + for extra in (provides_extra or []): + extra_deps_map[extra] = _parse_requires_dist( + name = metadata_name, + requires_dist = requires_dist, + excludes = group_deps, + extras = [extra], + include = include, + ) + + all_deps_select = dict(base_deps.deps_select) + for extra_deps in extra_deps_map.values(): + for d, m in extra_deps.deps_select.items(): + all_deps_select.setdefault(d, m) + whl_library_targets( name = name, - dependencies = package_deps.deps, - dependencies_with_markers = package_deps.deps_select, + dependencies = base_deps.deps, + dependencies_with_markers = all_deps_select, entry_points = entry_points, + dep_template = dep_template, tags = [ "pypi_name={}".format(metadata_name), "pypi_version={}".format(metadata_version), @@ -95,6 +161,48 @@ def whl_library_targets_from_requires( **kwargs ) + base_deps_set = {d: True for d in base_deps.deps} + + for extra in (provides_extra or []): + extra_deps = extra_deps_map[extra] + extra_only_deps = sorted([ + d + for d in extra_deps.deps + if d not in base_deps_set + ]) + extra_only_deps_select = { + d: m + for d, m in extra_deps.deps_select.items() + if d not in base_deps_set + } + + if not extra_only_deps and not extra_only_deps_select: + continue + + deps_entries = [":pkg"] + [ + dep_tmpl.format(d) + for d in extra_only_deps + ] + + if extra_only_deps_select: + select_entries = { + ":is_include_{}_true".format(d): [dep_tmpl.format(d)] + for d in sorted(extra_only_deps_select) + } + select_entries.setdefault("//conditions:default", []) + deps_entries.append(select(select_entries)) + + py_library( + name = "pkg__{}".format(extra), + deps = deps_entries, + tags = [ + "pypi_name={}".format(metadata_name), + "pypi_version={}".format(metadata_version), + "pypi_extra={}".format(extra), + ], + visibility = ["//visibility:public"], + ) + def _parse_requires_dist( *, name, @@ -134,7 +242,7 @@ def whl_library_targets( rules = struct( copy_file = copy_file, py_binary = py_binary, - py_library = py_library, + py_library = py_library_rule, venv_entry_point = venv_entry_point, venv_rewrite_shebang = venv_rewrite_shebang, env_marker_setting = env_marker_setting, diff --git a/python/private/pypi/whl_repo_name.bzl b/python/private/pypi/whl_repo_name.bzl index 29d774c361..4243958218 100644 --- a/python/private/pypi/whl_repo_name.bzl +++ b/python/private/pypi/whl_repo_name.bzl @@ -18,14 +18,15 @@ load("//python/private:normalize_name.bzl", "normalize_name") load(":parse_whl_name.bzl", "parse_whl_name") -def whl_repo_name(filename, sha256, *target_platforms): +def whl_repo_name(filename, sha256, target_platforms = ()): """Return a valid whl_library repo name given a distribution filename. Args: filename: {type}`str` the filename of the distribution. sha256: {type}`str` the sha256 of the distribution. - *target_platforms: {type}`list[str]` the extra suffixes to append. + target_platforms: {type}`tuple[str]` the extra suffixes to append. Only used when we need to support different extras per version. + Deprecated. Returns: a string that can be used in {obj}`whl_library`. @@ -61,7 +62,8 @@ def whl_repo_name(filename, sha256, *target_platforms): elif version: parts.insert(1, version) - parts.extend([p.partition("_")[-1] for p in target_platforms]) + if target_platforms: + parts.extend(target_platforms) return "_".join(parts)