From 0986c1075ff0b625664aec0582ed718a9a1be183 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 14:14:08 +0900 Subject: [PATCH 01/15] feat: add initial facts impl --- python/private/pypi/extension.bzl | 10 +- python/private/pypi/hub_builder.bzl | 8 +- python/private/pypi/parse_requirements.bzl | 18 +- python/private/pypi/pypi_cache.bzl | 191 ++++++++++++++++++++- python/private/pypi/simpleapi_download.bzl | 12 +- 5 files changed, 216 insertions(+), 23 deletions(-) diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 5fded728bf..736ba0dba3 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -225,7 +225,7 @@ You cannot use both the additive_build_content and additive_build_content_file a # dict[str repo, HubBuilder] # See `hub_builder.bzl%hub_builder()` for `HubBuilder` pip_hub_map = {} - simpleapi_cache = pypi_cache() + simpleapi_cache = pypi_cache(module_ctx = module_ctx) for mod in module_ctx.modules: for pip_attr in mod.tags.parse: @@ -293,6 +293,7 @@ You cannot use both the additive_build_content and additive_build_content_file a config = config, exposed_packages = exposed_packages, extra_aliases = extra_aliases, + facts = simpleapi_cache.get_facts(), hub_group_map = hub_group_map, hub_whl_map = hub_whl_map, whl_libraries = whl_libraries, @@ -372,7 +373,11 @@ def _pip_impl(module_ctx): module_ctx: module contents """ - mods = parse_modules(module_ctx, enable_pipstar = rp_config.enable_pipstar, enable_pipstar_extract = rp_config.enable_pipstar and rp_config.bazel_8_or_later) + mods = parse_modules( + module_ctx, + enable_pipstar = rp_config.enable_pipstar, + enable_pipstar_extract = rp_config.enable_pipstar and rp_config.bazel_8_or_later, + ) # Build all of the wheel modifications if the tag class is called. 
_whl_mods_impl(mods.whl_mods) @@ -396,6 +401,7 @@ def _pip_impl(module_ctx): return module_ctx.extension_metadata( reproducible = True, + facts = mods.facts or {}, ) _default_attrs = { diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index 069d519e3c..e84f3b0ae8 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -395,11 +395,11 @@ def _set_get_index_urls(self, pip_attr): index_url = pip_attr.experimental_index_url, extra_index_urls = pip_attr.experimental_extra_index_urls or [], index_url_overrides = pip_attr.experimental_index_url_overrides or {}, - sources = [ - d - for d in distributions + sources = { + d: versions + for d, versions in distributions.items() if _use_downloader(self, python_version, d) - ], + }, envsubst = pip_attr.envsubst, # Auth related info netrc = pip_attr.netrc, diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index acc35b3208..06cda9e382 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -53,7 +53,7 @@ def parse_requirements( os, arch combinations. extra_pip_args (string list): Extra pip arguments to perform extra validations and to be joined with args found in files. - get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all + get_index_urls: Callable[[ctx, dict[str, list[str]]], dict], a callable to get all of the distribution URLs from a PyPI index. Accepts ctx and distribution names to query. evaluate_markers: A function to use to evaluate the requirements. 
@@ -170,15 +170,17 @@ def parse_requirements( index_urls = {} if get_index_urls: + distributions = {} + for reqs in requirements_by_platform.values(): + for req in reqs.values(): + if req.srcs.url: + continue + + distributions.setdefault(req.distribution, []).append(req.srcs.version) + index_urls = get_index_urls( ctx, - # Use list({}) as a way to have a set - list({ - req.distribution: None - for reqs in requirements_by_platform.values() - for req in reqs.values() - if not req.srcs.url - }), + distributions, ) ret = [] diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 4dc824c10c..053d671d22 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -8,18 +8,34 @@ In the future the same will be used to: - Store PyPI index query results as facts in the MODULE.bazel.lock file """ -def pypi_cache(store = None): +load(":version_from_filename.bzl", "version_from_filename") + +_FACT_VERSION = "v1" + +def pypi_cache(module_ctx = None, store = None): """The cache for PyPI index queries. Currently the key is of the following structure: - (url, real_url) + (url, real_url, versions) + + Args: + module_ctx: The module context + store: The in-memory store, should implement dict interface for get and setdefault + + Returns: + A cache struct """ + mcache = memory_cache(store) + facts = {} + fcache = facts_cache(getattr(module_ctx, "facts", None), facts) # buildifier: disable=uninitialized self = struct( - _store = store or {}, + _mcache = mcache, + _facts = fcache, setdefault = lambda key, parsed_result: _pypi_cache_setdefault(self, key, parsed_result), get = lambda key: _pypi_cache_get(self, key), + get_facts = lambda: _get_facts(facts), ) # buildifier: enable=uninitialized @@ -40,7 +56,13 @@ def _pypi_cache_setdefault(self, key, parsed_result): Returns: The `parse_result`. 
""" - return self._store.setdefault(key, parsed_result) + index_url, real_url, versions = key + self._mcache.setdefault(real_url, None, parsed_result) + if not versions or not self._facts: + return parsed_result + + filtered = _filter_packages(parsed_result, versions) + return self._facts.setdefault(index_url, filtered) def _pypi_cache_get(self, key): """Return the parsed result from the cache. @@ -52,4 +74,163 @@ def _pypi_cache_get(self, key): Returns: The {type}`struct` or `None` based on if the result is in the cache or not. """ - return self._store.get(key) + index_url, real_url, versions = key + cached = self._mcache.get(real_url, versions) + if not self._facts: + return cached + + if not cached and versions: + # Could not get from in-memory, read from lockfile facts + cached = self._facts.get(index_url, versions) + + return cached + +def _get_facts(facts): + return facts + +def memory_cache(cache = None): + """SimpleAPI cache for making fewer calls. + + Args: + cache: the storage to store things in memory. + + Returns: + struct with 2 methods, `get` and `setdefault`. 
+ """ + if cache == None: + cache = {} + + return struct( + get = lambda real_url, versions: _filter_packages(cache.get(real_url), versions), + setdefault = lambda real_url, versions, value: _filter_packages(cache.get(real_url), versions), + ) + +def _filter_packages(dists, requested_versions): + if dists == None: + return None + + if not requested_versions: + return dists + + sha256s_by_version = {} + whls = {} + sdists = {} + + for sha256, d in dists.sdists.items(): + if d.version not in requested_versions: + continue + + sdists[sha256] = d + sha256s_by_version.setdefault(d.version, []).append(sha256) + + for sha256, d in dists.whls.items(): + if d.version not in requested_versions: + continue + + whls[sha256] = d + sha256s_by_version.setdefault(d.version, []).append(sha256) + + if not whls and not sdists: + # TODO @aignas 2026-03-08: add logging + #print("WARN: no dists matched for versions {}".format(requested_versions)) + return None + + return struct( + whls = whls, + sdists = sdists, + sha256s_by_version = sha256s_by_version, + ) + +def facts_cache(known_facts, facts, facts_version = _FACT_VERSION): + if known_facts == None: + return None + + return struct( + get = lambda index_url, versions: _get_from_facts( + facts, + known_facts, + index_url, + versions, + facts_version, + ), + setdefault = lambda url, value: _store_facts(facts, facts_version, url, value), + known_facts = known_facts, + facts = facts, + ) + +def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_version): + if known_facts.get("fact_version") != facts_version: + # cannot trust known facts, different version that we know how to parse + return None + + known_sources = {} + + root_url, _, distribution = index_url.rstrip("/").rpartition("/") + distribution = distribution.rstrip("/") + root_url = root_url.rstrip("/") + + for url, sha256 in known_facts.get("dist_hashes", {}).get(root_url, {}).get(distribution, {}).items(): + filename = known_facts.get("dist_filenames", 
{}).get(root_url, {}).get(distribution, {}).get(sha256) + if not filename: + _, _, filename = url.rpartition("/") + + version = version_from_filename(filename) + if version not in requested_versions: + # TODO @aignas 2026-01-21: do the check by requested shas at some point + # We don't have sufficient info in the lock file, need to call the API + # + continue + + if filename.endswith(".whl"): + dists = known_sources.setdefault("whls", {}) + else: + dists = known_sources.setdefault("sdists", {}) + + known_sources.setdefault("sha256s_by_version", {}).setdefault(version, []).append(sha256) + + dists.setdefault(sha256, struct( + sha256 = sha256, + filename = filename, + version = version, + url = url, + yanked = known_facts.get("dist_yanked", {}).get(root_url, {}).get(distribution, {}).get(sha256, ""), + )) + + if not known_sources: + # We found nothing in facts + return None + + output = struct( + whls = known_sources.get("whls", {}), + sdists = known_sources.get("sdists", {}), + sha256s_by_version = known_sources.get("sha256s_by_version", {}), + ) + + # Persist these facts for the next run because we have used them. + return _store_facts(facts, facts_version, index_url, output) + +def _store_facts(facts, fact_version, index_url, value): + """Store values as facts in the lock file. + + The main idea is to ensure that the lock file is small and it is only storing what + we would need to fetch from the internet. Any derivative information we can + from this that can be achieved using pure Starlark functions should be done in + Starlark. 
+ """ + if not value: + return value + + facts["fact_version"] = fact_version + + root_url, _, distribution = index_url.rstrip("/").rpartition("/") + distribution = distribution.rstrip("/") + root_url = root_url.rstrip("/") + + for sha256, d in (value.sdists | value.whls).items(): + facts.setdefault("dist_hashes", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, sha256) + if not d.url.endswith(d.filename): + facts.setdefault("dist_filenames", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, d.filename) + if d.yanked: + facts.setdefault("dist_yanked", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(sha256, d.yanked) + + return value diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 0f776ad434..6ef025d359 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -81,6 +81,8 @@ def simpleapi_download( index_urls = [attr.index_url] + attr.extra_index_urls read_simpleapi = read_simpleapi or _read_simpleapi + input_sources = attr.sources + found_on_index = {} warn_overrides = False ctx.report_progress("Fetch package lists from PyPI index") @@ -90,8 +92,8 @@ def simpleapi_download( warn_overrides = True async_downloads = {} - sources = [pkg for pkg in attr.sources if pkg not in found_on_index] - for pkg in sources: + sources = {pkg: versions for pkg, versions in input_sources.items() if pkg not in found_on_index} + for pkg, versions in sources.items(): pkg_normalized = normalize_name(pkg) url = urllib.strip_empty_path_segments("{index_url}/{distribution}/".format( index_url = index_url_overrides.get(pkg_normalized, index_url).rstrip("/"), @@ -100,6 +102,7 @@ def simpleapi_download( result = read_simpleapi( ctx = ctx, attr = attr, + versions = versions, url = url, cache = cache, get_auth = get_auth, @@ -166,7 +169,7 @@ If you would like to skip downloading metadata for these packages please add 
'si return contents -def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs): +def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download_kwargs): """Read SimpleAPI. Args: @@ -179,6 +182,7 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs): * auth_patterns: The auth_patterns parameter for ctx.download, see {obj}`http_file` for docs. cache: {type}`struct` the `pypi_cache` instance. + versions: {type}`list[str] The versions that have been requested. get_auth: A function to get auth information. Used in tests. **download_kwargs: Any extra params to ctx.download. Note that output and auth will be passed for you. @@ -194,7 +198,7 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs): real_url = urllib.strip_empty_path_segments(envsubst(url, attr.envsubst, ctx.getenv)) - cache_key = (url, real_url) + cache_key = (url, real_url, versions) cached_result = cache.get(cache_key) if cached_result: return struct(success = True, output = cached_result) From e991a23feba6d92a7a701d4f409a5bab1661d2f2 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 14:41:23 +0900 Subject: [PATCH 02/15] fix a bug and add a test suite --- python/private/pypi/pypi_cache.bzl | 47 +++++++++++++++------- tests/pypi/pypi_cache/BUILD.bazel | 5 +++ tests/pypi/pypi_cache/pypi_cache_tests.bzl | 43 ++++++++++++++++++++ 3 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 tests/pypi/pypi_cache/BUILD.bazel create mode 100644 tests/pypi/pypi_cache/pypi_cache_tests.bzl diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 053d671d22..1315e71709 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -26,8 +26,7 @@ def pypi_cache(module_ctx = None, store = None): A cache struct """ mcache = memory_cache(store) - facts = {} - fcache = 
facts_cache(getattr(module_ctx, "facts", None), facts) + fcache = facts_cache(getattr(module_ctx, "facts", None)) # buildifier: disable=uninitialized self = struct( @@ -35,7 +34,7 @@ def pypi_cache(module_ctx = None, store = None): _facts = fcache, setdefault = lambda key, parsed_result: _pypi_cache_setdefault(self, key, parsed_result), get = lambda key: _pypi_cache_get(self, key), - get_facts = lambda: _get_facts(facts), + get_facts = lambda: _pypi_cache_get_facts(self), ) # buildifier: enable=uninitialized @@ -57,7 +56,7 @@ def _pypi_cache_setdefault(self, key, parsed_result): The `parse_result`. """ index_url, real_url, versions = key - self._mcache.setdefault(real_url, None, parsed_result) + self._mcache.setdefault(real_url, parsed_result) if not versions or not self._facts: return parsed_result @@ -75,7 +74,7 @@ def _pypi_cache_get(self, key): The {type}`struct` or `None` based on if the result is in the cache or not. """ index_url, real_url, versions = key - cached = self._mcache.get(real_url, versions) + cached = _filter_packages(self._mcache.get(real_url), versions) if not self._facts: return cached @@ -85,12 +84,18 @@ def _pypi_cache_get(self, key): return cached -def _get_facts(facts): - return facts +def _pypi_cache_get_facts(self): + if not self._fcache: + return {} + + return self._fcache.facts def memory_cache(cache = None): """SimpleAPI cache for making fewer calls. + We are using the `real_url` as the key in the cache functions on purpose in order to get the + best possible cache hits. + Args: cache: the storage to store things in memory. 
@@ -101,15 +106,12 @@ def memory_cache(cache = None): cache = {} return struct( - get = lambda real_url, versions: _filter_packages(cache.get(real_url), versions), - setdefault = lambda real_url, versions, value: _filter_packages(cache.get(real_url), versions), + get = lambda real_url: cache.get(real_url), + setdefault = lambda real_url, value: cache.setdefault(real_url, value), ) def _filter_packages(dists, requested_versions): - if dists == None: - return None - - if not requested_versions: + if dists == None or not requested_versions: return dists sha256s_by_version = {} @@ -141,10 +143,27 @@ def _filter_packages(dists, requested_versions): sha256s_by_version = sha256s_by_version, ) -def facts_cache(known_facts, facts, facts_version = _FACT_VERSION): +def facts_cache(known_facts, facts_version = _FACT_VERSION): + """The facts cache. + + Here we have a way to store things as facts and the main thing to keep in mind is that we should + not use the real_url in case it contains credentials in it (e.g. is of form `https://:@`). + + Args: + known_facts: An opaque object coming from {obj}`module_ctx.facts`. + facts_version: {type}`str` the version of the facts schema, used for short-circuiting. + + Returns: + A struct that has: + * `get` method for getting values from the facts cache. + * `setdefault` method for setting values in the cache. + * `facts` attribute that should be passed to the {obj}`module_ctx.extension_metadata` to persist facts. 
+ """ if known_facts == None: return None + facts = {} + return struct( get = lambda index_url, versions: _get_from_facts( facts, diff --git a/tests/pypi/pypi_cache/BUILD.bazel b/tests/pypi/pypi_cache/BUILD.bazel new file mode 100644 index 0000000000..03c20623cd --- /dev/null +++ b/tests/pypi/pypi_cache/BUILD.bazel @@ -0,0 +1,5 @@ +load(":pypi_cache_tests.bzl", "pypi_cache_test_suite") + +pypi_cache_test_suite( + name = "pypi_cache_tests", +) diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl new file mode 100644 index 0000000000..3264de0248 --- /dev/null +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -0,0 +1,43 @@ +"" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private/pypi:pypi_cache.bzl", "pypi_cache") # buildifier: disable=bzl-visibility + +_tests = [] + +def _test_memory_cache_hit(env): + """Verifies that the cache returns stored values for the same real_url.""" + store = {} + + # We pass None for module_ctx to focus solely on memory_cache behavior + cache = pypi_cache(module_ctx = None, store = store) + + # Mocked parsed result from a PyPI-like index + fake_result = struct( + sdists = { + "sha_1": struct(version = "1.0.0", filename = "pkg-1.0.0.tar.gz"), + }, + whls = { + "sha_2": struct(version = "1.1.0", filename = "pkg-1.1.0-py3-none-any.whl"), + }, + ) + + # Key format: (index_url, real_url, versions) + key = ("https://{PYPI_INDEX_URL}/pkg", "https://pypi.org/simple/pkg", ["1.0.0", "1.1.0"]) + + # When set the cache + cache.setdefault(key, fake_result) + + # And get a value back + got = cache.get(key) + + env.expect.that_dict(got.sdists).contains_exactly(fake_result.sdists) + env.expect.that_dict(got.whls).contains_exactly(fake_result.whls) + +_tests.append(_test_memory_cache_hit) + +def pypi_cache_test_suite(name): + test_suite( + name = name, + basic_tests = _tests, + ) From dd2552e935d7c122b3d198ccb73eddac264aa74d Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius 
<240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 17:54:07 +0900 Subject: [PATCH 03/15] finish the test --- tests/pypi/pypi_cache/pypi_cache_tests.bzl | 52 ++++++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl index 3264de0248..9327326ffb 100644 --- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -1,16 +1,44 @@ "" load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("@rules_testing//lib:truth.bzl", "subjects") load("//python/private/pypi:pypi_cache.bzl", "pypi_cache") # buildifier: disable=bzl-visibility _tests = [] +def _cache(env, **kwargs): + cache = pypi_cache(**kwargs) + + attrs = { + "sdists": subjects.dict, + "sha256s_by_version": subjects.dict, + "whls": subjects.dict, + } + + def _expect(value): + if not value: + return env.expect.that_str(value) + + return env.expect.that_struct( + value, + attrs = attrs, + ) + + return struct( + setdefault = lambda *args, **kwargs: _expect( + cache.setdefault(*args, **kwargs), + ), + get = lambda *args, **kwargs: _expect( + cache.get(*args, **kwargs), + ), + ) + def _test_memory_cache_hit(env): """Verifies that the cache returns stored values for the same real_url.""" store = {} # We pass None for module_ctx to focus solely on memory_cache behavior - cache = pypi_cache(module_ctx = None, store = store) + cache = _cache(env, module_ctx = None, store = store) # Mocked parsed result from a PyPI-like index fake_result = struct( @@ -20,6 +48,10 @@ def _test_memory_cache_hit(env): whls = { "sha_2": struct(version = "1.1.0", filename = "pkg-1.1.0-py3-none-any.whl"), }, + sha256s_by_version = { + "1.0.0": ["sha_1"], + "1.1.0": ["sha_2"], + }, ) # Key format: (index_url, real_url, versions) @@ -31,8 +63,22 @@ def _test_memory_cache_hit(env): # And get a value back got = cache.get(key) - 
env.expect.that_dict(got.sdists).contains_exactly(fake_result.sdists) - env.expect.that_dict(got.whls).contains_exactly(fake_result.whls) + got.sdists().contains_exactly(fake_result.sdists) + got.whls().contains_exactly(fake_result.whls) + got.sha256s_by_version().contains_exactly(fake_result.sha256s_by_version) + + # A different key with fewer versions + key = ("https://{PYPI_INDEX_URL}/pkg", "https://pypi.org/simple/pkg", ["1.0.0"]) + + got = cache.get(key) + got.sdists().contains_exactly(fake_result.sdists) + got.whls().contains_exactly({}) + got.sha256s_by_version().contains_exactly({"1.0.0": ["sha_1"]}) + + # A key with no matches + key = ("https://{PYPI_INDEX_URL}/pkg", "https://pypi.org/simple/pkg", ["1.2.0"]) + + cache.get(key).equals(None) _tests.append(_test_memory_cache_hit) From 32e2dede858e273e8a034c276be2375fb93e8e6a Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 18:32:42 +0900 Subject: [PATCH 04/15] add one more test --- python/private/pypi/pypi_cache.bzl | 4 +- tests/pypi/pypi_cache/pypi_cache_tests.bzl | 65 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 1315e71709..3651ad8d44 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -85,10 +85,10 @@ def _pypi_cache_get(self, key): return cached def _pypi_cache_get_facts(self): - if not self._fcache: + if not self._facts: return {} - return self._fcache.facts + return self._facts.facts def memory_cache(cache = None): """SimpleAPI cache for making fewer calls. 
diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl index 9327326ffb..cc40c3b14a 100644 --- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -31,6 +31,7 @@ def _cache(env, **kwargs): get = lambda *args, **kwargs: _expect( cache.get(*args, **kwargs), ), + get_facts = lambda: env.expect.that_dict(cache.get_facts()), ) def _test_memory_cache_hit(env): @@ -82,6 +83,70 @@ def _test_memory_cache_hit(env): _tests.append(_test_memory_cache_hit) +def _test_pypi_cache_writes_to_facts(env): + """Verifies that setting a value in the cache also populates the facts store.""" + store = {} + + # 1. Setup a mock module_ctx with an empty facts dict + # Your implementation looks for getattr(module_ctx, "facts", None) + mock_facts = {} + mock_ctx = struct(facts = mock_facts) + + cache = _cache(env, module_ctx = mock_ctx, store = store) + + fake_result = struct( + sdists = { + "sha_sdist": struct( + version = "1.0.0", + filename = "pkg-1.0.0.tar.gz", + url = "https://pypi.org/files/pkg-1.0.0.tar.gz", + yanked = "", + ), + }, + whls = { + "sha_whl": struct( + version = "1.0.0", + filename = "pkg-1.0.0-py3-none-any.whl", + url = "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl", + yanked = "Security issue", + ), + }, + ) + + # Key format: (index_url, real_url, versions) + # The facts logic uses index_url to derive the root_url and distribution + index_url = "https://pypi.org/simple/pkg" + key = (index_url, "https://pypi.org/simple/pkg", ["1.0.0"]) + + # 2. When we set the cache + cache.setdefault(key, fake_result) + + # 3. Retrieve the internal facts dictionary + # Based on your _pypi_cache_get_facts implementation + facts = cache.get_facts() + + # 4. 
Assertions on the facts schema + facts.contains_exactly({ + "dist_hashes": { + "https://pypi.org/simple": { + "pkg": { + "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl": "sha_whl", + "https://pypi.org/files/pkg-1.0.0.tar.gz": "sha_sdist", + }, + }, + }, + "dist_yanked": { + "https://pypi.org/simple": { + "pkg": { + "sha_whl": "Security issue", + }, + }, + }, + "fact_version": "v1", # Facts version + }) + +_tests.append(_test_pypi_cache_writes_to_facts) + def pypi_cache_test_suite(name): test_suite( name = name, From 6858b694c6b6c141980023d4d9ab750ba3184d8b Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 18:39:24 +0900 Subject: [PATCH 05/15] improve test --- tests/pypi/pypi_cache/pypi_cache_tests.bzl | 37 ++++++++++------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl index cc40c3b14a..23265690bc 100644 --- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -85,14 +85,8 @@ _tests.append(_test_memory_cache_hit) def _test_pypi_cache_writes_to_facts(env): """Verifies that setting a value in the cache also populates the facts store.""" - store = {} - - # 1. 
Setup a mock module_ctx with an empty facts dict - # Your implementation looks for getattr(module_ctx, "facts", None) - mock_facts = {} - mock_ctx = struct(facts = mock_facts) - - cache = _cache(env, module_ctx = mock_ctx, store = store) + mock_ctx = struct(facts = {}) + cache = _cache(env, module_ctx = mock_ctx) fake_result = struct( sdists = { @@ -111,24 +105,27 @@ def _test_pypi_cache_writes_to_facts(env): yanked = "Security issue", ), }, + sha256s_by_version = { + "1.0.0": ["sha_sdist", "sha_whl"], + }, ) - # Key format: (index_url, real_url, versions) - # The facts logic uses index_url to derive the root_url and distribution - index_url = "https://pypi.org/simple/pkg" - key = (index_url, "https://pypi.org/simple/pkg", ["1.0.0"]) + key = ("https://{PYPI_INDEX_URL}/pkg/", "https://pypi.org/simple/pkg/", ["1.0.0"]) - # 2. When we set the cache + # When we set the cache cache.setdefault(key, fake_result) - # 3. Retrieve the internal facts dictionary - # Based on your _pypi_cache_get_facts implementation - facts = cache.get_facts() + # Then the key returns us the same items + got = cache.get(key) + got.whls().contains_exactly(fake_result.whls) + got.sdists().contains_exactly(fake_result.sdists) + got.sha256s_by_version().contains_exactly(fake_result.sha256s_by_version) - # 4. 
Assertions on the facts schema - facts.contains_exactly({ + # Then when we get facts at the end + cache.get_facts().contains_exactly({ "dist_hashes": { - "https://pypi.org/simple": { + # We are not using the real index URL, because we may have credentials in here + "https://{PYPI_INDEX_URL}": { "pkg": { "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl": "sha_whl", "https://pypi.org/files/pkg-1.0.0.tar.gz": "sha_sdist", @@ -136,7 +133,7 @@ def _test_pypi_cache_writes_to_facts(env): }, }, "dist_yanked": { - "https://pypi.org/simple": { + "https://{PYPI_INDEX_URL}": { "pkg": { "sha_whl": "Security issue", }, From 6be2e6e98321d3b92fcba582fbb0ef7ed1a5655c Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 18:44:01 +0900 Subject: [PATCH 06/15] improve the test more --- python/private/pypi/pypi_cache.bzl | 2 +- tests/pypi/pypi_cache/pypi_cache_tests.bzl | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 3651ad8d44..78676e917f 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -249,7 +249,7 @@ def _store_facts(facts, fact_version, index_url, value): facts.setdefault("dist_hashes", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, sha256) if not d.url.endswith(d.filename): facts.setdefault("dist_filenames", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, d.filename) - if d.yanked: + if d.yanked != None: facts.setdefault("dist_yanked", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(sha256, d.yanked) return value diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl index 23265690bc..486ffb99d5 100644 --- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -104,9 +104,17 @@ def 
_test_pypi_cache_writes_to_facts(env): url = "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl", yanked = "Security issue", ), + # This won't get stored + "sha_whl_2": struct( + version = "1.1.0", + filename = "pkg-1.1.0-py3-none-any.whl", + url = "https://pypi.org/files/pkg-1.1.0-py3-none-any.whl", + yanked = None, + ), }, sha256s_by_version = { "1.0.0": ["sha_sdist", "sha_whl"], + "1.1.0": ["sha_whl_2"], }, ) @@ -117,9 +125,13 @@ def _test_pypi_cache_writes_to_facts(env): # Then the key returns us the same items got = cache.get(key) - got.whls().contains_exactly(fake_result.whls) + got.whls().contains_exactly({ + "sha_whl": fake_result.whls["sha_whl"], + }) got.sdists().contains_exactly(fake_result.sdists) - got.sha256s_by_version().contains_exactly(fake_result.sha256s_by_version) + got.sha256s_by_version().contains_exactly({ + "1.0.0": fake_result.sha256s_by_version["1.0.0"], + }) # Then when we get facts at the end cache.get_facts().contains_exactly({ @@ -135,6 +147,7 @@ def _test_pypi_cache_writes_to_facts(env): "dist_yanked": { "https://{PYPI_INDEX_URL}": { "pkg": { + "sha_sdist": "", "sha_whl": "Security issue", }, }, From 9f696d52bee4d0e475afed19b215d7475fccf46d Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 18:54:43 +0900 Subject: [PATCH 07/15] finish adding tests for reading the cache --- python/private/pypi/pypi_cache.bzl | 23 ++++++- tests/pypi/pypi_cache/pypi_cache_tests.bzl | 75 ++++++++++++++++++++++ 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 78676e917f..1834d97d7b 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -140,7 +140,10 @@ def _filter_packages(dists, requested_versions): return struct( whls = whls, sdists = sdists, - sha256s_by_version = sha256s_by_version, + sha256s_by_version = { + k: sorted(v) + for k, v in sha256s_by_version.items() + 
}, ) def facts_cache(known_facts, facts_version = _FACT_VERSION): @@ -188,6 +191,8 @@ def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_ver distribution = distribution.rstrip("/") root_url = root_url.rstrip("/") + retrieved_versions = {} + for url, sha256 in known_facts.get("dist_hashes", {}).get(root_url, {}).get(distribution, {}).items(): filename = known_facts.get("dist_filenames", {}).get(root_url, {}).get(distribution, {}).get(sha256) if not filename: @@ -200,6 +205,8 @@ def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_ver # continue + retrieved_versions[version] = True + if filename.endswith(".whl"): dists = known_sources.setdefault("whls", {}) else: @@ -211,18 +218,28 @@ def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_ver sha256 = sha256, filename = filename, version = version, + metadata_url = "", + metadata_sha256 = "", url = url, - yanked = known_facts.get("dist_yanked", {}).get(root_url, {}).get(distribution, {}).get(sha256, ""), + yanked = known_facts.get("dist_yanked", {}).get(root_url, {}).get(distribution, {}).get(sha256), )) if not known_sources: # We found nothing in facts return None + if len(requested_versions) != len(retrieved_versions): + # If the results are incomplete, then return None, so that we can fetch sources from the + # internet again. + return None + output = struct( whls = known_sources.get("whls", {}), sdists = known_sources.get("sdists", {}), - sha256s_by_version = known_sources.get("sha256s_by_version", {}), + sha256s_by_version = { + k: sorted(v) + for k, v in known_sources.get("sha256s_by_version", {}).items() + }, ) # Persist these facts for the next run because we have used them. 
diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl index 486ffb99d5..82cdeb33f9 100644 --- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -157,6 +157,81 @@ def _test_pypi_cache_writes_to_facts(env): _tests.append(_test_pypi_cache_writes_to_facts) +def _test_pypi_cache_reads_from_facts(env): + """Verifies that the cache reads results from known facts and persists the facts it used.""" + mock_ctx = struct(facts = { + "dist_hashes": { + # We are not using the real index URL, because we may have credentials in here + "https://{PYPI_INDEX_URL}": { + "pkg": { + "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl": "sha_whl", + "https://pypi.org/files/pkg-1.0.0.tar.gz": "sha_sdist", + }, + }, + }, + "dist_yanked": { + "https://{PYPI_INDEX_URL}": { + "pkg": { + "sha_sdist": "", + "sha_whl": "Security issue", + }, + }, + }, + "fact_version": "v1", # Facts version + }) + cache = _cache(env, module_ctx = mock_ctx) + + key = ("https://{PYPI_INDEX_URL}/pkg/", "https://pypi.org/simple/pkg/", ["1.0.0"]) + + # Then we would get empty facts because we haven't accessed any of the known facts. + # This simulates the dropping of the facts of requirements that are no longer needed.
+ cache.get_facts().contains_exactly({}) + + # When we get the key from the cache + got = cache.get(key) + + expected_result = struct( + sdists = { + "sha_sdist": struct( + sha256 = "sha_sdist", + version = "1.0.0", + filename = "pkg-1.0.0.tar.gz", + metadata_url = "", + metadata_sha256 = "", + url = "https://pypi.org/files/pkg-1.0.0.tar.gz", + yanked = "", + ), + }, + whls = { + "sha_whl": struct( + sha256 = "sha_whl", + version = "1.0.0", + filename = "pkg-1.0.0-py3-none-any.whl", + url = "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl", + metadata_url = "", + metadata_sha256 = "", + yanked = "Security issue", + ), + }, + sha256s_by_version = { + "1.0.0": ["sha_sdist", "sha_whl"], + }, + ) + + got.whls().contains_exactly(expected_result.whls) + got.sdists().contains_exactly(expected_result.sdists) + got.sha256s_by_version().contains_exactly(expected_result.sha256s_by_version) + + # Then we store the same facts back again, because we accessed the cached keys. + cache.get_facts().contains_exactly(mock_ctx.facts) + + # When we request more than what we have, we will return nothing + key = ("https://{PYPI_INDEX_URL}/pkg/", "https://pypi.org/simple/pkg/", ["1.0.0", "1.1.0"]) + got = cache.get(key) + got.equals(None) + +_tests.append(_test_pypi_cache_reads_from_facts) + def pypi_cache_test_suite(name): test_suite( name = name, From b90cc5135e59ce73196a7d2464e4293056e4cf2c Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 18:56:31 +0900 Subject: [PATCH 08/15] add notes --- python/private/pypi/pypi_cache.bzl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 1834d97d7b..f366693813 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -60,6 +60,7 @@ def _pypi_cache_setdefault(self, key, parsed_result): if not versions or not self._facts: return parsed_result + # Filter the packages to only what is
needed before writing to the facts cache filtered = _filter_packages(parsed_result, versions) return self._facts.setdefault(index_url, filtered) @@ -74,6 +75,10 @@ def _pypi_cache_get(self, key): The {type}`struct` or `None` based on if the result is in the cache or not. """ index_url, real_url, versions = key + + # When retrieving from memory cache, filter down to only what is needed. If the + # cache is empty, we will attempt to read from facts, however, reading from memory + # first allows us to not parse the contents of the lock file that may add up. cached = _filter_packages(self._mcache.get(real_url), versions) if not self._facts: return cached From 1155814e0c8b3dec1868a7a5c82824f9158bd3b5 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:03:30 +0900 Subject: [PATCH 09/15] minor fixup --- python/private/pypi/BUILD.bazel | 3 +++ python/private/pypi/extension.bzl | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 34318d7920..6b4822333c 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -359,6 +359,9 @@ bzl_library( bzl_library( name = "pypi_cache_bzl", srcs = ["pypi_cache.bzl"], + deps = [ + ":version_from_filename_bzl", + ], ) bzl_library( diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 736ba0dba3..1dfdef32b3 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -401,7 +401,7 @@ def _pip_impl(module_ctx): return module_ctx.extension_metadata( reproducible = True, - facts = mods.facts or {}, + facts = mods.facts or None, ) _default_attrs = { From 0ee949756e643c8262d89be7d8ffc65be92ae8d2 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:05:28 +0900 Subject: [PATCH 10/15] doc: add changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 189e46f3f8..8703c3bc1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,7 +70,10 @@ END_UNRELEASED_TEMPLATE {#v0-0-0-added} ### Added -* Nothing added. +* (pypi) Write SimpleAPI contents to the `MODULE.bazel.lock` file if using + {obj}`experimental_index_url`, which should speed up consecutive initializations and should no + longer require network access if the cache is hydrated. + Implements [#2731](https://github.com/bazel-contrib/rules_python/issues/2731). {#v1-9-0} ## [1.9.0] - 2026-02-21 From 210717d87d9ce5892b067bc936b1b5822e34bebb Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:11:27 +0900 Subject: [PATCH 11/15] fix tests --- .../simpleapi_download_tests.bzl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 391e352e08..298dbe728a 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -23,11 +23,12 @@ _tests = [] def _test_simple(env): calls = [] - def read_simpleapi(ctx, url, attr, cache, get_auth, block): + def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block): _ = ctx # buildifier: disable=unused-variable _ = attr _ = cache _ = get_auth + _ = versions env.expect.that_bool(block).equals(False) calls.append(url) if "foo" in url and "main" in url: @@ -54,7 +55,7 @@ def _test_simple(env): index_url_overrides = {}, index_url = "main", extra_index_urls = ["extra"], - sources = ["foo", "bar", "baz"], + sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], ), cache = pypi_cache(), @@ -95,11 +96,12 @@ def _test_fail(env): calls = [] fails = [] - def read_simpleapi(ctx, url, attr, cache, get_auth, block): + def read_simpleapi(ctx, url, versions, attr,
cache, get_auth, block): _ = ctx # buildifier: disable=unused-variable _ = attr _ = cache _ = get_auth + _ = versions env.expect.that_bool(block).equals(False) calls.append(url) if "foo" in url: @@ -133,7 +135,7 @@ def _test_fail(env): }, index_url = "main", extra_index_urls = ["extra"], - sources = ["foo", "bar", "baz"], + sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], ), cache = pypi_cache(), @@ -186,7 +188,7 @@ def _test_download_url(env): index_url_overrides = {}, index_url = "https://example.com/main/simple/", extra_index_urls = [], - sources = ["foo", "bar", "baz"], + sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], ), cache = pypi_cache(), @@ -222,7 +224,7 @@ def _test_download_url_parallel(env): index_url_overrides = {}, index_url = "https://example.com/main/simple/", extra_index_urls = [], - sources = ["foo", "bar", "baz"], + sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], ), cache = pypi_cache(), @@ -258,7 +260,7 @@ def _test_download_envsubst_url(env): index_url_overrides = {}, index_url = "$INDEX_URL", extra_index_urls = [], - sources = ["foo", "bar", "baz"], + sources = {"bar": None, "baz": None, "foo": None}, envsubst = ["INDEX_URL"], ), cache = pypi_cache(), From 2de109680a0253478c2f7c06c6aed2cc904df6f5 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:13:10 +0900 Subject: [PATCH 12/15] fix tests 2 --- tests/pypi/hub_builder/hub_builder_tests.bzl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index 27040d36d7..1a6c2efa9b 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -1052,7 +1052,12 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef index_url = "pypi.org", index_url_overrides = {}, netrc = None, - sources = ["simple", "plat_pkg", 
"pip_fallback", "some_other_pkg"], + sources = { + "pip_fallback": ["0.0.1"], + "plat_pkg": ["0.0.4"], + "simple": ["0.0.1"], + "some_other_pkg": ["0.0.1"], + }, ), "cache": {}, "parallel_download": False, From 92ffba777d25b84f2c786109b7fc0141149fb7b3 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:15:18 +0900 Subject: [PATCH 13/15] fix tests 3 --- python/private/pypi/simpleapi_download.bzl | 2 +- tests/pypi/simpleapi_download/simpleapi_download_tests.bzl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 6ef025d359..ff18887ec1 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -131,7 +131,7 @@ def simpleapi_download( contents[download.pkg_normalized] = _with_index_url(download.url, result.output) found_on_index[pkg] = index_url - failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index] + failed_sources = [pkg for pkg in input_sources if pkg not in found_on_index] if failed_sources: pkg_index_urls = { pkg: index_url_overrides.get( diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 298dbe728a..953df5c107 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -148,13 +148,13 @@ def _test_fail(env): """ Failed to download metadata of the following packages from urls: { - "foo": "invalid", "bar": ["main", "extra"], + "foo": "invalid", } If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[ - "foo", "bar", + "foo", ]' to your 'pip.parse' call. 
""", ]) From 5b36c1b684c35dcf8804644d2a8c4907f27aa3d0 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:20:30 +0900 Subject: [PATCH 14/15] only pass facts if we have any --- python/private/pypi/extension.bzl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 1dfdef32b3..72336eba09 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -399,10 +399,13 @@ def _pip_impl(module_ctx): groups = mods.hub_group_map.get(hub_name), ) - return module_ctx.extension_metadata( - reproducible = True, - facts = mods.facts or None, - ) + if mods.facts: + return module_ctx.extension_metadata( + reproducible = True, + facts = mods.facts, + ) + else: + return module_ctx.extension_metadata(reproducible = True) _default_attrs = { "arch_name": attr.string( From f8fda7c4b25826410380eb44aef86be888dfcea5 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:21:07 +0900 Subject: [PATCH 15/15] only pass facts if we have any --- python/private/pypi/extension.bzl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 72336eba09..9d1e42e299 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -399,6 +399,8 @@ def _pip_impl(module_ctx): groups = mods.hub_group_map.get(hub_name), ) + # The code is smart to not return facts if we don't support the mechanism for that. + # Hence we should not pass it to the metadata if mods.facts: return module_ctx.extension_metadata( reproducible = True,