Skip to content

Commit 965dd51

Browse files
Yanpei-Wangaignas
andauthored
feat(pypi/parse_requirements): get dists by version when no hash provied (#2695)
This pull request modifies the SimpleAPI HTML parsing to add a new field where we can get the `sha256` values by package version. This allows us to very easily fallback to all packages of a particular version when using `experimental_index_url` if the hashes are not specified. The code deciding which packages to query the SimpleAPI for has been also modified to only omit queries for packages that are included via direct URL references. If we fail to get the data from the SimpleAPI, we will fallback to `pip` and try to install it via the legacy behaviour. Fixes #2023 Work towards #260 Work towards #1357 Work towards #2363 --------- Co-authored-by: Ignas Anikevicius <[email protected]>
1 parent ca91cea commit 965dd51

13 files changed

+331
-70
lines changed

CHANGELOG.md

+8
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ Unreleased changes template.
8181

8282
{#v0-0-0-added}
8383
### Added
84+
* (pypi) From now on `sha256` values in the `requirements.txt` is no longer
85+
mandatory when enabling {attr}`pip.parse.experimental_index_url` feature.
86+
This means that `rules_python` will attempt to fetch metadata for all
87+
packages through SimpleAPI unless they are pulled through direct URL
88+
references. Fixes [#2023](https://github.com/bazel-contrib/rules_python/issues/2023).
89+
In case you see issues with `rules_python` being too eager to fetch the SimpleAPI
90+
metadata, you can use the newly added {attr}`pip.parse.experimental_skip_sources`
91+
to skip metadata fetching for those packages.
8492
* (uv) A {obj}`lock` rule that is the replacement for the
8593
{obj}`compile_pip_requirements`. This may still have rough corners
8694
so please report issues with it in the

docs/pypi-dependencies.md

+7-5
Original file line numberDiff line numberDiff line change
@@ -386,11 +386,13 @@ This does not mean that `rules_python` is fetching the wheels eagerly, but it
386386
rather means that it is calling the PyPI server to get the Simple API response
387387
to get the list of all available source and wheel distributions. Once it has
388388
got all of the available distributions, it will select the right ones depending
389-
on the `sha256` values in your `requirements_lock.txt` file. The compatible
390-
distribution URLs will be then written to the `MODULE.bazel.lock` file. Currently
391-
users wishing to use the lock file with `rules_python` with this feature have
392-
to set an environment variable `RULES_PYTHON_OS_ARCH_LOCK_FILE=0` which will
393-
become default in the next release.
389+
on the `sha256` values in your `requirements_lock.txt` file. If `sha256` hashes
390+
are not present in the requirements file, we will fallback to matching by version
391+
specified in the lock file. The compatible distribution URLs will be then
392+
written to the `MODULE.bazel.lock` file. Currently users wishing to use the
393+
lock file with `rules_python` with this feature have to set an environment
394+
variable `RULES_PYTHON_OS_ARCH_LOCK_FILE=0` which will become default in the
395+
next release.
394396

395397
Fetching the distribution information from the PyPI allows `rules_python` to
396398
know which `whl` should be used on which target platform and it will determine

python/private/pypi/extension.bzl

+26-1
Original file line numberDiff line numberDiff line change
@@ -459,13 +459,21 @@ You cannot use both the additive_build_content and additive_build_content_file a
459459
get_index_urls = None
460460
if pip_attr.experimental_index_url:
461461
is_reproducible = False
462+
skip_sources = [
463+
normalize_name(s)
464+
for s in pip_attr.simpleapi_skip
465+
]
462466
get_index_urls = lambda ctx, distributions: simpleapi_download(
463467
ctx,
464468
attr = struct(
465469
index_url = pip_attr.experimental_index_url,
466470
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
467471
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
468-
sources = distributions,
472+
sources = [
473+
d
474+
for d in distributions
475+
if normalize_name(d) not in skip_sources
476+
],
469477
envsubst = pip_attr.envsubst,
470478
# Auth related info
471479
netrc = pip_attr.netrc,
@@ -682,6 +690,11 @@ This is equivalent to `--index-url` `pip` option.
682690
If {attr}`download_only` is set, then `sdist` archives will be discarded and `pip.parse` will
683691
operate in wheel-only mode.
684692
:::
693+
694+
:::{versionchanged} VERSION_NEXT_FEATURE
695+
Index metadata will be used to deduct `sha256` values for packages even if the
696+
`sha256` values are not present in the requirements.txt lock file.
697+
:::
685698
""",
686699
),
687700
"experimental_index_url_overrides": attr.string_dict(
@@ -749,6 +762,18 @@ The Python version the dependencies are targetting, in Major.Minor format
749762
If an interpreter isn't explicitly provided (using `python_interpreter` or
750763
`python_interpreter_target`), then the version specified here must have
751764
a corresponding `python.toolchain()` configured.
765+
""",
766+
),
767+
"simpleapi_skip": attr.string_list(
768+
doc = """\
769+
The list of packages to skip fetching metadata for from SimpleAPI index. You should
770+
normally not need this attribute, but in case you do, please report this as a bug
771+
to `rules_python` and use this attribute until the bug is fixed.
772+
773+
EXPERIMENTAL: this may be removed without notice.
774+
775+
:::{versionadded} VERSION_NEXT_FEATURE
776+
:::
752777
""",
753778
),
754779
"whl_modifications": attr.label_keyed_string_dict(

python/private/pypi/parse_requirements.bzl

+10-5
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def parse_requirements(
184184
req.distribution: None
185185
for reqs in requirements_by_platform.values()
186186
for req in reqs.values()
187-
if req.srcs.shas
187+
if not req.srcs.url
188188
}),
189189
)
190190

@@ -315,10 +315,15 @@ def _add_dists(*, requirement, index_urls, logger = None):
315315
whls = []
316316
sdist = None
317317

318-
# TODO @aignas 2024-05-22: it is in theory possible to add all
319-
# requirements by version instead of by sha256. This may be useful
320-
# for some projects.
321-
for sha256 in requirement.srcs.shas:
318+
# First try to find distributions by SHA256 if provided
319+
shas_to_use = requirement.srcs.shas
320+
if not shas_to_use:
321+
version = requirement.srcs.version
322+
shas_to_use = index_urls.sha256s_by_version.get(version, [])
323+
if logger:
324+
logger.warn(lambda: "requirement file has been generated without hashes, will use all hashes for the given version {} that could find on the index:\n {}".format(version, shas_to_use))
325+
326+
for sha256 in shas_to_use:
322327
# For now if the artifact is marked as yanked we just ignore it.
323328
#
324329
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api

python/private/pypi/parse_simpleapi_html.bzl

+33-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def parse_simpleapi_html(*, url, content):
2626
Returns:
2727
A list of structs with:
2828
* filename: The filename of the artifact.
29+
* version: The version of the artifact.
2930
* url: The URL to download the artifact.
3031
* sha256: The sha256 of the artifact.
3132
* metadata_sha256: The whl METADATA sha256 if we can download it. If this is
@@ -51,15 +52,20 @@ def parse_simpleapi_html(*, url, content):
5152

5253
# Each line follows the following pattern
5354
# <a href="https://...#sha256=..." attribute1="foo" ... attributeN="bar">filename</a><br />
55+
sha256_by_version = {}
5456
for line in lines[1:]:
5557
dist_url, _, tail = line.partition("#sha256=")
58+
dist_url = _absolute_url(url, dist_url)
59+
5660
sha256, _, tail = tail.partition("\"")
5761

5862
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
5963
yanked = "data-yanked" in line
6064

6165
head, _, _ = tail.rpartition("</a>")
6266
maybe_metadata, _, filename = head.rpartition(">")
67+
version = _version(filename)
68+
sha256_by_version.setdefault(version, []).append(sha256)
6369

6470
metadata_sha256 = ""
6571
metadata_url = ""
@@ -75,7 +81,8 @@ def parse_simpleapi_html(*, url, content):
7581
if filename.endswith(".whl"):
7682
whls[sha256] = struct(
7783
filename = filename,
78-
url = _absolute_url(url, dist_url),
84+
version = version,
85+
url = dist_url,
7986
sha256 = sha256,
8087
metadata_sha256 = metadata_sha256,
8188
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
@@ -84,7 +91,8 @@ def parse_simpleapi_html(*, url, content):
8491
else:
8592
sdists[sha256] = struct(
8693
filename = filename,
87-
url = _absolute_url(url, dist_url),
94+
version = version,
95+
url = dist_url,
8896
sha256 = sha256,
8997
metadata_sha256 = "",
9098
metadata_url = "",
@@ -94,8 +102,31 @@ def parse_simpleapi_html(*, url, content):
94102
return struct(
95103
sdists = sdists,
96104
whls = whls,
105+
sha256_by_version = sha256_by_version,
97106
)
98107

108+
_SDIST_EXTS = [
109+
".tar", # handles any compression
110+
".zip",
111+
]
112+
113+
def _version(filename):
114+
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
115+
116+
_, _, tail = filename.partition("-")
117+
version, _, _ = tail.partition("-")
118+
if version != tail:
119+
# The format is {name}-{version}-{whl_specifiers}.whl
120+
return version
121+
122+
# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path
123+
124+
# {name}-{version}.{ext}
125+
for ext in _SDIST_EXTS:
126+
version, _, _ = version.partition(ext) # build or name
127+
128+
return version
129+
99130
def _get_root_directory(url):
100131
scheme_end = url.find("://")
101132
if scheme_end == -1:

python/private/pypi/simpleapi_download.bzl

+11-4
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,17 @@ def simpleapi_download(
127127

128128
failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
129129
if failed_sources:
130-
_fail("Failed to download metadata for {} for from urls: {}".format(
131-
failed_sources,
132-
index_urls,
133-
))
130+
_fail(
131+
"\n".join([
132+
"Failed to download metadata for {} for from urls: {}.".format(
133+
failed_sources,
134+
index_urls,
135+
),
136+
"If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={}' to your 'pip.parse' call.".format(
137+
render.list(failed_sources),
138+
),
139+
]),
140+
)
134141
return None
135142

136143
if warn_overrides:

python/private/pypi/whl_library.bzl

+6
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,12 @@ def _whl_library_impl(rctx):
270270
sha256 = rctx.attr.sha256,
271271
auth = get_auth(rctx, urls),
272272
)
273+
if not rctx.attr.sha256:
274+
# this is only seen when there is a direct URL reference without sha256
275+
logger.warn("Please update the requirement line to include the hash:\n{} \\\n --hash=sha256:{}".format(
276+
rctx.attr.requirement,
277+
result.sha256,
278+
))
273279

274280
if not result.success:
275281
fail("could not download the '{}' from {}:\n{}".format(filename, urls, result))

python/private/pypi/whl_repo_name.bzl

+14-3
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,19 @@ def whl_repo_name(filename, sha256):
3232

3333
if not filename.endswith(".whl"):
3434
# Then the filename is basically foo-3.2.1.<ext>
35-
parts.append(normalize_name(filename.rpartition("-")[0]))
36-
parts.append("sdist")
35+
name, _, tail = filename.rpartition("-")
36+
parts.append(normalize_name(name))
37+
if sha256:
38+
parts.append("sdist")
39+
version = ""
40+
else:
41+
for ext in [".tar", ".zip"]:
42+
tail, _, _ = tail.partition(ext)
43+
version = tail.replace(".", "_").replace("!", "_")
3744
else:
3845
parsed = parse_whl_name(filename)
3946
name = normalize_name(parsed.distribution)
47+
version = parsed.version.replace(".", "_").replace("!", "_")
4048
python_tag, _, _ = parsed.python_tag.partition(".")
4149
abi_tag, _, _ = parsed.abi_tag.partition(".")
4250
platform_tag, _, _ = parsed.platform_tag.partition(".")
@@ -46,7 +54,10 @@ def whl_repo_name(filename, sha256):
4654
parts.append(abi_tag)
4755
parts.append(platform_tag)
4856

49-
parts.append(sha256[:8])
57+
if sha256:
58+
parts.append(sha256[:8])
59+
elif version:
60+
parts.insert(1, version)
5061

5162
return "_".join(parts)
5263

0 commit comments

Comments
 (0)