Skip to content

Commit bb56a77

Browse files
aignasewianda
authored andcommitted
refactor(pypi): translate wheel METADATA parsing to starlark (bazel-contrib#2629)
This PR starts using the newly introduced (bazel-contrib#2692) PEP 508-compliant requirement marker parser in Starlark and moves dependency generation from Python (`whl_installer`) to Starlark in the `whl_library` repository rule.

This PR is (almost) a pure refactor in which no bugs are fixed, but it is foundational work that also adds notes on how things will be moved to macros (i.e. the analysis phase), so that we can fix a few long-standing bugs and prepare for stabilizing `experimental_index_url` (bazel-contrib#260).

Refactor:
* I have migrated all of the unit tests for deps generation from METADATA `Requires-Dist` fields from Python to Starlark.
* Read the `METADATA` file itself in Starlark.

Work towards bazel-contrib#260, bazel-contrib#2319, bazel-contrib#2241

Fixes bazel-contrib#2423
1 parent aa0d16c commit bb56a77

21 files changed

+1099
-1231
lines changed

Diff for: python/private/pypi/BUILD.bazel

+19
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,18 @@ bzl_library(
221221
],
222222
)
223223

224+
bzl_library(
225+
name = "pep508_deps_bzl",
226+
srcs = ["pep508_deps.bzl"],
227+
deps = [
228+
":pep508_env_bzl",
229+
":pep508_evaluate_bzl",
230+
":pep508_platform_bzl",
231+
":pep508_requirement_bzl",
232+
"//python/private:normalize_name_bzl",
233+
],
234+
)
235+
224236
bzl_library(
225237
name = "pep508_env_bzl",
226238
srcs = ["pep508_env.bzl"],
@@ -368,7 +380,9 @@ bzl_library(
368380
":generate_whl_library_build_bazel_bzl",
369381
":parse_whl_name_bzl",
370382
":patch_whl_bzl",
383+
":pep508_deps_bzl",
371384
":pypi_repo_utils_bzl",
385+
":whl_metadata_bzl",
372386
":whl_target_platforms_bzl",
373387
"//python/private:auth_bzl",
374388
"//python/private:envsubst_bzl",
@@ -377,6 +391,11 @@ bzl_library(
377391
],
378392
)
379393

394+
bzl_library(
395+
name = "whl_metadata_bzl",
396+
srcs = ["whl_metadata.bzl"],
397+
)
398+
380399
bzl_library(
381400
name = "whl_repo_name_bzl",
382401
srcs = ["whl_repo_name.bzl"],

Diff for: python/private/pypi/pep508_deps.bzl

+351
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,351 @@
1+
# Copyright 2025 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""This module is for implementing PEP508 compliant METADATA deps parsing.
16+
"""
17+
18+
load("//python/private:normalize_name.bzl", "normalize_name")
19+
load(":pep508_env.bzl", "env")
20+
load(":pep508_evaluate.bzl", "evaluate")
21+
load(":pep508_platform.bzl", "platform", "platform_from_str")
22+
load(":pep508_requirement.bzl", "requirement")
23+
24+
# Operating system values used by default when enumerating the
# specializations of a platform that does not pin an OS.
_ALL_OS_VALUES = ["windows", "osx", "linux"]

# CPU architecture values used by default when enumerating the
# specializations of a platform that does not pin an architecture.
_ALL_ARCH_VALUES = ["aarch64", "ppc64", "ppc64le", "s390x", "x86_32", "x86_64"]
37+
38+
def deps(name, *, requires_dist, platforms = [], extras = [], host_python_version = None):
    """Parse the RequiresDist from wheel METADATA

    Requirements that refer back to the wheel itself are used only to resolve
    the effective set of extras and are then dropped from the result.

    Fails if the target platforms span more than one Python ABI and
    `host_python_version` is not provided.

    Args:
        name: {type}`str` the name of the wheel.
        requires_dist: {type}`list[str]` the list of RequiresDist lines from the
            METADATA file.
        extras: {type}`list[str]` the requested extras to generate targets for.
        platforms: {type}`list[str]` the list of target platform strings.
        host_python_version: {type}`str` the host python version. When the
            target platforms span several ABIs, its minor version selects the
            default ABI (`cp3<minor>`).

    Returns:
        A struct with attributes:
        * deps: {type}`list[str]` dependencies to include unconditionally.
        * deps_select: {type}`dict[str, list[str]]` dependencies to include on particular
          subset of target platforms.
    """
    reqs = sorted(
        [requirement(r) for r in requires_dist],
        # The marker is part of the key so that requirements that differ only
        # by their marker are processed in a deterministic order.
        key = lambda x: "{}:{}:{}".format(x.name, sorted(x.extras), x.marker),
    )
    deps = {}
    deps_select = {}
    name = normalize_name(name)
    want_extras = _resolve_extras(name, reqs, extras)

    # drop self edges
    reqs = [r for r in reqs if r.name != name]

    # Fall back to a single platform parsed from the empty string when no
    # target platforms were requested.
    platforms = [
        platform_from_str(p, python_version = host_python_version)
        for p in platforms
    ] or [
        platform_from_str("", python_version = host_python_version),
    ]

    abis = sorted({p.abi: True for p in platforms if p.abi})
    if host_python_version and len(abis) > 1:
        # "3.11.4" -> "11"
        _, _, minor_version = host_python_version.partition(".")
        minor_version, _, _ = minor_version.partition(".")
        default_abi = "cp3" + minor_version
    elif len(abis) > 1:
        fail(
            "all python versions need to be specified explicitly, got: {}".format(platforms),
        )
    else:
        default_abi = None

    for req in reqs:
        _add_req(
            deps,
            deps_select,
            req,
            extras = want_extras,
            platforms = platforms,
            default_abi = default_abi,
        )

    return struct(
        deps = sorted(deps),
        deps_select = {
            _platform_str(p): sorted(platform_deps)
            for p, platform_deps in deps_select.items()
        },
    )
103+
104+
def _platform_str(self):
    """Render a platform struct as the string used as a `select` key.

    Args:
        self: a struct with `os`, `arch` and `abi` attributes.

    Returns:
        Without an ABI: `//conditions:default` when neither OS nor arch is
        set, an `@platforms//os:<os>` label when only the OS is set, and
        `<os>_<arch>` otherwise. With an ABI: the `is_python_3.<minor>`
        config setting label when neither OS nor arch is set, and
        `cp3<minor>_<os|anyos>_<arch|anyarch>` otherwise.
    """
    os, arch, abi = self.os, self.arch, self.abi

    if abi != None:
        # The ABI has the form "cp3<minor>"; strip the "cp3" prefix.
        minor = abi[3:]
        if os == None and arch == None:
            return str(Label("//python/config_settings:is_python_3.{}".format(minor)))
        return "cp3{}_{}_{}".format(minor, os or "anyos", arch or "anyarch")

    if arch:
        return "{}_{}".format(os, arch)
    if os:
        return "@platforms//os:{}".format(os)
    return "//conditions:default"
122+
123+
def _platform_specializations(self, cpu_values = _ALL_ARCH_VALUES, os_values = _ALL_OS_VALUES):
    """Return the platform itself and all its unambiguous specializations.

    For more info about specializations see
    https://bazel.build/docs/configurable-attributes

    Args:
        self: the platform struct to specialize.
        cpu_values: the architectures substituted when `self.arch` is unset.
        os_values: the operating systems substituted when `self.os` is unset.

    Returns:
        A list starting with `self`, followed by the arch-only, the OS-only
        and finally the OS-and-arch specializations that apply.
    """
    any_arch = self.arch == None
    any_os = self.os == None

    result = [self]
    if any_arch:
        for cpu in cpu_values:
            result.append(platform(os = self.os, arch = cpu, abi = self.abi))
    if any_os:
        for os_name in os_values:
            result.append(platform(os = os_name, arch = self.arch, abi = self.abi))
    if any_os and any_arch:
        for os_name in os_values:
            for cpu in cpu_values:
                result.append(platform(os = os_name, arch = cpu, abi = self.abi))
    return result
148+
149+
def _add(deps, deps_select, dep, platform):
    """Record a dependency, either unconditionally or for one platform.

    Args:
        deps: dict used as a set of unconditional dependencies; mutated.
        deps_select: dict mapping a platform to a dict (used as a set) of
            platform-specific dependencies; mutated.
        dep: the dependency name; it is normalized before being stored.
        platform: the platform the dependency applies to, or `None` for an
            unconditional dependency.
    """
    dep = normalize_name(dep)

    if platform != None:
        if dep in deps:
            # Already an unconditional dependency, so a platform-specific
            # entry would be redundant.
            return

        # Make sure that the platform-specific branch exists.
        if platform not in deps_select:
            deps_select[platform] = {}

        # Record the dep on the platform and on every specialization of it
        # that is already present in the select.
        for specialization in _platform_specializations(platform):
            if specialization in deps_select:
                deps_select[specialization][dep] = True

        own_deps = deps_select[platform]
        if len(own_deps) != 1:
            return

        # The branch has just been created, so inherit what less specialized
        # platforms that cover this one already have.
        for other, other_deps in deps_select.items():
            if other != platform and platform in _platform_specializations(other):
                own_deps.update(other_deps)
        return

    deps[dep] = True

    # The dep is now unconditional: drop it from all platform-specific
    # branches and remove the branches that become empty.
    emptied = []
    for p, platform_deps in deps_select.items():
        if dep in platform_deps:
            platform_deps.pop(dep)
            if not platform_deps:
                emptied.append(p)

    for p in emptied:
        deps_select.pop(p)
195+
196+
def _maybe_add_common_dep(deps, deps_select, platforms, dep):
    """Promote a dep to the common list if every Python version branch has it.

    Nothing happens unless the target platforms span at least two ABIs.

    Args:
        deps: dict used as a set of unconditional dependencies; mutated.
        deps_select: dict mapping a platform to a dict of its dependencies;
            mutated.
        platforms: the list of target platform structs.
        dep: the dependency name to check.
    """
    abis = sorted({p.abi: True for p in platforms if p.abi})
    if len(abis) <= 1:
        return

    # The default branch plus one ABI-only branch per target Python version.
    candidates = [platform()]
    for abi in abis:
        candidates.append(platform(abi = abi))

    # If the dep is targeting all target python versions, it can be moved to
    # the common dependency list to simplify the select statements.
    for candidate in candidates:
        if dep not in deps_select.get(candidate, {}):
            return

    # Every version-specific branch has the dep, so make it unconditional and
    # remove it (and any branch that becomes empty) from the select.
    deps[dep] = True
    for candidate in candidates:
        remaining = deps_select[candidate]
        remaining.pop(dep)
        if not remaining:
            deps_select.pop(candidate)
222+
223+
def _resolve_extras(self_name, reqs, extras):
    """Resolve extras which are due to depending on self[some_other_extra].

    Some packages may have cyclic dependencies resulting from extras being used, one example is
    `etils`, where we have one set of extras as aliases for other extras
    and we have an extra called 'all' that includes all other extras.

    Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32.

    When the `requirements.txt` is generated by `pip-tools`, then it is likely that
    this step is not needed, but for other `requirements.txt` files this may be useful.

    NOTE @aignas 2023-12-08: the extra resolution is not platform dependent,
    but in order for it to become platform dependent we would have to have
    separate targets for each extra in extras.

    Args:
        self_name: the normalized name of the wheel whose extras are resolved.
        reqs: the parsed requirements; only those whose name equals
            `self_name` are considered.
        extras: the extras requested by the user.

    Returns:
        A sorted list of unique extras. It contains the empty string when no
        extras were requested.
    """

    # Resolve any extra extras due to self-edges, empty string means no
    # extras The empty string in the set is just a way to make the handling
    # of no extras and a single extra easier and having a set of {"", "foo"}
    # is equivalent to having {"foo"}.
    extras = extras or [""]

    self_reqs = []
    for req in reqs:
        if req.name != self_name:
            continue

        if req.marker == None:
            # I am pretty sure we cannot reach this code as it does not
            # make sense to specify packages in this way, but since it is
            # easy to handle, lets do it.
            #
            # TODO @aignas 2023-12-08: add a test
            extras = extras + req.extras
        else:
            # process these in a separate loop
            self_reqs.append(req)

    # A double loop is not strictly optimal, but always correct without recursion
    for req in self_reqs:
        if [True for extra in extras if evaluate(req.marker, env = {"extra": extra})]:
            extras = extras + req.extras
        else:
            continue

        # Iterate through all packages to ensure that we include all of the extras from previously
        # visited packages. Each candidate must be tested against its own
        # marker, not against the marker of the outer requirement.
        for req_ in self_reqs:
            if [True for extra in extras if evaluate(req_.marker, env = {"extra": extra})]:
                extras = extras + req_.extras

    # Poor mans set
    return sorted({x: None for x in extras})
277+
278+
def _add_req(deps, deps_select, req, *, extras, platforms, default_abi = None):
    """Add a single requirement to the common or platform-specific deps.

    Args:
        deps: dict used as a set of unconditional dependencies; mutated.
        deps_select: dict mapping a platform to a dict of its dependencies;
            mutated.
        req: the parsed requirement; its `name` and `marker` are used.
        extras: the extras to evaluate the marker with.
        platforms: the list of target platform structs.
        default_abi: the ABI for which an ABI-less select branch is also
            generated, or `None`.
    """
    marker = req.marker
    if not marker:
        _add(deps, deps_select, req.name, None)
        return

    # To keep the generated select statements reasonable, the marker text is
    # inspected to find out which platform dimensions it can depend on.
    match_os = (
        "os_name" in marker or
        "sys_platform" in marker or
        "platform_system" in marker
    )
    match_arch = "platform_machine" in marker
    match_version = "version" in marker

    if not match_os and not match_arch and not match_version:
        # The marker depends on none of the platform dimensions, so the dep
        # is unconditional if any extra/platform combination matches.
        hits = [
            True
            for extra in extras
            for p in platforms
            if evaluate(marker, env = env(target_platform = p, extra = extra))
        ]
        if hits:
            _add(deps, deps_select, req.name, None)
        return

    for plat in platforms:
        matched = [
            True
            for extra in extras
            if evaluate(marker, env = env(target_platform = plat, extra = extra))
        ]
        if not matched:
            continue

        if match_arch:
            if default_abi:
                _add(deps, deps_select, req.name, plat)
                if plat.abi == default_abi:
                    _add(deps, deps_select, req.name, platform(os = plat.os, arch = plat.arch))
            else:
                _add(deps, deps_select, req.name, platform(os = plat.os, arch = plat.arch))
        elif match_os:
            if default_abi:
                _add(deps, deps_select, req.name, platform(os = plat.os, abi = plat.abi))
                if plat.abi == default_abi:
                    _add(deps, deps_select, req.name, platform(os = plat.os))
            else:
                _add(deps, deps_select, req.name, platform(os = plat.os))
        elif match_version:
            if default_abi:
                _add(deps, deps_select, req.name, platform(abi = plat.abi))
                if plat.abi == default_abi:
                    _add(deps, deps_select, req.name, platform())
            else:
                _add(deps, deps_select, req.name, None)
        else:
            fail("BUG: {} support is not implemented".format(req.marker))

    # Once all platforms are processed, simplify the select if possible.
    _maybe_add_common_dep(deps, deps_select, platforms, req.name)

0 commit comments

Comments
 (0)