Skip to content

Commit eef839b

Browse files
WillMorrisonaignas
andauthored
fix: Avoid creating URLs with empty path segments from index URLs in environment variables (#2557)
This change updates `_read_simpleapi` such that it correctly handles the case where the index URL is specified in an environment variable and contains a trailing slash. The URL construction would have introduced an empty path segment, which is now removed. Fixes: #2554 --------- Co-authored-by: Ignas Anikevicius <[email protected]>
1 parent c0bd668 commit eef839b

File tree

3 files changed

+153
-7
lines changed

3 files changed

+153
-7
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ Unreleased changes template.
5757
{#v0-0-0-fixed}
5858
### Fixed
5959
* (gazelle) Providing multiple input requirements files to `gazelle_python_manifest` now works correctly.
60+
* (pypi) Handle trailing slashes in pip index URLs in environment variables,
61+
fixes [#2554](https://github.com/bazelbuild/rules_python/issues/2554).
6062

6163
{#v0-0-0-added}
6264
### Added

python/private/pypi/simpleapi_download.bzl

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ A file that houses private functions used in the `bzlmod` extension with the sam
1717
"""
1818

1919
load("@bazel_features//:features.bzl", "bazel_features")
20-
load("//python/private:auth.bzl", "get_auth")
20+
load("//python/private:auth.bzl", _get_auth = "get_auth")
2121
load("//python/private:envsubst.bzl", "envsubst")
2222
load("//python/private:normalize_name.bzl", "normalize_name")
2323
load("//python/private:text_util.bzl", "render")
@@ -30,6 +30,7 @@ def simpleapi_download(
3030
cache,
3131
parallel_download = True,
3232
read_simpleapi = None,
33+
get_auth = None,
3334
_fail = fail):
3435
"""Download Simple API HTML.
3536
@@ -59,6 +60,7 @@ def simpleapi_download(
5960
parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
6061
read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
6162
Used in tests.
63+
get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
6264
_fail: a function to print a failure. Used in tests.
6365
6466
Returns:
@@ -98,6 +100,7 @@ def simpleapi_download(
98100
),
99101
attr = attr,
100102
cache = cache,
103+
get_auth = get_auth,
101104
**download_kwargs
102105
)
103106
if hasattr(result, "wait"):
@@ -144,7 +147,7 @@ def simpleapi_download(
144147

145148
return contents
146149

147-
def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
150+
def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
148151
"""Read SimpleAPI.
149152
150153
Args:
@@ -157,6 +160,7 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
157160
* auth_patterns: The auth_patterns parameter for ctx.download, see
158161
http_file for docs.
159162
cache: A dict for storing the results.
163+
get_auth: A function to get auth information. Used in tests.
160164
**download_kwargs: Any extra params to ctx.download.
161165
Note that output and auth will be passed for you.
162166
@@ -169,11 +173,11 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
169173
# them to ctx.download if we want to correctly handle the relative URLs.
170174
# TODO: Add a test that env subbed index urls do not leak into the lock file.
171175

172-
real_url = envsubst(
176+
real_url = strip_empty_path_segments(envsubst(
173177
url,
174178
attr.envsubst,
175179
ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
176-
)
180+
))
177181

178182
cache_key = real_url
179183
if cache_key in cache:
@@ -194,6 +198,8 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
194198

195199
output = ctx.path(output_str.strip("_").lower() + ".html")
196200

201+
get_auth = get_auth or _get_auth
202+
197203
# NOTE: this may have block = True or block = False in the download_kwargs
198204
download = ctx.download(
199205
url = [real_url],
@@ -211,6 +217,27 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
211217

212218
return _read_index_result(ctx, download, output, real_url, cache, cache_key)
213219

220+
def strip_empty_path_segments(url):
221+
"""Removes empty path segments from a URL. Does nothing for urls with no scheme.
222+
223+
Public only for testing.
224+
225+
Args:
226+
url: The url to remove empty path segments from
227+
228+
Returns:
229+
The url with empty path segments removed and any trailing slash preserved.
230+
If the url had no scheme it is returned unchanged.
231+
"""
232+
scheme, _, rest = url.partition("://")
233+
if rest == "":
234+
return url
235+
stripped = "/".join([p for p in rest.split("/") if p])
236+
if url.endswith("/"):
237+
return "{}://{}/".format(scheme, stripped)
238+
else:
239+
return "{}://{}".format(scheme, stripped)
240+
214241
def _read_index_result(ctx, result, output, url, cache, cache_key):
215242
if not result.success:
216243
return struct(success = False)

tests/pypi/simpleapi_download/simpleapi_download_tests.bzl

Lines changed: 120 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,18 @@
1515
""
1616

1717
load("@rules_testing//lib:test_suite.bzl", "test_suite")
18-
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download") # buildifier: disable=bzl-visibility
18+
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility
1919

2020
_tests = []
2121

2222
def _test_simple(env):
2323
calls = []
2424

25-
def read_simpleapi(ctx, url, attr, cache, block):
25+
def read_simpleapi(ctx, url, attr, cache, get_auth, block):
2626
_ = ctx # buildifier: disable=unused-variable
2727
_ = attr
2828
_ = cache
29+
_ = get_auth
2930
env.expect.that_bool(block).equals(False)
3031
calls.append(url)
3132
if "foo" in url and "main" in url:
@@ -73,10 +74,11 @@ def _test_fail(env):
7374
calls = []
7475
fails = []
7576

76-
def read_simpleapi(ctx, url, attr, cache, block):
77+
def read_simpleapi(ctx, url, attr, cache, get_auth, block):
7778
_ = ctx # buildifier: disable=unused-variable
7879
_ = attr
7980
_ = cache
81+
_ = get_auth
8082
env.expect.that_bool(block).equals(False)
8183
calls.append(url)
8284
if "foo" in url:
@@ -119,6 +121,121 @@ def _test_fail(env):
119121

120122
_tests.append(_test_fail)
121123

124+
def _test_download_url(env):
125+
downloads = {}
126+
127+
def download(url, output, **kwargs):
128+
_ = kwargs # buildifier: disable=unused-variable
129+
downloads[url[0]] = output
130+
return struct(success = True)
131+
132+
simpleapi_download(
133+
ctx = struct(
134+
os = struct(environ = {}),
135+
download = download,
136+
read = lambda i: "contents of " + i,
137+
path = lambda i: "path/for/" + i,
138+
),
139+
attr = struct(
140+
index_url_overrides = {},
141+
index_url = "https://example.com/main/simple/",
142+
extra_index_urls = [],
143+
sources = ["foo", "bar", "baz"],
144+
envsubst = [],
145+
),
146+
cache = {},
147+
parallel_download = False,
148+
get_auth = lambda ctx, urls, ctx_attr: struct(),
149+
)
150+
151+
env.expect.that_dict(downloads).contains_exactly({
152+
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
153+
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
154+
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
155+
})
156+
157+
_tests.append(_test_download_url)
158+
159+
def _test_download_url_parallel(env):
160+
downloads = {}
161+
162+
def download(url, output, **kwargs):
163+
_ = kwargs # buildifier: disable=unused-variable
164+
downloads[url[0]] = output
165+
return struct(wait = lambda: struct(success = True))
166+
167+
simpleapi_download(
168+
ctx = struct(
169+
os = struct(environ = {}),
170+
download = download,
171+
read = lambda i: "contents of " + i,
172+
path = lambda i: "path/for/" + i,
173+
),
174+
attr = struct(
175+
index_url_overrides = {},
176+
index_url = "https://example.com/main/simple/",
177+
extra_index_urls = [],
178+
sources = ["foo", "bar", "baz"],
179+
envsubst = [],
180+
),
181+
cache = {},
182+
parallel_download = True,
183+
get_auth = lambda ctx, urls, ctx_attr: struct(),
184+
)
185+
186+
env.expect.that_dict(downloads).contains_exactly({
187+
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
188+
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
189+
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
190+
})
191+
192+
_tests.append(_test_download_url_parallel)
193+
194+
def _test_download_envsubst_url(env):
195+
downloads = {}
196+
197+
def download(url, output, **kwargs):
198+
_ = kwargs # buildifier: disable=unused-variable
199+
downloads[url[0]] = output
200+
return struct(success = True)
201+
202+
simpleapi_download(
203+
ctx = struct(
204+
os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
205+
download = download,
206+
read = lambda i: "contents of " + i,
207+
path = lambda i: "path/for/" + i,
208+
),
209+
attr = struct(
210+
index_url_overrides = {},
211+
index_url = "$INDEX_URL",
212+
extra_index_urls = [],
213+
sources = ["foo", "bar", "baz"],
214+
envsubst = ["INDEX_URL"],
215+
),
216+
cache = {},
217+
parallel_download = False,
218+
get_auth = lambda ctx, urls, ctx_attr: struct(),
219+
)
220+
221+
env.expect.that_dict(downloads).contains_exactly({
222+
"https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
223+
"https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
224+
"https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
225+
})
226+
227+
_tests.append(_test_download_envsubst_url)
228+
229+
def _test_strip_empty_path_segments(env):
230+
env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
231+
env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
232+
env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
233+
env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
234+
env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
235+
env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
236+
237+
_tests.append(_test_strip_empty_path_segments)
238+
122239
def simpleapi_download_test_suite(name):
123240
"""Create the test suite.
124241

0 commit comments

Comments
 (0)