Skip to content

Commit 331549a

Browse files
committed
👷Implement build-time pip bundling @ ensurepip
Prior to this patch, Pip wheels were stored in the Git repository of CPython. Git is optimized for text, but these artifacts are binary. The unpleasant side effect is that the bare Git repository grows by the size of the zip archive every time a wheel is added, removed or modified. It's time to put a stop to this. The patch implements an `ensurepip.bundle` module that is meant to be called through `runpy` to download the Pip wheel and place it into the same location as before. It removes the wheel file from the Git repository and prevents re-adding it by defining a new `.gitignore` configuration file. The idea is that the builders of CPython are supposed to invoke the following command at build time: ```console $ python -m ensurepip.bundle ``` This command will verify the existing wheel's SHA-256 hash and, if the hash does not match or the wheel doesn't exist, it will proceed to download the artifact from PyPI. It will confirm the SHA-256 hash of the download before placing it into the `Lib/ensurepip/_bundled/` directory. Every single line added or modified as a part of this change is also covered with tests. Every new module has 100% coverage. The only uncovered lines under `Lib/ensurepip/` are the ones that are absolutely unrelated to this effort. Resolves python#80789. Ref: https://bugs.python.org/issue36608.
1 parent d0160c7 commit 331549a

16 files changed

+665
-242
lines changed

.github/workflows/build.yml

+13-2
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ jobs:
231231
--prefix=/opt/python-dev \
232232
--with-openssl="$(brew --prefix openssl@3.0)"
233233
- name: Build CPython
234-
run: make -j4
234+
run: make all-with-ensurepip-dists-bundled -j4
235235
- name: Display build info
236236
run: make pythoninfo
237237
- name: Tests
@@ -300,6 +300,11 @@ jobs:
300300
- name: Remount sources writable for tests
301301
# some tests write to srcdir, lack of pyc files slows down testing
302302
run: sudo mount $CPYTHON_RO_SRCDIR -oremount,rw
303+
- name: Bundle ensurepip dists
304+
env:
305+
SSL_CERT_DIR: /etc/ssl/certs
306+
run: make bundle-ensurepip-dists
307+
working-directory: ${{ env.CPYTHON_BUILDDIR }}
303308
- name: Tests
304309
working-directory: ${{ env.CPYTHON_BUILDDIR }}
305310
run: xvfb-run make buildbottest TESTOPTS="-j4 -uall,-cpu"
@@ -352,7 +357,9 @@ jobs:
352357
- name: Configure CPython
353358
run: ./configure --config-cache --with-pydebug --with-openssl=$OPENSSL_DIR
354359
- name: Build CPython
355-
run: make -j4
360+
env:
361+
SSL_CERT_DIR: /etc/ssl/certs
362+
run: make all-with-ensurepip-dists-bundled -j4
356363
- name: Display build info
357364
run: make pythoninfo
358365
- name: SSL tests
@@ -415,6 +422,10 @@ jobs:
415422
- name: Build CPython out-of-tree
416423
working-directory: ${{ env.CPYTHON_BUILDDIR }}
417424
run: make -j4
425+
- name: Bundle ensurepip dists
426+
env:
427+
SSL_CERT_DIR: /etc/ssl/certs
428+
run: make bundle-ensurepip-dists
418429
- name: Display build info
419430
working-directory: ${{ env.CPYTHON_BUILDDIR }}
420431
run: make pythoninfo

.github/workflows/reusable-docs.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ jobs:
100100
- name: 'Configure CPython'
101101
run: ./configure --with-pydebug
102102
- name: 'Build CPython'
103-
run: make -j4
103+
run: make all-with-ensurepip-dists-bundled -j4
104104
- name: 'Install build dependencies'
105105
run: make -C Doc/ PYTHON=../python venv
106106
# Use "xvfb-run" since some doctest tests open GUI windows

.github/workflows/verify-ensurepip-wheels.yml

-33
This file was deleted.

Lib/ensurepip/__init__.py

+20-88
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,16 @@
1-
import collections
1+
"""Bundled Pip installer."""
2+
23
import os
3-
import os.path
4+
import pathlib
5+
import shutil
46
import subprocess
57
import sys
6-
import sysconfig
78
import tempfile
8-
from importlib import resources
9-
109

11-
__all__ = ["version", "bootstrap"]
12-
_PACKAGE_NAMES = ('pip',)
13-
_PIP_VERSION = "23.2.1"
14-
_PROJECTS = [
15-
("pip", _PIP_VERSION, "py3"),
16-
]
10+
from ._wheelhouses import discover_ondisk_packages
1711

18-
# Packages bundled in ensurepip._bundled have wheel_name set.
19-
# Packages from WHEEL_PKG_DIR have wheel_path set.
20-
_Package = collections.namedtuple('Package',
21-
('version', 'wheel_name', 'wheel_path'))
2212

23-
# Directory of system wheel packages. Some Linux distribution packaging
24-
# policies recommend against bundling dependencies. For example, Fedora
25-
# installs wheel packages in the /usr/share/python-wheels/ directory and don't
26-
# install the ensurepip._bundled package.
27-
_WHEEL_PKG_DIR = sysconfig.get_config_var('WHEEL_PKG_DIR')
28-
29-
30-
def _find_packages(path):
31-
packages = {}
32-
try:
33-
filenames = os.listdir(path)
34-
except OSError:
35-
# Ignore: path doesn't exist or permission error
36-
filenames = ()
37-
# Make the code deterministic if a directory contains multiple wheel files
38-
# of the same package, but don't attempt to implement correct version
39-
# comparison since this case should not happen.
40-
filenames = sorted(filenames)
41-
for filename in filenames:
42-
# filename is like 'pip-21.2.4-py3-none-any.whl'
43-
if not filename.endswith(".whl"):
44-
continue
45-
for name in _PACKAGE_NAMES:
46-
prefix = name + '-'
47-
if filename.startswith(prefix):
48-
break
49-
else:
50-
continue
51-
52-
# Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl'
53-
version = filename.removeprefix(prefix).partition('-')[0]
54-
wheel_path = os.path.join(path, filename)
55-
packages[name] = _Package(version, None, wheel_path)
56-
return packages
57-
58-
59-
def _get_packages():
60-
global _PACKAGES, _WHEEL_PKG_DIR
61-
if _PACKAGES is not None:
62-
return _PACKAGES
63-
64-
packages = {}
65-
for name, version, py_tag in _PROJECTS:
66-
wheel_name = f"{name}-{version}-{py_tag}-none-any.whl"
67-
packages[name] = _Package(version, wheel_name, None)
68-
if _WHEEL_PKG_DIR:
69-
dir_packages = _find_packages(_WHEEL_PKG_DIR)
70-
# only used the wheel package directory if all packages are found there
71-
if all(name in dir_packages for name in _PACKAGE_NAMES):
72-
packages = dir_packages
73-
_PACKAGES = packages
74-
return packages
75-
_PACKAGES = None
13+
__all__ = ("version", "bootstrap")
7614

7715

7816
def _run_pip(args, additional_paths=None):
@@ -105,7 +43,7 @@ def version():
10543
"""
10644
Returns a string specifying the bundled version of pip.
10745
"""
108-
return _get_packages()['pip'].version
46+
return discover_ondisk_packages()['pip'].project_version
10947

11048

11149
def _disable_pip_configuration_settings():
@@ -164,27 +102,18 @@ def _bootstrap(*, root=None, upgrade=False, user=False,
164102
# omit pip
165103
os.environ["ENSUREPIP_OPTIONS"] = "install"
166104

105+
ondisk_dist_pkgs_map = discover_ondisk_packages()
167106
with tempfile.TemporaryDirectory() as tmpdir:
168107
# Put our bundled wheels into a temporary directory and construct the
169108
# additional paths that need added to sys.path
109+
tmpdir_path = pathlib.Path(tmpdir)
170110
additional_paths = []
171-
for name, package in _get_packages().items():
172-
if package.wheel_name:
173-
# Use bundled wheel package
174-
wheel_name = package.wheel_name
175-
wheel_path = resources.files("ensurepip") / "_bundled" / wheel_name
176-
whl = wheel_path.read_bytes()
177-
else:
178-
# Use the wheel package directory
179-
with open(package.wheel_path, "rb") as fp:
180-
whl = fp.read()
181-
wheel_name = os.path.basename(package.wheel_path)
182-
183-
filename = os.path.join(tmpdir, wheel_name)
184-
with open(filename, "wb") as fp:
185-
fp.write(whl)
186-
187-
additional_paths.append(filename)
111+
for package in ondisk_dist_pkgs_map.values():
112+
with package.as_pathlib_ctx() as bundled_wheel_path:
113+
tmp_wheel_path = tmpdir_path / bundled_wheel_path.name
114+
shutil.copy2(bundled_wheel_path, tmp_wheel_path)
115+
116+
additional_paths.append(str(tmp_wheel_path))
188117

189118
# Construct the arguments to be passed to the pip command
190119
args = ["install", "--no-cache-dir", "--no-index", "--find-links", tmpdir]
@@ -197,7 +126,9 @@ def _bootstrap(*, root=None, upgrade=False, user=False,
197126
if verbosity:
198127
args += ["-" + "v" * verbosity]
199128

200-
return _run_pip([*args, *_PACKAGE_NAMES], additional_paths)
129+
bundled_project_names = list(ondisk_dist_pkgs_map.keys())
130+
return _run_pip(args + bundled_project_names, additional_paths)
131+
201132

202133
def _uninstall_helper(*, verbosity=0):
203134
"""Helper to support a clean default uninstall process on Windows
@@ -227,7 +158,8 @@ def _uninstall_helper(*, verbosity=0):
227158
if verbosity:
228159
args += ["-" + "v" * verbosity]
229160

230-
return _run_pip([*args, *reversed(_PACKAGE_NAMES)])
161+
bundled_project_names = list(discover_ondisk_packages().keys())
162+
return _run_pip(args + bundled_project_names)
231163

232164

233165
def _main(argv=None):

Lib/ensurepip/_bundled/.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*
2+
!.gitignore
3+
!README.md

Lib/ensurepip/_bundled/README.md

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Upstream packaging
2+
3+
To populate this directory, the initial build packagers are supposed
4+
to invoke the following command:
5+
6+
```console
7+
$ python -m ensurepip.bundle
8+
```
9+
10+
It will download a pre-defined version of the Pip wheel. Its SHA-256
11+
hash is guaranteed to match the one on PyPI.
12+
13+
# Downstream packaging
14+
15+
Packagers of the downstream distributions are welcome to put an
16+
alternative wheel version in the directory defined by the
17+
`WHEEL_PKG_DIR` configuration setting. If this is done,
18+
19+
```console
20+
$ python -m ensurepip
21+
```
22+
23+
will prefer the replacement distribution package over the bundled one.
Binary file not shown.

Lib/ensurepip/_bundler.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""Build time dist downloading and bundling logic."""
2+
3+
from __future__ import annotations
4+
5+
import sys
6+
from contextlib import suppress
7+
from importlib.resources import as_file as _traversable_to_pathlib_ctx
8+
9+
from ._structs import BUNDLED_WHEELS_PATH, REMOTE_DIST_PKGS
10+
11+
12+
def ensure_wheels_are_downloaded(*, verbosity: bool = False) -> None:
    """Populate the bundle directory with any wheels that are missing.

    Each entry of ``REMOTE_DIST_PKGS`` is looked up by its
    ``wheel_file_name`` under ``BUNDLED_WHEELS_PATH``.  When a file is
    already there and ``pkg.matches()`` accepts its bytes, the entry is
    skipped.  Otherwise the wheel is obtained through
    ``pkg.download_verified_wheel_contents()`` and written into the
    bundle directory, replacing any previous file of the same name.

    :param verbosity: when true, progress messages are printed to
        ``sys.stderr``.
    """

    def _report(message: str) -> None:
        # Progress output is opt-in and always goes to stderr.
        if verbosity:
            print(message, file=sys.stderr)

    for pkg in REMOTE_DIST_PKGS:
        cached_wheel = BUNDLED_WHEELS_PATH / pkg.wheel_file_name

        try:
            if pkg.matches(cached_wheel.read_bytes()):
                _report(
                    f'A valid `{pkg.wheel_file_name}` is already '
                    'present in cache. Skipping download.'
                )
                continue
        except FileNotFoundError:
            # Nothing cached yet for this wheel; fall through to download.
            pass

        _report(f'Downloading `{pkg.wheel_file_name}`...')
        # NOTE(review): judging by the method name, the payload is verified
        # before it is returned -- confirm in `_structs`.
        wheel_bytes = pkg.download_verified_wheel_contents()

        _report(f'Saving `{pkg.wheel_file_name}` to disk...')
        # ``as_file`` yields a real filesystem path for the bundle directory
        # so that the wheel can be written with ``pathlib``.
        with _traversable_to_pathlib_ctx(BUNDLED_WHEELS_PATH) as bundle_dir:
            (bundle_dir / pkg.wheel_file_name).write_bytes(wheel_bytes)

0 commit comments

Comments
 (0)