Skip to content

Commit 87a9cf1

Browse files
dizzy57 and aignas authored
fix(py_wheel): produce deterministic wheel files (#1453)
The current implementation does not produce deterministic output because:
- `ZipFile.writestr()` leaks the current date and time.
- `ZipFile.write()` leaks the source file's mtime and mode bits (permissions) into the resulting zip archive.

By manually creating our own `ZipInfo` objects we can explicitly set the date and time fields to `Jan 1, 1980, 00:00` (the minimum value allowed by the zip file standard), and ensure that other file attributes are uniform across all entries in a zip file.

---------

Co-authored-by: Ignas Anikevicius <[email protected]>
1 parent fde5fc1 commit 87a9cf1

File tree

3 files changed

+89
-11
lines changed

3 files changed

+89
-11
lines changed

CHANGELOG.md

+2
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,8 @@ A brief description of the categories of changes:
105105

106106
* (gazelle) Improve runfiles lookup hermeticity.
107107

108+
* (py_wheel) Produce deterministic wheel files
109+
108110
## [0.25.0] - 2023-08-22
109111

110112
### Changed

examples/wheel/wheel_test.py

+58
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import hashlib
1516
import os
1617
import platform
1718
import subprocess
@@ -43,9 +44,29 @@ def _get_path(self, filename):
4344
else:
4445
return path
4546

47+
def assertFileSha256Equal(self, filename, sha):
    """Assert that the SHA-256 digest of a file matches an expected value.

    Args:
        filename: path of the file to hash; it is opened in binary mode.
        sha: expected digest as a hexadecimal string.

    Raises:
        AssertionError: via ``self.assertEqual`` when the digests differ.
            The file name is passed as ``msg`` so the failure identifies
            which file did not match.
    """
    digest = hashlib.sha256()
    with open(filename, "rb") as f:
        # Read in 1 MiB chunks so the whole file is never held in memory;
        # iter() stops at the empty bytes object returned at end of file.
        for chunk in iter(lambda: f.read(2**20), b""):
            digest.update(chunk)
    self.assertEqual(digest.hexdigest(), sha, msg=filename)
56+
57+
def assertAllEntriesHasReproducibleMetadata(self, zf):
    """Assert that every entry of an open zip file has fixed metadata.

    Each entry returned by ``zf.infolist()`` is checked field by field, in
    the order listed below; the entry's file name is used as the failure
    message so a mismatch points at the offending archive member.

    Args:
        zf: an open ``zipfile.ZipFile``.
    """
    expected_fields = (
        ("date_time", (1980, 1, 1, 0, 0, 0)),
        ("create_system", 3),
        ("external_attr", 0o777 << 16),
        ("compress_type", zipfile.ZIP_DEFLATED),
    )
    for entry in zf.infolist():
        for field, wanted in expected_fields:
            self.assertEqual(getattr(entry, field), wanted, msg=entry.filename)
65+
4666
def test_py_library_wheel(self):
4767
filename = self._get_path("example_minimal_library-0.0.1-py3-none-any.whl")
4868
with zipfile.ZipFile(filename) as zf:
69+
self.assertAllEntriesHasReproducibleMetadata(zf)
4970
self.assertEqual(
5071
zf.namelist(),
5172
[
@@ -56,12 +77,16 @@ def test_py_library_wheel(self):
5677
"example_minimal_library-0.0.1.dist-info/RECORD",
5778
],
5879
)
80+
self.assertFileSha256Equal(
81+
filename, "6da8e06a3fdd9ae5ee9fa8f796610723c05a4b0d7fde0ec5179401e956204139"
82+
)
5983

6084
def test_py_package_wheel(self):
6185
filename = self._get_path(
6286
"example_minimal_package-0.0.1-py3-none-any.whl",
6387
)
6488
with zipfile.ZipFile(filename) as zf:
89+
self.assertAllEntriesHasReproducibleMetadata(zf)
6590
self.assertEqual(
6691
zf.namelist(),
6792
[
@@ -74,12 +99,16 @@ def test_py_package_wheel(self):
7499
"example_minimal_package-0.0.1.dist-info/RECORD",
75100
],
76101
)
102+
self.assertFileSha256Equal(
103+
filename, "2948b0b5e0aa421e0b40f78b74018bbc2f218165f211da0a4609e431e8e52bee"
104+
)
77105

78106
def test_customized_wheel(self):
79107
filename = self._get_path(
80108
"example_customized-0.0.1-py3-none-any.whl",
81109
)
82110
with zipfile.ZipFile(filename) as zf:
111+
self.assertAllEntriesHasReproducibleMetadata(zf)
83112
self.assertEqual(
84113
zf.namelist(),
85114
[
@@ -159,12 +188,16 @@ def test_customized_wheel(self):
159188
first = first.main:f
160189
second = second.main:s""",
161190
)
191+
self.assertFileSha256Equal(
192+
filename, "66f0c1bfe2cedb2f4cf08d4fe955096860186c0a2f3524e0cb02387a55ac3e63"
193+
)
162194

163195
def test_legacy_filename_escaping(self):
164196
filename = self._get_path(
165197
"file_name_escaping-0.0.1_r7-py3-none-any.whl",
166198
)
167199
with zipfile.ZipFile(filename) as zf:
200+
self.assertAllEntriesHasReproducibleMetadata(zf)
168201
self.assertEquals(
169202
zf.namelist(),
170203
[
@@ -193,6 +226,9 @@ def test_legacy_filename_escaping(self):
193226
UNKNOWN
194227
""",
195228
)
229+
self.assertFileSha256Equal(
230+
filename, "593c6ab58627f2446d0f1ef2956fd6d42104eedce4493c72d462f7ebf8cb74fa"
231+
)
196232

197233
def test_filename_escaping(self):
198234
filename = self._get_path(
@@ -234,6 +270,7 @@ def test_custom_package_root_wheel(self):
234270
)
235271

236272
with zipfile.ZipFile(filename) as zf:
273+
self.assertAllEntriesHasReproducibleMetadata(zf)
237274
self.assertEqual(
238275
zf.namelist(),
239276
[
@@ -255,13 +292,17 @@ def test_custom_package_root_wheel(self):
255292
# Ensure RECORD files do not have leading forward slashes
256293
for line in record_contents.splitlines():
257294
self.assertFalse(line.startswith("/"))
295+
self.assertFileSha256Equal(
296+
filename, "1b1fa3a4e840211084ef80049d07947b845c99bedb2778496d30e0c1524686ac"
297+
)
258298

259299
def test_custom_package_root_multi_prefix_wheel(self):
260300
filename = self._get_path(
261301
"example_custom_package_root_multi_prefix-0.0.1-py3-none-any.whl",
262302
)
263303

264304
with zipfile.ZipFile(filename) as zf:
305+
self.assertAllEntriesHasReproducibleMetadata(zf)
265306
self.assertEqual(
266307
zf.namelist(),
267308
[
@@ -282,13 +323,17 @@ def test_custom_package_root_multi_prefix_wheel(self):
282323
# Ensure RECORD files do not have leading forward slashes
283324
for line in record_contents.splitlines():
284325
self.assertFalse(line.startswith("/"))
326+
self.assertFileSha256Equal(
327+
filename, "f0422d7a338de3c76bf2525927fd93c0f47f2e9c60ecc0944e3e32b642c28137"
328+
)
285329

286330
def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
287331
filename = self._get_path(
288332
"example_custom_package_root_multi_prefix_reverse_order-0.0.1-py3-none-any.whl",
289333
)
290334

291335
with zipfile.ZipFile(filename) as zf:
336+
self.assertAllEntriesHasReproducibleMetadata(zf)
292337
self.assertEqual(
293338
zf.namelist(),
294339
[
@@ -309,12 +354,16 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
309354
# Ensure RECORD files do not have leading forward slashes
310355
for line in record_contents.splitlines():
311356
self.assertFalse(line.startswith("/"))
357+
self.assertFileSha256Equal(
358+
filename, "4f9e8c917b4050f121ac81e9a2bb65723ef09a1b90b35d93792ac3a62a60efa3"
359+
)
312360

313361
def test_python_requires_wheel(self):
314362
filename = self._get_path(
315363
"example_python_requires_in_a_package-0.0.1-py3-none-any.whl",
316364
)
317365
with zipfile.ZipFile(filename) as zf:
366+
self.assertAllEntriesHasReproducibleMetadata(zf)
318367
metadata_contents = zf.read(
319368
"example_python_requires_in_a_package-0.0.1.dist-info/METADATA"
320369
)
@@ -330,6 +379,9 @@ def test_python_requires_wheel(self):
330379
UNKNOWN
331380
""",
332381
)
382+
self.assertFileSha256Equal(
383+
filename, "9bfe8197d379f88715458a75e45c1f521a8b9d3cc43fe19b407c4ab207228b7c"
384+
)
333385

334386
def test_python_abi3_binary_wheel(self):
335387
arch = "amd64"
@@ -346,6 +398,7 @@ def test_python_abi3_binary_wheel(self):
346398
f"example_python_abi3_binary_wheel-0.0.1-cp38-abi3-{os_string}_{arch}.whl",
347399
)
348400
with zipfile.ZipFile(filename) as zf:
401+
self.assertAllEntriesHasReproducibleMetadata(zf)
349402
metadata_contents = zf.read(
350403
"example_python_abi3_binary_wheel-0.0.1.dist-info/METADATA"
351404
)
@@ -380,6 +433,7 @@ def test_rule_creates_directory_and_is_included_in_wheel(self):
380433
)
381434

382435
with zipfile.ZipFile(filename) as zf:
436+
self.assertAllEntriesHasReproducibleMetadata(zf)
383437
self.assertEqual(
384438
zf.namelist(),
385439
[
@@ -390,13 +444,17 @@ def test_rule_creates_directory_and_is_included_in_wheel(self):
390444
"use_rule_with_dir_in_outs-0.0.1.dist-info/RECORD",
391445
],
392446
)
447+
self.assertFileSha256Equal(
448+
filename, "8ad5f639cc41ac6ac67eb70f6553a7fdecabaf3a1b952c3134eaea59610c2a64"
449+
)
393450

394451
def test_rule_expands_workspace_status_keys_in_wheel_metadata(self):
395452
filename = self._get_path(
396453
"example_minimal_library_BUILD_USER_-0.1._BUILD_TIMESTAMP_-py3-none-any.whl"
397454
)
398455

399456
with zipfile.ZipFile(filename) as zf:
457+
self.assertAllEntriesHasReproducibleMetadata(zf)
400458
metadata_file = None
401459
for f in zf.namelist():
402460
self.assertNotIn("_BUILD_TIMESTAMP_", f)

tools/wheelmaker.py

+29-11
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,15 @@
1414

1515
import argparse
1616
import base64
17-
import collections
1817
import hashlib
1918
import os
2019
import re
2120
import sys
2221
import zipfile
2322
from pathlib import Path
2423

24+
_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
25+
2526

2627
def commonpath(path1, path2):
2728
ret = []
@@ -189,7 +190,8 @@ def add_string(self, filename, contents):
189190
"""Add given 'contents' as filename to the distribution."""
190191
if sys.version_info[0] > 2 and isinstance(contents, str):
191192
contents = contents.encode("utf-8", "surrogateescape")
192-
self._zipfile.writestr(filename, contents)
193+
zinfo = self._zipinfo(filename)
194+
self._zipfile.writestr(zinfo, contents)
193195
hash = hashlib.sha256()
194196
hash.update(contents)
195197
self._add_to_record(filename, self._serialize_digest(hash), len(contents))
@@ -219,20 +221,36 @@ def arcname_from(name):
219221
return
220222

221223
arcname = arcname_from(package_filename)
224+
zinfo = self._zipinfo(arcname)
222225

223-
self._zipfile.write(real_filename, arcname=arcname)
224-
# Find the hash and length
226+
# Write file to the zip archive while computing the hash and length
225227
hash = hashlib.sha256()
226228
size = 0
227-
with open(real_filename, "rb") as f:
228-
while True:
229-
block = f.read(2**20)
230-
if not block:
231-
break
232-
hash.update(block)
233-
size += len(block)
229+
with open(real_filename, "rb") as fsrc:
230+
with self._zipfile.open(zinfo, "w") as fdst:
231+
while True:
232+
block = fsrc.read(2**20)
233+
if not block:
234+
break
235+
fdst.write(block)
236+
hash.update(block)
237+
size += len(block)
234238
self._add_to_record(arcname, self._serialize_digest(hash), size)
235239

240+
def _zipinfo(self, filename):
    """Build a ZipInfo for ``filename`` with fixed, run-independent metadata.

    The timestamp is pinned to ``_ZIP_EPOCH`` and the creator system and
    permission bits are set to constants. The compression method is copied
    from the zip file this object is writing to.

    Args:
        filename: archive name for the entry; leading path separators are
            removed.

    Returns:
        The populated ``zipfile.ZipInfo``.
    """
    # Drop leading separators, as ZipInfo.from_file does.
    leading_separators = os.path.sep + (os.path.altsep or "")
    entry = zipfile.ZipInfo(
        filename=filename.lstrip(leading_separators),
        date_time=_ZIP_EPOCH,
    )
    entry.compress_type = self._zipfile.compression
    # NOTE(review): the mode carries permission bits only, no file-type bits
    # (e.g. stat.S_IFREG) - confirm that installers which test S_ISREG before
    # restoring the executable bit are not affected.
    entry.external_attr = 0o777 << 16  # permissions: rwxrwxrwx
    entry.create_system = 3  # ZipInfo entry created on a unix-y system
    return entry
253+
236254
def add_wheelfile(self):
237255
"""Write WHEEL file to the distribution"""
238256
# TODO(pstradomski): Support non-purelib wheels.

0 commit comments

Comments
 (0)