Skip to content

Commit 08d09cf

Browse files
authored
gh-112346: Always set OS byte to 255, simpler gzip.compress function. (GH-120486)
This matches the output behavior in 3.10 and earlier. The optimization in 3.11 allowed the zlib library's "os" value to be filled in instead when mtime was 0; this change keeps the output consistent.
1 parent 31d1d72 commit 08d09cf

File tree

4 files changed

+26
-34
lines changed

4 files changed

+26
-34
lines changed

Doc/library/gzip.rst

+5-3
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,7 @@ The module defines the following items:
188188

189189
Compress the *data*, returning a :class:`bytes` object containing
190190
the compressed data. *compresslevel* and *mtime* have the same meaning as in
191-
the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this
192-
function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``.
193-
The zlib function is faster.
191+
the :class:`GzipFile` constructor above.
194192

195193
.. versionadded:: 3.2
196194
.. versionchanged:: 3.8
@@ -200,6 +198,10 @@ The module defines the following items:
200198
streamed fashion. Calls with *mtime* set to ``0`` are delegated to
201199
:func:`zlib.compress` for better speed.
202200

201+
.. versionchanged:: 3.13
202+
The gzip header OS byte is guaranteed to be set to 255 when this function
203+
is used, as was the case in 3.10 and earlier.
204+
203205
.. function:: decompress(data)
204206

205207
Decompress the *data*, returning a :class:`bytes` object containing the

Lib/gzip.py

+8-30
Original file line numberDiff line numberDiff line change
@@ -580,43 +580,21 @@ def _rewind(self):
580580
self._new_member = True
581581

582582

583-
def _create_simple_gzip_header(compresslevel: int,
584-
mtime = None) -> bytes:
585-
"""
586-
Write a simple gzip header with no extra fields.
587-
:param compresslevel: Compresslevel used to determine the xfl bytes.
588-
:param mtime: The mtime (must support conversion to a 32-bit integer).
589-
:return: A bytes object representing the gzip header.
590-
"""
591-
if mtime is None:
592-
mtime = time.time()
593-
if compresslevel == _COMPRESS_LEVEL_BEST:
594-
xfl = 2
595-
elif compresslevel == _COMPRESS_LEVEL_FAST:
596-
xfl = 4
597-
else:
598-
xfl = 0
599-
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
600-
# fields added to header), mtime, xfl and os (255 for unknown OS).
601-
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
602-
603-
604583
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
605584
"""Compress data in one shot and return the compressed string.
606585
607586
compresslevel sets the compression level in range of 0-9.
608587
mtime can be used to set the modification time. The modification time is
609588
set to the current time by default.
610589
"""
611-
if mtime == 0:
612-
# Use zlib as it creates the header with 0 mtime by default.
613-
# This is faster and with less overhead.
614-
return zlib.compress(data, level=compresslevel, wbits=31)
615-
header = _create_simple_gzip_header(compresslevel, mtime)
616-
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
617-
# Wbits=-15 creates a raw deflate block.
618-
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
619-
trailer)
590+
# Wbits=31 automatically includes a gzip header and trailer.
591+
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
592+
if mtime is None:
593+
mtime = time.time()
594+
# Reuse gzip header created by zlib, replace mtime and OS byte for
595+
# consistency.
596+
header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
597+
return header + gzip_data[10:]
620598

621599

622600
def decompress(data):

Lib/test/test_gzip.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -714,14 +714,24 @@ def test_compress_mtime(self):
714714
self.assertEqual(f.mtime, mtime)
715715

716716
def test_compress_correct_level(self):
717-
# gzip.compress calls with mtime == 0 take a different code path.
718717
for mtime in (0, 42):
719718
with self.subTest(mtime=mtime):
720719
nocompress = gzip.compress(data1, compresslevel=0, mtime=mtime)
721720
yescompress = gzip.compress(data1, compresslevel=1, mtime=mtime)
722721
self.assertIn(data1, nocompress)
723722
self.assertNotIn(data1, yescompress)
724723

724+
def test_issue112346(self):
725+
# The OS byte should be 255; this should not change between Python versions.
726+
for mtime in (0, 42):
727+
with self.subTest(mtime=mtime):
728+
compress = gzip.compress(data1, compresslevel=1, mtime=mtime)
729+
self.assertEqual(
730+
struct.unpack("<IxB", compress[4:10]),
731+
(mtime, 255),
732+
"Gzip header does not properly set either mtime or OS byte."
733+
)
734+
725735
def test_decompress(self):
726736
for data in (data1, data2):
727737
buf = io.BytesIO()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The OS byte in gzip headers is now always set to 255 when using
2+
:func:`gzip.compress`.

0 commit comments

Comments
 (0)