Skip to content

Commit 75ce78d

Browse files
committed
Add max_<type>_len option to unpacker. (fixes #97).
Fix build error on 32bit environment (fixes #102).
1 parent c43fb48 commit 75ce78d

File tree

4 files changed

+246
-47
lines changed

4 files changed

+246
-47
lines changed

msgpack/_unpacker.pyx

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ cdef extern from "unpack.h":
2828
PyObject* ext_hook
2929
char *encoding
3030
char *unicode_errors
31+
Py_ssize_t max_str_len
32+
Py_ssize_t max_bin_len
33+
Py_ssize_t max_array_len
34+
Py_ssize_t max_map_len
35+
Py_ssize_t max_ext_len
3136

3237
ctypedef struct unpack_context:
3338
msgpack_user user
@@ -46,10 +51,18 @@ cdef extern from "unpack.h":
4651
cdef inline init_ctx(unpack_context *ctx,
4752
object object_hook, object object_pairs_hook,
4853
object list_hook, object ext_hook,
49-
bint use_list, char* encoding, char* unicode_errors):
54+
bint use_list, char* encoding, char* unicode_errors,
55+
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
56+
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
57+
Py_ssize_t max_ext_len):
5058
unpack_init(ctx)
5159
ctx.user.use_list = use_list
5260
ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
61+
ctx.user.max_str_len = max_str_len
62+
ctx.user.max_bin_len = max_bin_len
63+
ctx.user.max_array_len = max_array_len
64+
ctx.user.max_map_len = max_map_len
65+
ctx.user.max_ext_len = max_ext_len
5366

5467
if object_hook is not None and object_pairs_hook is not None:
5568
raise TypeError("object_pairs_hook and object_hook are mutually exclusive.")
@@ -85,7 +98,12 @@ def default_read_extended_type(typecode, data):
8598

8699
def unpackb(object packed, object object_hook=None, object list_hook=None,
87100
bint use_list=1, encoding=None, unicode_errors="strict",
88-
object_pairs_hook=None, ext_hook=ExtType):
101+
object_pairs_hook=None, ext_hook=ExtType,
102+
Py_ssize_t max_str_len=2147483647, # 2**31-1
103+
Py_ssize_t max_bin_len=2147483647,
104+
Py_ssize_t max_array_len=2147483647,
105+
Py_ssize_t max_map_len=2147483647,
106+
Py_ssize_t max_ext_len=2147483647):
89107
"""
90108
Unpack packed_bytes to object. Returns an unpacked object.
91109
@@ -115,7 +133,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
115133
cerr = PyBytes_AsString(unicode_errors)
116134

117135
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
118-
use_list, cenc, cerr)
136+
use_list, cenc, cerr,
137+
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
119138
ret = unpack_construct(&ctx, buf, buf_len, &off)
120139
if ret == 1:
121140
obj = unpack_data(&ctx)
@@ -144,8 +163,7 @@ def unpack(object stream, object object_hook=None, object list_hook=None,
144163

145164

146165
cdef class Unpacker(object):
147-
"""
148-
Streaming unpacker.
166+
"""Streaming unpacker.
149167
150168
arguments:
151169
@@ -183,6 +201,19 @@ cdef class Unpacker(object):
183201
Raises `BufferFull` exception when it is insufficient.
184202
You should set this parameter when unpacking data from an untrusted source.
185203
204+
:param int max_str_len:
205+
Limits max length of str. (default: 2**31-1)
206+
207+
:param int max_bin_len:
208+
Limits max length of bin. (default: 2**31-1)
209+
210+
:param int max_array_len:
211+
Limits max length of array. (default: 2**31-1)
212+
213+
:param int max_map_len:
214+
Limits max length of map. (default: 2**31-1)
215+
216+
186217
example of streaming deserialize from file-like object::
187218
188219
unpacker = Unpacker(file_like)
@@ -221,7 +252,12 @@ cdef class Unpacker(object):
221252
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
222253
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
223254
str encoding=None, str unicode_errors='strict', int max_buffer_size=0,
224-
object ext_hook=ExtType):
255+
object ext_hook=ExtType,
256+
Py_ssize_t max_str_len=2147483647, # 2**31-1
257+
Py_ssize_t max_bin_len=2147483647,
258+
Py_ssize_t max_array_len=2147483647,
259+
Py_ssize_t max_map_len=2147483647,
260+
Py_ssize_t max_ext_len=2147483647):
225261
cdef char *cenc=NULL,
226262
cdef char *cerr=NULL
227263

@@ -265,7 +301,9 @@ cdef class Unpacker(object):
265301
cerr = PyBytes_AsString(self.unicode_errors)
266302

267303
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
268-
ext_hook, use_list, cenc, cerr)
304+
ext_hook, use_list, cenc, cerr,
305+
max_str_len, max_bin_len, max_array_len,
306+
max_map_len, max_ext_len)
269307

270308
def feed(self, object next_bytes):
271309
"""Append `next_bytes` to internal buffer."""
@@ -365,7 +403,7 @@ cdef class Unpacker(object):
365403
raise ValueError("Unpack failed: error = %d" % (ret,))
366404

367405
def read_bytes(self, Py_ssize_t nbytes):
368-
"""read a specified number of raw bytes from the stream"""
406+
"""Read a specified number of raw bytes from the stream"""
369407
cdef size_t nread
370408
nread = min(self.buf_tail - self.buf_head, nbytes)
371409
ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread)
@@ -375,8 +413,7 @@ cdef class Unpacker(object):
375413
return ret
376414

377415
def unpack(self, object write_bytes=None):
378-
"""
379-
unpack one object
416+
"""Unpack one object
380417
381418
If write_bytes is not None, it will be called with parts of the raw
382419
message as it is unpacked.
@@ -386,8 +423,7 @@ cdef class Unpacker(object):
386423
return self._unpack(unpack_construct, write_bytes)
387424

388425
def skip(self, object write_bytes=None):
389-
"""
390-
read and ignore one object, returning None
426+
"""Read and ignore one object, returning None
391427
392428
If write_bytes is not None, it will be called with parts of the raw
393429
message as it is unpacked.

msgpack/fallback.py

Lines changed: 98 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -102,62 +102,84 @@ def unpackb(packed, **kwargs):
102102

103103

104104
class Unpacker(object):
105-
"""
106-
Streaming unpacker.
105+
"""Streaming unpacker.
106+
107+
arguments:
107108
108-
`file_like` is a file-like object having a `.read(n)` method.
109-
When `Unpacker` is initialized with a `file_like`, `.feed()` is not
110-
usable.
109+
:param file_like:
110+
File-like object having `.read(n)` method.
111+
If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.
111112
112-
`read_size` is used for `file_like.read(read_size)`.
113+
:param int read_size:
114+
Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`)
113115
114-
If `use_list` is True (default), msgpack lists are deserialized to Python
115-
lists. Otherwise they are deserialized to tuples.
116+
:param bool use_list:
117+
If true, unpack msgpack array to Python list.
118+
Otherwise, unpack to Python tuple. (default: True)
116119
117-
`object_hook` is the same as in simplejson. If it is not None, it should
118-
be callable and Unpacker calls it with a dict argument after deserializing
119-
a map.
120+
:param callable object_hook:
121+
When specified, it should be callable.
122+
Unpacker calls it with a dict argument after unpacking msgpack map.
123+
(See also simplejson)
120124
121-
`object_pairs_hook` is the same as in simplejson. If it is not None, it
122-
should be callable and Unpacker calls it with a list of key-value pairs
123-
after deserializing a map.
125+
:param callable object_pairs_hook:
126+
When specified, it should be callable.
127+
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
128+
(See also simplejson)
124129
125-
`ext_hook` is callback for ext (User defined) type. It called with two
126-
arguments: (code, bytes). default: `msgpack.ExtType`
130+
:param str encoding:
131+
Encoding used for decoding msgpack raw.
132+
If it is None (default), msgpack raw is deserialized to Python bytes.
127133
128-
`encoding` is the encoding used for decoding msgpack bytes. If it is
129-
None (default), msgpack bytes are deserialized to Python bytes.
134+
:param str unicode_errors:
135+
Used for decoding msgpack raw with *encoding*.
136+
(default: `'strict'`)
130137
131-
`unicode_errors` is used for decoding bytes.
138+
:param int max_buffer_size:
139+
Limits size of data waiting to be unpacked. 0 means system's INT_MAX (default).
140+
Raises `BufferFull` exception when it is insufficient.
141+
You should set this parameter when unpacking data from an untrusted source.
132142
133-
`max_buffer_size` limits the buffer size. 0 means INT_MAX (default).
143+
:param int max_str_len:
144+
Limits max length of str. (default: 2**31-1)
134145
135-
Raises `BufferFull` exception when it is unsufficient.
146+
:param int max_bin_len:
147+
Limits max length of bin. (default: 2**31-1)
136148
137-
You should set this parameter when unpacking data from an untrustred source.
149+
:param int max_array_len:
150+
Limits max length of array. (default: 2**31-1)
138151
139-
example of streaming deserialization from file-like object::
152+
:param int max_map_len:
153+
Limits max length of map. (default: 2**31-1)
154+
155+
156+
example of streaming deserialize from file-like object::
140157
141158
unpacker = Unpacker(file_like)
142159
for o in unpacker:
143-
do_something(o)
160+
process(o)
144161
145-
example of streaming deserialization from socket::
162+
example of streaming deserialize from socket::
146163
147164
unpacker = Unpacker()
148-
while 1:
149-
buf = sock.recv(1024*2)
165+
while True:
166+
buf = sock.recv(1024**2)
150167
if not buf:
151168
break
152169
unpacker.feed(buf)
153170
for o in unpacker:
154-
do_something(o)
171+
process(o)
155172
"""
156173

157174
def __init__(self, file_like=None, read_size=0, use_list=True,
158175
object_hook=None, object_pairs_hook=None, list_hook=None,
159176
encoding=None, unicode_errors='strict', max_buffer_size=0,
160-
ext_hook=ExtType):
177+
ext_hook=ExtType,
178+
max_str_len=2147483647, # 2**31-1
179+
max_bin_len=2147483647,
180+
max_array_len=2147483647,
181+
max_map_len=2147483647,
182+
max_ext_len=2147483647):
161183
if file_like is None:
162184
self._fb_feeding = True
163185
else:
@@ -185,6 +207,11 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
185207
self._object_hook = object_hook
186208
self._object_pairs_hook = object_pairs_hook
187209
self._ext_hook = ext_hook
210+
self._max_str_len = max_str_len
211+
self._max_bin_len = max_bin_len
212+
self._max_array_len = max_array_len
213+
self._max_map_len = max_map_len
214+
self._max_ext_len = max_ext_len
188215

189216
if list_hook is not None and not callable(list_hook):
190217
raise TypeError('`list_hook` is not callable')
@@ -316,12 +343,18 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None):
316343
n = b & 0b00011111
317344
obj = self._fb_read(n, write_bytes)
318345
typ = TYPE_RAW
346+
if n > self._max_str_len:
347+
raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
319348
elif b & 0b11110000 == 0b10010000:
320349
n = b & 0b00001111
321350
typ = TYPE_ARRAY
351+
if n > self._max_array_len:
352+
raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len)
322353
elif b & 0b11110000 == 0b10000000:
323354
n = b & 0b00001111
324355
typ = TYPE_MAP
356+
if n > self._max_map_len:
357+
raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len)
325358
elif b == 0xc0:
326359
obj = None
327360
elif b == 0xc2:
@@ -331,26 +364,38 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None):
331364
elif b == 0xc4:
332365
typ = TYPE_BIN
333366
n = struct.unpack("B", self._fb_read(1, write_bytes))[0]
367+
if n > self._max_bin_len:
368+
raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
334369
obj = self._fb_read(n, write_bytes)
335370
elif b == 0xc5:
336371
typ = TYPE_BIN
337372
n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
373+
if n > self._max_bin_len:
374+
raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
338375
obj = self._fb_read(n, write_bytes)
339376
elif b == 0xc6:
340377
typ = TYPE_BIN
341378
n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
379+
if n > self._max_bin_len:
380+
raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
342381
obj = self._fb_read(n, write_bytes)
343382
elif b == 0xc7: # ext 8
344383
typ = TYPE_EXT
345384
L, n = struct.unpack('Bb', self._fb_read(2, write_bytes))
385+
if L > self._max_ext_len:
386+
raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
346387
obj = self._fb_read(L, write_bytes)
347388
elif b == 0xc8: # ext 16
348389
typ = TYPE_EXT
349390
L, n = struct.unpack('>Hb', self._fb_read(3, write_bytes))
391+
if L > self._max_ext_len:
392+
raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
350393
obj = self._fb_read(L, write_bytes)
351394
elif b == 0xc9: # ext 32
352395
typ = TYPE_EXT
353396
L, n = struct.unpack('>Ib', self._fb_read(5, write_bytes))
397+
if L > self._max_ext_len:
398+
raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
354399
obj = self._fb_read(L, write_bytes)
355400
elif b == 0xca:
356401
obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0]
@@ -374,42 +419,66 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None):
374419
obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0]
375420
elif b == 0xd4: # fixext 1
376421
typ = TYPE_EXT
422+
if self._max_ext_len < 1:
423+
raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len))
377424
n, obj = struct.unpack('b1s', self._fb_read(2, write_bytes))
378425
elif b == 0xd5: # fixext 2
379426
typ = TYPE_EXT
427+
if self._max_ext_len < 2:
428+
raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len))
380429
n, obj = struct.unpack('b2s', self._fb_read(3, write_bytes))
381430
elif b == 0xd6: # fixext 4
382431
typ = TYPE_EXT
432+
if self._max_ext_len < 4:
433+
raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len))
383434
n, obj = struct.unpack('b4s', self._fb_read(5, write_bytes))
384435
elif b == 0xd7: # fixext 8
385436
typ = TYPE_EXT
437+
if self._max_ext_len < 8:
438+
raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len))
386439
n, obj = struct.unpack('b8s', self._fb_read(9, write_bytes))
387440
elif b == 0xd8: # fixext 16
388441
typ = TYPE_EXT
442+
if self._max_ext_len < 16:
443+
raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len))
389444
n, obj = struct.unpack('b16s', self._fb_read(17, write_bytes))
390445
elif b == 0xd9:
391446
typ = TYPE_RAW
392447
n = struct.unpack("B", self._fb_read(1, write_bytes))[0]
448+
if n > self._max_str_len:
449+
raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
393450
obj = self._fb_read(n, write_bytes)
394451
elif b == 0xda:
395452
typ = TYPE_RAW
396453
n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
454+
if n > self._max_str_len:
455+
raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
397456
obj = self._fb_read(n, write_bytes)
398457
elif b == 0xdb:
399458
typ = TYPE_RAW
400459
n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
460+
if n > self._max_str_len:
461+
raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
401462
obj = self._fb_read(n, write_bytes)
402463
elif b == 0xdc:
403464
n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
465+
if n > self._max_array_len:
466+
raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len)
404467
typ = TYPE_ARRAY
405468
elif b == 0xdd:
406469
n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
470+
if n > self._max_array_len:
471+
raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len)
407472
typ = TYPE_ARRAY
408473
elif b == 0xde:
409474
n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
475+
if n > self._max_map_len:
476+
raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len)
410477
typ = TYPE_MAP
411478
elif b == 0xdf:
412479
n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
480+
if n > self._max_map_len:
481+
raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len)
413482
typ = TYPE_MAP
414483
else:
415484
raise UnpackValueError("Unknown header: 0x%x" % b)

0 commit comments

Comments
 (0)