Commit 7486d87

Starting on unit tests
Updated several methods in KafkaClient to be classmethods. Also updated some inline documentation.
1 parent: ab273c4

3 files changed (+156, -40 lines)
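
Since the encode/create helpers are now classmethods, they can be called without opening a broker connection. A minimal sketch of the new calling convention (the payload string is just an illustration, mirroring the new test.py below):

    from kafka import KafkaClient

    # No socket needed: create_message and encode_message are classmethods now
    msg = KafkaClient.create_message("testing")
    wire = KafkaClient.encode_message(msg)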

example.py (+2, -2)

@@ -3,7 +3,7 @@
 from kafka import KafkaClient, FetchRequest, ProduceRequest

 def produce_example(kafka):
-    message = kafka.create_message_from_string("testing")
+    message = kafka.create_message("testing")
     request = ProduceRequest("my-topic", 0, [message])
     kafka.send_message_set(request)

@@ -15,7 +15,7 @@ def consume_example(kafka):
     print(nextRequest)

 def produce_gz_example(kafka):
-    message = kafka.create_gzipped_message("this message was gzipped", "along with this one")
+    message = kafka.create_gzip_message("this message was gzipped", "along with this one")
     request = ProduceRequest("my-topic", 0, [message])
     kafka.send_message_set(request)

kafka.py (+110, -38)

@@ -31,24 +31,25 @@ def __str__(self):
 ProduceRequest = namedtuple("ProduceRequest", ["topic", "partition", "messages"])
 OffsetRequest = namedtuple("OffsetRequest", ["topic", "partition", "time", "maxOffsets"])

-def gzip_compress(payload):
+def gzip_encode(payload):
     buf = StringIO()
-    f = gzip.GzipFile(fileobj=buf, mode='w')
+    f = gzip.GzipFile(fileobj=buf, mode='w', compresslevel=6)
     f.write(payload)
     f.close()
     buf.seek(0)
     out = buf.read()
     buf.close()
     return out

-def gzip_decompress(payload):
+def gzip_decode(payload):
     buf = StringIO(payload)
     f = gzip.GzipFile(fileobj=buf, mode='r')
     out = f.read()
     f.close()
     buf.close()
     return out

+
 def length_prefix_message(msg):
     """
     Prefix a message with it's length as an int
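
As a quick sanity check, the renamed helpers should round-trip a payload. A minimal sketch, assuming gzip_encode/gzip_decode remain importable at module level:

    from kafka import gzip_encode, gzip_decode

    payload = "hello kafka"
    # gzip_encode writes through a GzipFile into a StringIO buffer; gzip_decode reverses it
    assert gzip_decode(gzip_encode(payload)) == payload
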
@@ -84,9 +85,10 @@ class KafkaClient(object):

     ATTRIBUTE_CODEC_MASK = 0x03

-    def __init__(self, host, port):
+    def __init__(self, host, port, bufsize=1024):
         self.host = host
         self.port = port
+        self.bufsize = bufsize
         self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         self._sock.connect((host, port))
         self._sock.settimeout(10)
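
The new bufsize argument is picked up by the response iterator in the next hunk as the per-recv() chunk size. A hypothetical connection sketch (host and port are placeholders for a running broker):

    from kafka import KafkaClient

    # Read responses from the socket in 4 KB chunks instead of the 1 KB default
    kafka = KafkaClient("localhost", 9092, bufsize=4096)
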
@@ -117,7 +119,7 @@ def _consume_response_iter(self):
         # Response iterator
         total = 0
         while total < (size-2):
-            resp = self._sock.recv(1024)
+            resp = self._sock.recv(self.bufsize)
             log.debug("Read %d bytes from Kafka", len(resp))
             if resp == "":
                 raise Exception("Underflow")
@@ -133,7 +135,8 @@ def _consume_response(self):
             data += chunk
         return data

-    def encode_message(self, message):
+    @classmethod
+    def encode_message(cls, message):
         """
         Encode a Message from a Message tuple

@@ -163,20 +166,26 @@ def encode_message(self, message):
             msg = struct.pack('>BBi%ds' % len(message.payload),
                               message.magic, message.attributes, message.crc, message.payload)
         else:
-            raise Exception("Unknown message version: %d" % message.magic)
+            raise Exception("Unexpected magic number: %d" % message.magic)
         msg = length_prefix_message(msg)
         log.debug("Encoded %s as %r" % (message, msg))
         return msg

-    def encode_message_set(self, messages):
-        # TODO document
+    @classmethod
+    def encode_message_set(cls, messages):
+        """
+        Encode a MessageSet
+
+        One or more concatenated Messages
+        """
         message_set = ""
         for message in messages:
-            encoded_message = self.encode_message(message)
+            encoded_message = cls.encode_message(message)
             message_set += encoded_message
         return message_set

-    def encode_produce_request(self, produceRequest):
+    @classmethod
+    def encode_produce_request(cls, produceRequest):
         """
         Encode a ProduceRequest

@@ -198,16 +207,41 @@ def encode_produce_request(self, produceRequest):
                           KafkaClient.PRODUCE_KEY, len(topic), topic, partition, len(message_set), message_set)
         return req

-    def encode_multi_produce_request(self, produceRequests):
-        # TODO document
+    @classmethod
+    def encode_multi_produce_request(cls, produceRequests):
+        """
+        Encode a MultiProducerRequest
+
+        Params
+        ======
+        produceRequests: list of ProduceRequest objects
+
+        Returns
+        =======
+        Encoded request
+
+        Wire Format
+        ===========
+        <MultiProducerRequest> ::= <request-key> <num> <ProduceRequests>
+        <num> ::= <int16>
+        <ProduceRequests> ::= <ProduceRequest> [ <ProduceRequests> ]
+        <ProduceRequest> ::= <topic> <partition> <len> <MessageSet>
+        <topic> ::= <topic-length><string>
+        <topic-length> ::= <int16>
+        <partition> ::= <int32>
+        <len> ::= <int32>
+
+        num is the number of ProduceRequests being encoded
+        """
         req = struct.pack('>HH', KafkaClient.MULTIPRODUCE_KEY, len(produceRequests))
         for (topic, partition, messages) in produceRequests:
-            message_set = self.encode_message_set(messages)
+            message_set = cls.encode_message_set(messages)
             req += struct.pack('>H%dsii%ds' % (len(topic), len(message_set)),
                                len(topic), topic, partition, len(message_set), message_set)
         return req

-    def encode_fetch_request(self, fetchRequest):
+    @classmethod
+    def encode_fetch_request(cls, fetchRequest):
         """
         Encode a FetchRequest message

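
To make the wire format above concrete, here is a hedged sketch of batching two topics into one MultiProduce request; the topic names and payloads are made up for illustration:

    from kafka import KafkaClient, ProduceRequest

    requests = [
        ProduceRequest("topic-a", 0, [KafkaClient.create_message("m1")]),
        ProduceRequest("topic-b", 1, [KafkaClient.create_message("m2")]),
    ]
    # <request-key> <num>, then one <topic> <partition> <len> <MessageSet> per request
    wire = KafkaClient.encode_multi_produce_request(requests)
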
@@ -228,7 +262,8 @@ def encode_fetch_request(self, fetchRequest):
                           KafkaClient.FETCH_KEY, len(topic), topic, partition, offset, size)
         return req

-    def encode_multi_fetch_request(self, fetchRequests):
+    @classmethod
+    def encode_multi_fetch_request(cls, fetchRequests):
         """
         Encode the MultiFetchRequest message from a list of FetchRequest objects

@@ -260,7 +295,8 @@ def encode_multi_fetch_request(self, fetchRequests):
             req += struct.pack('>H%dsiqi' % len(topic), len(topic), topic, partition, offset, size)
         return req

-    def encode_offset_request(self, offsetRequest):
+    @classmethod
+    def encode_offset_request(cls, offsetRequest):
         """
         Encode an OffsetRequest message

@@ -281,43 +317,57 @@ def encode_offset_request(self, offsetRequest):
         req = struct.pack('>HH%dsiqi' % len(topic), KafkaClient.OFFSET_KEY, len(topic), topic, partition, offset, maxOffsets)
         return req

-    def decode_message(self, data):
+    @classmethod
+    def decode_message(cls, data):
         """
         Decode a Message

-        Since a Message can actually contained a compressed payload of multiple nested Messages,
-        this method returns a generator.
+        Verify crc and decode the Message. A compressed Message's payload is actually
+        an encoded MessageSet. This allows Messages to be nested within Messages and
+        as such, this method will recurse.
+
+        Params
+        ======
+        data, bytes
+
+        Returns
+        =======
+        Generator of Messages (depth-first)
         """
-        # TODO document
         N = len(data)
         (magic,) = struct.unpack('>B', data[0:1])
-        if magic == 0: # v0 Message
-            # Read crc; check the crc; append the message
+        if magic == 0:
+            # version 0
             (crc,) = struct.unpack('>i', data[1:5])
             payload = data[5:N]
             assert zlib.crc32(payload) == crc
             msg = Message(magic, None, crc, payload)
             log.debug("Got v0 Message, %s", msg)
             yield msg
-        elif magic == 1: # v1 Message
-            # Read attributes, crc; check the crc; append the message
+        elif magic == 1:
+            # version 1
             (att, crc) = struct.unpack('>Bi', data[1:6])
             payload = data[6:N]
             assert zlib.crc32(payload) == crc
-            # Uncompressed, just a single Message
             if att & KafkaClient.ATTRIBUTE_CODEC_MASK == 0:
+                # Uncompressed, just a single Message
                 msg = Message(magic, att, crc, payload)
                 log.debug("Got v1 Message, %s", msg)
                 yield msg
             elif att & KafkaClient.ATTRIBUTE_CODEC_MASK == 1:
-                gz = gzip_decompress(payload)
-                (msgs, _) = self.read_message_set(gz)
+                # Gzip encoded Message
+                gz = gzip_decode(payload)
+                (msgs, _) = cls.read_message_set(gz)
                 for msg in msgs:
                     yield msg
+            elif att & KafkaClient.ATTRIBUTE_CODEC_MASK == 2:
+                # Snappy encoded Message
+                raise NotImplementedError("Snappy codec is not yet supported")
             else:
                 raise RuntimeError("Unsupported compression type: %d" % (att & KafkaClient.ATTRIBUTE_CODEC_MASK))

-    def read_message_set(self, data):
+    @classmethod
+    def read_message_set(cls, data):
         """
         Read a MessageSet

@@ -363,7 +413,7 @@ def read_message_set(self, data):
             cur += 4

             # Decode the message(s)
-            for m in self.decode_message(data[cur:cur+N]):
+            for m in cls.decode_message(data[cur:cur+N]):
                 msgs.append(m)

             # Advance the cursor
@@ -376,15 +426,37 @@ def read_message_set(self, data):
     # Advanced User API #
     #########################

-    def create_message_from_string(self, payload):
-        #TODO document
+    @classmethod
+    def create_message(cls, payload):
+        """
+        Create a standard Message
+
+        Params
+        ======
+        payload, bytes
+
+        Returns
+        =======
+        A Message tuple
+        """
         return Message(1, 0, zlib.crc32(payload), payload)

-    def create_gzipped_message(self, *payloads):
-        #TODO document
-        messages = [self.create_message_from_string(payload) for payload in payloads]
-        message_set = self.encode_message_set(messages)
-        gzipped = gzip_compress(message_set)
+    @classmethod
+    def create_gzip_message(cls, *payloads):
+        """
+        Create a Gzip encoded Message
+
+        Params
+        ======
+        payloads, list of messages (bytes) to be encoded
+
+        Returns
+        =======
+        A Message tuple
+        """
+        messages = [cls.create_message(payload) for payload in payloads]
+        message_set = cls.encode_message_set(messages)
+        gzipped = gzip_encode(message_set)
         return Message(1, 0x00 | (KafkaClient.ATTRIBUTE_CODEC_MASK & 0x01), zlib.crc32(gzipped), gzipped)

     def send_message_set(self, produceRequest):
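
The codec is carried in the low two bits of the attributes byte (ATTRIBUTE_CODEC_MASK is 0x03), so a Message built by create_gzip_message should report codec 1. A quick check, as a sketch:

    from kafka import KafkaClient

    msg = KafkaClient.create_gzip_message("payload one", "payload two")
    # 0 == uncompressed, 1 == gzip, 2 == snappy (not yet supported)
    assert msg.attributes & KafkaClient.ATTRIBUTE_CODEC_MASK == 1
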
@@ -522,7 +594,7 @@ def send_messages_simple(self, topic, partition, *payloads):
         partition: int
         payloads: strings
         """
-        messages = tuple([create_message_from_string(payload) for payload in payloads])
+        messages = tuple([create_message(payload) for payload in payloads])
         self.send_message_set(ProduceRequest(topic, partition, messages))

     def iter_messages(self, topic, partition, offset, size, auto=True):

test.py (new file, +44)

@@ -0,0 +1,44 @@
+import binascii
+import unittest
+
+from kafka import KafkaClient
+
+class TestMessage(unittest.TestCase):
+    def test_message_simple(self):
+        msg = KafkaClient.create_message("testing")
+        enc = KafkaClient.encode_message(msg)
+        expect = "\x00\x00\x00\r\x01\x00\xe8\xf3Z\x06testing"
+        self.assertEquals(enc, expect)
+        (messages, read) = KafkaClient.read_message_set(enc)
+        self.assertEquals(len(messages), 1)
+        self.assertEquals(messages[0], msg)
+
+    def test_message_list(self):
+        msgs = [
+            KafkaClient.create_message("one"),
+            KafkaClient.create_message("two"),
+            KafkaClient.create_message("three")
+        ]
+        enc = KafkaClient.encode_message_set(msgs)
+        expect = ("\x00\x00\x00\t\x01\x00zl\x86\xf1one\x00\x00\x00\t\x01\x00\x11"
+                  "\xca\x8aftwo\x00\x00\x00\x0b\x01\x00F\xc5\xd8\xf5three")
+        self.assertEquals(enc, expect)
+        (messages, read) = KafkaClient.read_message_set(enc)
+        self.assertEquals(len(messages), 3)
+        self.assertEquals(messages[0].payload, "one")
+        self.assertEquals(messages[1].payload, "two")
+        self.assertEquals(messages[2].payload, "three")
+
+
+    def test_message_gzip(self):
+        msg = KafkaClient.create_gzip_message("one", "two", "three")
+        enc = KafkaClient.encode_message(msg)
+        # Can't check the bytes directly since Gzip is non-deterministic
+        (messages, read) = KafkaClient.read_message_set(enc)
+        self.assertEquals(len(messages), 3)
+        self.assertEquals(messages[0].payload, "one")
+        self.assertEquals(messages[1].payload, "two")
+        self.assertEquals(messages[2].payload, "three")
+
+if __name__ == '__main__':
+    unittest.main()
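
With the __main__ guard in place, the suite runs directly with "python test.py" (assuming test.py sits alongside kafka.py); no broker is required, since every test goes through the classmethod encoders.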
