8
8
from itertools import cycle
9
9
from multiprocessing import Queue , Process
10
10
11
- from kafka .common import ProduceRequest
11
+ from kafka .common import ProduceRequest , TopicAndPartition
12
12
from kafka .partitioner import HashedPartitioner
13
13
from kafka .protocol import create_message
14
14
20
20
STOP_ASYNC_PRODUCER = - 1
21
21
22
22
23
- def _send_upstream (topic , queue , client , batch_time , batch_size ,
23
+ def _send_upstream (queue , client , batch_time , batch_size ,
24
24
req_acks , ack_timeout ):
25
25
"""
26
26
Listen on the queue for a specified number of messages or till
@@ -44,24 +44,27 @@ def _send_upstream(topic, queue, client, batch_time, batch_size,
44
44
# timeout is reached
45
45
while count > 0 and timeout >= 0 :
46
46
try :
47
- partition , msg = queue .get (timeout = timeout )
47
+ topic_partition , msg = queue .get (timeout = timeout )
48
+
48
49
except Empty :
49
50
break
50
51
51
52
# Check if the controller has requested us to stop
52
- if partition == STOP_ASYNC_PRODUCER :
53
+ if topic_partition == STOP_ASYNC_PRODUCER :
53
54
stop = True
54
55
break
55
56
56
57
# Adjust the timeout to match the remaining period
57
58
count -= 1
58
59
timeout = send_at - time .time ()
59
- msgset [partition ].append (msg )
60
+ msgset [topic_partition ].append (msg )
60
61
61
62
# Send collected requests upstream
62
63
reqs = []
63
- for partition , messages in msgset .items ():
64
- req = ProduceRequest (topic , partition , messages )
64
+ for topic_partition , messages in msgset .items ():
65
+ req = ProduceRequest (topic_partition .topic ,
66
+ topic_partition .partition ,
67
+ messages )
65
68
reqs .append (req )
66
69
67
70
try :
@@ -78,7 +81,6 @@ class Producer(object):
78
81
79
82
Params:
80
83
client - The Kafka client instance to use
81
- topic - The topic for sending messages to
82
84
async - If set to true, the messages are sent asynchronously via another
83
85
thread (process). We will not wait for a response to these
84
86
req_acks - A value indicating the acknowledgements that the server must
@@ -119,8 +121,7 @@ def __init__(self, client, async=False,
119
121
if self .async :
120
122
self .queue = Queue () # Messages are sent through this queue
121
123
self .proc = Process (target = _send_upstream ,
122
- args = (self .topic ,
123
- self .queue ,
124
+ args = (self .queue ,
124
125
self .client .copy (),
125
126
batch_send_every_t ,
126
127
batch_send_every_n ,
@@ -131,17 +132,18 @@ def __init__(self, client, async=False,
131
132
self .proc .daemon = True
132
133
self .proc .start ()
133
134
134
- def send_messages (self , partition , * msg ):
135
+ def send_messages (self , topic , partition , * msg ):
135
136
"""
136
137
Helper method to send produce requests
137
138
"""
138
139
if self .async :
139
140
for m in msg :
140
- self .queue .put ((partition , create_message (m )))
141
+ self .queue .put ((TopicAndPartition (topic , partition ),
142
+ create_message (m )))
141
143
resp = []
142
144
else :
143
145
messages = [create_message (m ) for m in msg ]
144
- req = ProduceRequest (self . topic , partition , messages )
146
+ req = ProduceRequest (topic , partition , messages )
145
147
try :
146
148
resp = self .client .send_produce_request ([req ], acks = self .req_acks ,
147
149
timeout = self .ack_timeout )
@@ -169,7 +171,6 @@ class SimpleProducer(Producer):
169
171
170
172
Params:
171
173
client - The Kafka client instance to use
172
- topic - The topic for sending messages to
173
174
async - If True, the messages are sent asynchronously via another
174
175
thread (process). We will not wait for a response to these
175
176
req_acks - A value indicating the acknowledgements that the server must
@@ -180,27 +181,31 @@ class SimpleProducer(Producer):
180
181
batch_send_every_n - If set, messages are send in batches of this size
181
182
batch_send_every_t - If set, messages are send after this timeout
182
183
"""
183
- def __init__ (self , client , topic , async = False ,
184
+ def __init__ (self , client , async = False ,
184
185
req_acks = Producer .ACK_AFTER_LOCAL_WRITE ,
185
186
ack_timeout = Producer .DEFAULT_ACK_TIMEOUT ,
186
187
batch_send = False ,
187
188
batch_send_every_n = BATCH_SEND_MSG_COUNT ,
188
189
batch_send_every_t = BATCH_SEND_DEFAULT_INTERVAL ):
189
- self .topic = topic
190
- client .load_metadata_for_topics (topic )
191
- self .next_partition = cycle (client .topic_partitions [topic ])
192
-
190
+ self .partition_cycles = {}
193
191
super (SimpleProducer , self ).__init__ (client , async , req_acks ,
194
192
ack_timeout , batch_send ,
195
193
batch_send_every_n ,
196
194
batch_send_every_t )
197
195
198
- def send_messages (self , * msg ):
199
- partition = self .next_partition .next ()
200
- return super (SimpleProducer , self ).send_messages (partition , * msg )
196
+ def _next_partition (self , topic ):
197
+ if topic not in self .partition_cycles :
198
+ if topic not in self .client .topic_partitions :
199
+ self .client .load_metadata_for_topics (topic )
200
+ self .partition_cycles [topic ] = cycle (self .client .topic_partitions [topic ])
201
+ return self .partition_cycles [topic ].next ()
202
+
203
+ def send_messages (self , topic , * msg ):
204
+ partition = self ._next_partition (topic )
205
+ return super (SimpleProducer , self ).send_messages (topic , partition , * msg )
201
206
202
207
def __repr__ (self ):
203
- return '<SimpleProducer topic=%s, batch=%s>' % ( self .topic , self . async )
208
+ return '<SimpleProducer batch=%s>' % self .async
204
209
205
210
206
211
class KeyedProducer (Producer ):
@@ -209,7 +214,6 @@ class KeyedProducer(Producer):
209
214
210
215
Args:
211
216
client - The kafka client instance
212
- topic - The kafka topic to send messages to
213
217
partitioner - A partitioner class that will be used to get the partition
214
218
to send the message to. Must be derived from Partitioner
215
219
async - If True, the messages are sent asynchronously via another
@@ -220,29 +224,34 @@ class KeyedProducer(Producer):
220
224
batch_send_every_n - If set, messages are send in batches of this size
221
225
batch_send_every_t - If set, messages are send after this timeout
222
226
"""
223
- def __init__ (self , client , topic , partitioner = None , async = False ,
227
+ def __init__ (self , client , partitioner = None , async = False ,
224
228
req_acks = Producer .ACK_AFTER_LOCAL_WRITE ,
225
229
ack_timeout = Producer .DEFAULT_ACK_TIMEOUT ,
226
230
batch_send = False ,
227
231
batch_send_every_n = BATCH_SEND_MSG_COUNT ,
228
232
batch_send_every_t = BATCH_SEND_DEFAULT_INTERVAL ):
229
- self .topic = topic
230
- client .load_metadata_for_topics (topic )
231
-
232
233
if not partitioner :
233
234
partitioner = HashedPartitioner
234
-
235
- self .partitioner = partitioner ( client . topic_partitions [ topic ])
235
+ self . partitioner_class = partitioner
236
+ self .partitioners = {}
236
237
237
238
super (KeyedProducer , self ).__init__ (client , async , req_acks ,
238
239
ack_timeout , batch_send ,
239
240
batch_send_every_n ,
240
241
batch_send_every_t )
241
242
242
- def send (self , key , msg ):
243
- partitions = self .client .topic_partitions [self .topic ]
244
- partition = self .partitioner .partition (key , partitions )
245
- return self .send_messages (partition , msg )
243
+ def _next_partition (self , topic , key ):
244
+ if topic not in self .partitioners :
245
+ if topic not in self .client .topic_partitions :
246
+ self .client .load_metadata_for_topics (topic )
247
+ self .partitioners [topic ] = \
248
+ self .partitioner_class (self .client .topic_partitions [topic ])
249
+ partitioner = self .partitioners [topic ]
250
+ return partitioner .partition (key , self .client .topic_partitions [topic ])
251
+
252
+ def send (self , topic , key , msg ):
253
+ partition = self ._next_partition (topic , key )
254
+ return self .send_messages (topic , partition , msg )
246
255
247
256
def __repr__ (self ):
248
- return '<KeyedProducer topic=%s, batch=%s>' % ( self .topic , self . async )
257
+ return '<KeyedProducer batch=%s>' % self .async
0 commit comments