@@ -5,7 +5,7 @@
 import socket
 import time
 
-from kafka.errors import KafkaConfigurationError, UnsupportedVersionError
+from kafka.errors import KafkaConfigurationError, KafkaTimeoutError, UnsupportedVersionError
 
 from kafka.vendor import six
 
@@ -18,6 +18,7 @@
 from kafka.metrics import MetricConfig, Metrics
 from kafka.protocol.list_offsets import OffsetResetStrategy
 from kafka.structs import OffsetAndMetadata, TopicPartition
+from kafka.util import timeout_ms_fn
 from kafka.version import __version__
 
 log = logging.getLogger(__name__)
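The patch builds on a new helper, kafka.util.timeout_ms_fn, whose implementation is not part of this diff. The sketch below is an assumption reconstructed from the call sites in the hunks that follow: the helper returns a closure that reports the remaining time budget in milliseconds, optionally capped by a caller-supplied fallback, and raises KafkaTimeoutError with the given message once the budget is spent (or simply returns 0 when no message was supplied).

    # Assumed contract of kafka.util.timeout_ms_fn, inferred from its call
    # sites in this diff -- not a verbatim copy of the library code.
    import time

    from kafka.errors import KafkaTimeoutError

    def timeout_ms_fn(timeout_ms, error_message):
        begin = time.time()

        def inner_timeout_ms(fallback=None):
            if timeout_ms is None:
                # No overall deadline: defer to the per-call fallback, if any.
                return fallback
            elapsed_ms = (time.time() - begin) * 1000
            if elapsed_ms >= timeout_ms:
                if error_message is not None:
                    raise KafkaTimeoutError(error_message)
                return 0
            remaining_ms = timeout_ms - elapsed_ms
            return remaining_ms if fallback is None else min(remaining_ms, fallback)

        return inner_timeout_ms

Repeated calls to inner_timeout_ms() replace the manual start/remaining bookkeeping that the poll() and _poll_once() hunks below delete.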
@@ -521,7 +522,7 @@ def commit_async(self, offsets=None, callback=None):
             offsets, callback=callback)
         return future
 
-    def commit(self, offsets=None):
+    def commit(self, offsets=None, timeout_ms=None):
         """Commit offsets to kafka, blocking until success or error.
 
         This commits offsets only to Kafka. The offsets committed using this API
@@ -545,9 +546,9 @@ def commit(self, offsets=None):
         assert self.config['group_id'] is not None, 'Requires group_id'
         if offsets is None:
             offsets = self._subscription.all_consumed_offsets()
-        self._coordinator.commit_offsets_sync(offsets)
+        self._coordinator.commit_offsets_sync(offsets, timeout_ms=timeout_ms)
 
-    def committed(self, partition, metadata=False):
+    def committed(self, partition, metadata=False, timeout_ms=None):
         """Get the last committed offset for the given partition.
 
         This offset will be used as the position for the consumer
@@ -564,6 +565,9 @@ def committed(self, partition, metadata=False):
 
         Returns:
             The last committed offset (int or OffsetAndMetadata), or None if there was no prior commit.
+
+        Raises:
+            KafkaTimeoutError if timeout_ms provided
         """
         assert self.config['api_version'] >= (0, 8, 1), 'Requires >= Kafka 0.8.1'
         assert self.config['group_id'] is not None, 'Requires group_id'
@@ -572,10 +576,10 @@ def committed(self, partition, metadata=False):
         if self._subscription.is_assigned(partition):
             committed = self._subscription.assignment[partition].committed
             if committed is None:
-                self._coordinator.refresh_committed_offsets_if_needed()
+                self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms)
                 committed = self._subscription.assignment[partition].committed
         else:
-            commit_map = self._coordinator.fetch_committed_offsets([partition])
+            commit_map = self._coordinator.fetch_committed_offsets([partition], timeout_ms=timeout_ms)
             if partition in commit_map:
                 committed = commit_map[partition]
             else:
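A minimal usage sketch for the new keyword arguments on commit() and committed(); the bootstrap server, topic, and group id below are placeholders, and the error handling assumes the coordinator raises KafkaTimeoutError once the deadline passes (as the new Raises note above states for committed()).

    from kafka import KafkaConsumer, TopicPartition
    from kafka.errors import KafkaTimeoutError

    # Placeholder connection details, for illustration only.
    consumer = KafkaConsumer(group_id='example-group',
                             bootstrap_servers='localhost:9092',
                             enable_auto_commit=False)
    tp = TopicPartition('example-topic', 0)
    consumer.assign([tp])

    try:
        # Bound the blocking commit to 5 seconds instead of waiting indefinitely.
        consumer.commit(timeout_ms=5000)
        # Likewise bound the lookup of the last committed offset.
        last_committed = consumer.committed(tp, timeout_ms=5000)
    except KafkaTimeoutError:
        last_committed = None  # give up for now and retry on the next cycle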
@@ -670,17 +674,13 @@ def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
         assert not self._closed, 'KafkaConsumer is closed'
 
         # Poll for new data until the timeout expires
-        start = time.time()
-        remaining = timeout_ms
+        inner_timeout_ms = timeout_ms_fn(timeout_ms, None)
         while not self._closed:
-            records = self._poll_once(remaining, max_records, update_offsets=update_offsets)
+            records = self._poll_once(inner_timeout_ms(), max_records, update_offsets=update_offsets)
             if records:
                 return records
 
-            elapsed_ms = (time.time() - start) * 1000
-            remaining = timeout_ms - elapsed_ms
-
-            if remaining <= 0:
+            if inner_timeout_ms() <= 0:
                 break
 
         return {}
@@ -695,14 +695,14 @@ def _poll_once(self, timeout_ms, max_records, update_offsets=True):
         Returns:
             dict: Map of topic to list of records (may be empty).
         """
-        begin = time.time()
-        if not self._coordinator.poll(timeout_ms=timeout_ms):
+        inner_timeout_ms = timeout_ms_fn(timeout_ms, None)
+        if not self._coordinator.poll(timeout_ms=inner_timeout_ms()):
             return {}
 
         # Fetch positions if we have partitions we're subscribed to that we
         # don't know the offset for
         if not self._subscription.has_all_fetch_positions():
-            self._update_fetch_positions(self._subscription.missing_fetch_positions())
+            self._update_fetch_positions(self._subscription.missing_fetch_positions(), timeout_ms=inner_timeout_ms())
 
         # If data is available already, e.g. from a previous network client
         # poll() call to commit, then just return it immediately
@@ -723,9 +723,7 @@ def _poll_once(self, timeout_ms, max_records, update_offsets=True):
         if len(futures):
             self._client.poll(timeout_ms=0)
 
-        timeout_ms -= (time.time() - begin) * 1000
-        timeout_ms = max(0, min(timeout_ms, self._coordinator.time_to_next_poll() * 1000))
-        self._client.poll(timeout_ms=timeout_ms)
+        self._client.poll(timeout_ms=inner_timeout_ms(self._coordinator.time_to_next_poll() * 1000))
         # after the long poll, we should check whether the group needs to rebalance
         # prior to returning data so that the group can stabilize faster
         if self._coordinator.need_rejoin():
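Under the helper contract assumed earlier, the single replacement line preserves the clamping that the deleted max()/min() arithmetic performed: the network poll waits no longer than the remaining overall budget, and no longer than the time until the next coordinator poll is due. For instance, with the sketch above:

    inner_timeout_ms = timeout_ms_fn(1000, None)   # 1 s overall budget
    time.sleep(0.4)
    print(inner_timeout_ms(250.0))   # ~250.0 -> capped by the fallback
    print(inner_timeout_ms())        # ~600.0 -> remaining budget only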
@@ -734,7 +732,7 @@ def _poll_once(self, timeout_ms, max_records, update_offsets=True):
         records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
         return records
 
-    def position(self, partition):
+    def position(self, partition, timeout_ms=None):
         """Get the offset of the next record that will be fetched
 
         Arguments:
@@ -748,7 +746,7 @@ def position(self, partition):
         assert self._subscription.is_assigned(partition), 'Partition is not assigned'
         position = self._subscription.assignment[partition].position
         if position is None:
-            self._update_fetch_positions([partition])
+            self._update_fetch_positions([partition], timeout_ms=timeout_ms)
             position = self._subscription.assignment[partition].position
         return position.offset if position else None
 
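A hedged usage sketch for the new position() parameter, reusing the placeholder consumer and partition from the earlier example. Because _update_fetch_positions() now swallows the timeout (see the final hunk below), position() can return None when the offset cannot be established within the window rather than blocking indefinitely.

    offset = consumer.position(tp, timeout_ms=2000)
    if offset is None:
        # Position not known within 2 s; retry later or seek() explicitly.
        pass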
@@ -1103,35 +1101,43 @@ def _use_consumer_group(self):
             return False
         return True
 
-    def _update_fetch_positions(self, partitions):
+    def _update_fetch_positions(self, partitions, timeout_ms=None):
         """Set the fetch position to the committed position (if there is one)
         or reset it using the offset reset policy the user has configured.
 
         Arguments:
             partitions (List[TopicPartition]): The partitions that need
                 updating fetch positions.
 
+        Returns True if fetch positions updated, False if timeout
+
         Raises:
             NoOffsetForPartitionError: If no offset is stored for a given
                 partition and no offset reset policy is defined.
         """
-        # Lookup any positions for partitions which are awaiting reset (which may be the
-        # case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`. We do
-        # this check first to avoid an unnecessary lookup of committed offsets (which
-        # typically occurs when the user is manually assigning partitions and managing
-        # their own offsets).
-        self._fetcher.reset_offsets_if_needed(partitions)
-
-        if not self._subscription.has_all_fetch_positions():
-            # if we still don't have offsets for all partitions, then we should either seek
-            # to the last committed position or reset using the auto reset policy
-            if (self.config['api_version'] >= (0, 8, 1) and
-                    self.config['group_id'] is not None):
-                # first refresh commits for all assigned partitions
-                self._coordinator.refresh_committed_offsets_if_needed()
-
-            # Then, do any offset lookups in case some positions are not known
-            self._fetcher.update_fetch_positions(partitions)
+        inner_timeout_ms = timeout_ms_fn(timeout_ms, 'Timeout updating fetch positions')
+        try:
+            # Lookup any positions for partitions which are awaiting reset (which may be the
+            # case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`. We do
+            # this check first to avoid an unnecessary lookup of committed offsets (which
+            # typically occurs when the user is manually assigning partitions and managing
+            # their own offsets).
+            self._fetcher.reset_offsets_if_needed(partitions, timeout_ms=inner_timeout_ms())
+
+            if not self._subscription.has_all_fetch_positions():
+                # if we still don't have offsets for all partitions, then we should either seek
+                # to the last committed position or reset using the auto reset policy
+                if (self.config['api_version'] >= (0, 8, 1) and
+                        self.config['group_id'] is not None):
+                    # first refresh commits for all assigned partitions
+                    self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=inner_timeout_ms())
+
+                # Then, do any offset lookups in case some positions are not known
+                self._fetcher.update_fetch_positions(partitions, timeout_ms=inner_timeout_ms())
            return True
 
+        except KafkaTimeoutError:
+            return False
 
     def _message_generator_v2(self):
         timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())
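Net effect of the last hunk: a timeout while updating fetch positions is reported as a False return value instead of a raised KafkaTimeoutError, so callers such as poll() and position() degrade gracefully instead of failing. Illustrative only, since _update_fetch_positions() is a private method:

    # Hypothetical internal-style call to show the boolean contract.
    updated = consumer._update_fetch_positions([tp], timeout_ms=2000)
    if not updated:
        print('fetch positions not updated within 2000 ms; will retry on the next poll')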