Skip to content

Commit ec67224

Browse files
mnoman09 authored and aliabbasrizvi committed
feat(eventProcessor): Add EventProcessor and BatchEventProcessor (#203)
1 parent 6794260 commit ec67224

File tree

4 files changed

+646
-1
lines changed

4 files changed

+646
-1
lines changed

optimizely/event/event_processor.py

+269
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
# Copyright 2019 Optimizely
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import abc
15+
import threading
16+
import time
17+
18+
from datetime import timedelta
19+
from six.moves import queue
20+
21+
from optimizely import logger as _logging
22+
from optimizely.event_dispatcher import EventDispatcher as default_event_dispatcher
23+
from optimizely.helpers import validator
24+
from .event_factory import EventFactory
25+
from .user_event import UserEvent
26+
27+
# Abstract base built by calling ABCMeta directly, which works on both Python 2 and 3
# without relying on version-specific metaclass syntax.
ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()})


class BaseEventProcessor(ABC):
    """ Class encapsulating event processing. Override with your own implementation. """

    @abc.abstractmethod
    def process(self, user_event):
        """ Method to provide intermediary processing stage within event production.

        Declared as an instance method (with self) so that the abstract signature
        matches the way concrete processors are invoked.

        Args:
          user_event: UserEvent instance that needs to be processed and dispatched.
        """
        pass
40+
41+
42+
class BatchEventProcessor(BaseEventProcessor):
    """
    BatchEventProcessor is an implementation of the BaseEventProcessor that batches events.
    The BatchEventProcessor maintains a single consumer thread that pulls events off of
    the blocking queue and buffers them for either a configured batch size or for a
    maximum duration before the resulting LogEvent is sent to the EventDispatcher.
    """

    _DEFAULT_QUEUE_CAPACITY = 1000
    _DEFAULT_BATCH_SIZE = 10
    _DEFAULT_FLUSH_INTERVAL = timedelta(seconds=30)
    _DEFAULT_TIMEOUT_INTERVAL = timedelta(seconds=5)
    # Unique sentinel objects placed on the queue to control the consumer thread.
    _SHUTDOWN_SIGNAL = object()
    _FLUSH_SIGNAL = object()
    # NOTE: class-level attribute, so this one lock is shared by every instance.
    # threading.Lock is not reentrant: never call _flush_queue while holding it.
    LOCK = threading.Lock()

    def __init__(self,
                 event_dispatcher,
                 logger,
                 start_on_init=False,
                 event_queue=None,
                 batch_size=None,
                 flush_interval=None,
                 timeout_interval=None):
        """ BatchEventProcessor init method to configure event batching.

        Args:
          event_dispatcher: Provides a dispatch_event method which if given a URL and params sends a request to it.
          logger: Provides a log method to log messages. By default nothing would be logged.
          start_on_init: Optional boolean param which starts the consumer thread if set to True.
                         Default value is False.
          event_queue: Optional component which accumulates the events until dispatched.
          batch_size: Optional param which defines the upper limit on the number of events in event_queue after which
                      the event_queue will be flushed.
          flush_interval: Optional floating point number representing time interval in seconds after which event_queue
                          will be flushed.
          timeout_interval: Optional floating point number representing time interval in seconds before joining the
                            consumer thread.
        """
        self.event_dispatcher = event_dispatcher or default_event_dispatcher
        # The logger must be set before validation, which logs when it falls back to a default.
        self.logger = _logging.adapt_logger(logger or _logging.NoOpLogger())
        self.event_queue = event_queue or queue.Queue(maxsize=self._DEFAULT_QUEUE_CAPACITY)
        self.batch_size = batch_size if self._validate_intantiation_props(batch_size, 'batch_size') \
            else self._DEFAULT_BATCH_SIZE
        self.flush_interval = timedelta(seconds=flush_interval) \
            if self._validate_intantiation_props(flush_interval, 'flush_interval') \
            else self._DEFAULT_FLUSH_INTERVAL
        self.timeout_interval = timedelta(seconds=timeout_interval) \
            if self._validate_intantiation_props(timeout_interval, 'timeout_interval') \
            else self._DEFAULT_TIMEOUT_INTERVAL
        # Consumer thread; stays None until start() is called.
        self.executor = None
        self._current_batch = list()

        if start_on_init is True:
            self.start()

    @property
    def is_running(self):
        """ Property to check if consumer thread is alive or not.

        Returns:
          False if the consumer thread was never created, otherwise the thread's liveness.
        """
        executor = getattr(self, 'executor', None)
        # Thread.isAlive() was removed in Python 3.9; is_alive() exists on Python 2.6+ and 3.x.
        return executor is not None and executor.is_alive()

    def _validate_intantiation_props(self, prop, prop_name):
        """ Method to determine if instantiation properties like batch_size, flush_interval
        and timeout_interval are valid.

        Args:
          prop: Property value that needs to be validated.
          prop_name: Property name.

        Returns:
          False if property value is None, a boolean, less than 1 or not a finite number.
          False if property name is batch_size and value is not an integer.
          True otherwise.
        """
        # bool is a subclass of int, so it has to be rejected explicitly.
        is_valid = prop is not None and not isinstance(prop, bool)

        if is_valid and prop_name == 'batch_size' and not isinstance(prop, int):
            is_valid = False

        # Check the type before comparing: on Python 3 comparing a non-numeric
        # value with 1 raises TypeError instead of falling back to the default.
        if is_valid and not validator.is_finite_number(prop):
            is_valid = False

        if is_valid and prop < 1:
            is_valid = False

        if not is_valid:
            self.logger.info('Using default value for {}.'.format(prop_name))

        return is_valid

    def _get_time(self, _time=None):
        """ Method to return rounded off time as integer in seconds. If _time is None, uses current time.

        Args:
          _time: time in seconds that needs to be rounded off.

        Returns:
          Integer time in seconds.
        """
        if _time is None:
            return int(round(time.time()))

        return int(round(_time))

    def start(self):
        """ Starts the batch processing thread to batch events. """
        if self.is_running:
            self.logger.warning('BatchEventProcessor already started.')
            return

        self.flushing_interval_deadline = self._get_time() + self._get_time(self.flush_interval.total_seconds())
        self.executor = threading.Thread(target=self._run)
        # Daemon thread so that it does not keep the interpreter alive on exit.
        # The attribute form replaces the deprecated setDaemon().
        self.executor.daemon = True
        self.executor.start()

    def _run(self):
        """ Triggered as part of the thread which batches events or flushes event_queue and waits
        periodically if queue is empty.
        """
        try:
            while True:
                if self._get_time() > self.flushing_interval_deadline:
                    self._flush_queue()

                try:
                    # The blocking get with a timeout already paces the loop,
                    # so no additional sleep is needed when the queue is empty.
                    item = self.event_queue.get(True, 0.05)

                except queue.Empty:
                    continue

                # The signals are unique sentinel objects: compare by identity so that an
                # item with a custom __eq__ can never be mistaken for a signal.
                if item is self._SHUTDOWN_SIGNAL:
                    self.logger.debug('Received shutdown signal.')
                    break

                if item is self._FLUSH_SIGNAL:
                    self.logger.debug('Received flush signal.')
                    self._flush_queue()
                    continue

                if isinstance(item, UserEvent):
                    self._add_to_batch(item)

        except Exception as exception:
            self.logger.error('Uncaught exception processing buffer. Error: ' + str(exception))

        finally:
            self.logger.info('Exiting processing loop. Attempting to flush pending events.')
            self._flush_queue()

    def flush(self):
        """ Adds flush signal to event_queue. """

        self.event_queue.put(self._FLUSH_SIGNAL)

    def _flush_queue(self):
        """ Flushes the current batch by dispatching its events as a single log event. """

        # Check and swap under the lock so the emptiness test and the reset are atomic.
        with self.LOCK:
            if not self._current_batch:
                return

            to_process_batch = self._current_batch
            self._current_batch = list()

        log_event = EventFactory.create_log_event(to_process_batch, self.logger)

        try:
            self.event_dispatcher.dispatch_event(log_event)
        except Exception as e:
            self.logger.error('Error dispatching event: ' + str(log_event) + ' ' + str(e))

    def process(self, user_event):
        """ Method to process the user_event by putting it in event_queue.

        Events that are not UserEvent instances are rejected, and events are dropped
        (with a debug log) when the queue is full.

        Args:
          user_event: UserEvent Instance.
        """
        if not isinstance(user_event, UserEvent):
            self.logger.error('Provided event is in an invalid format.')
            return

        self.logger.debug('Received user_event: ' + str(user_event))

        try:
            self.event_queue.put_nowait(user_event)
        except queue.Full:
            self.logger.debug('Payload not accepted by the queue. Current size: {}'.format(str(self.event_queue.qsize())))

    def _add_to_batch(self, user_event):
        """ Method to append received user event to current batch.

        Args:
          user_event: UserEvent Instance.
        """
        if self._should_split(user_event):
            # _flush_queue leaves an empty batch behind, so no extra reset is required.
            self._flush_queue()

        # Reset the deadline if starting a new batch.
        if not self._current_batch:
            self.flushing_interval_deadline = self._get_time() + \
                self._get_time(self.flush_interval.total_seconds())

        with self.LOCK:
            self._current_batch.append(user_event)

        # Must stay outside the lock: _flush_queue acquires the same non-reentrant lock.
        if len(self._current_batch) >= self.batch_size:
            self._flush_queue()

    def _should_split(self, user_event):
        """ Method to check if current event batch should split into two.

        Args:
          user_event: UserEvent Instance.

        Return Value:
          - True, if revision number or project_id of last event in current batch does not match received event's
            revision number or project id respectively.
          - False, otherwise.
        """
        if not self._current_batch:
            return False

        current_context = self._current_batch[-1].event_context
        new_context = user_event.event_context

        if current_context.revision != new_context.revision:
            return True

        if current_context.project_id != new_context.project_id:
            return True

        return False

    def stop(self):
        """ Stops and disposes batch event processor. """
        self.event_queue.put(self._SHUTDOWN_SIGNAL)
        self.logger.warning('Stopping Scheduler.')

        executor = getattr(self, 'executor', None)
        if executor is None:
            # Never started: there is no consumer thread to join.
            return

        executor.join(self.timeout_interval.total_seconds())

        if self.is_running:
            # NOTE(review): the message says 'ms' although a timedelta is rendered here;
            # the text is left unchanged in case callers match on it.
            self.logger.error('Timeout exceeded while attempting to close for ' + str(self.timeout_interval) + ' ms.')

optimizely/event/log_event.py

+3
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,6 @@ def __init__(self, url, params, http_verb=None, headers=None):
2020
self.params = params
2121
self.http_verb = http_verb or 'POST'
2222
self.headers = headers
23+
24+
def __str__(self):
    """ Return a readable representation: the object's class followed by its attribute dict. """
    return '{}: {}'.format(self.__class__, self.__dict__)

0 commit comments

Comments
 (0)