The async flush thread no longer exits while there is anything left to flush; added timeouts to requests.

Ilya Volodarsky · Ilya Volodarsky · commit e0587ea3a869 · 2013-01-30T18:31:05.000-08:00
diff --git a/analytics/client.py b/analytics/client.py
@@ -8,13 +8,12 @@
 import requests
 
 from stats import Statistics
-from errors import ApiError, BatchError
+from errors import ApiError
 from utils import guess_timezone
 
 import options
 
 
-
 logging_enabled = True
 import logging
 logger = logging.getLogger('analytics')
@@ -65,29 +64,32 @@ def request(client, url, data):
     try:
 
         response = requests.post(url, data=json.dumps(data),
-            headers={'content-type': 'application/json'})
+            headers={'content-type': 'application/json'}, timeout=client.timeout)
 
         log('debug', 'Finished Segment.io request.')
 
         package_response(client, data, response)
 
+        return response.status_code == 200
+
     except requests.ConnectionError as e:
         package_exception(client, data, e)
+    except requests.Timeout as e:
+        package_exception(client, data, e)
+
+    return False
 
 
 class FlushThread(threading.Thread):
 
-    def __init__(self, client, url, batches):
+    def __init__(self, client):
         threading.Thread.__init__(self)
         self.client = client
-        self.url = url
-        self.batches = batches
 
     def run(self):
         log('debug', 'Flushing thread running ...')
 
-        for data in self.batches:
-            request(self.client, self.url, data)
+        self.client._sync_flush()
 
         log('debug', 'Flushing thread done.')
 
@@ -102,7 +104,8 @@ def __init__(self, secret=None,
                        log_level=logging.INFO, log=True,
                        flush_at=20, flush_after=datetime.timedelta(0, 10),
                        async=True, max_queue_size=10000,
-                       stats=Statistics()):
+                       stats=Statistics(),
+                       timeout=10):
         """Create a new instance of a analytics-python Client
 
         :param str secret: The Segment.io API secret
@@ -117,6 +120,7 @@ def __init__(self, secret=None,
         : param bool async: True to have the client flush to the server on another
         thread, therefore not blocking code (this is the default). False to
         enable blocking and making the request on the calling thread.
+        : param float timeout: Number of seconds before timing out request to Segment.io
 
         """
 
@@ -140,6 +144,8 @@ def __init__(self, secret=None,
         self.flush_at = flush_at
         self.flush_after = flush_after
 
+        self.timeout = timeout
+
         self.stats = stats
 
         self.flush_lock = threading.Lock()
@@ -321,7 +327,7 @@ def _enqueue(self, action):
             submitted = True
 
         else:
-            log('warn', 'Segment.io queue is full')
+            log('warn', 'analytics-python queue is full')
 
         if self._should_flush():
             self.flush()
@@ -350,8 +356,6 @@ def flush(self, async=None):
 
         flushing = False
 
-        url = options.host + options.endpoints['batch']
-
         # if the async parameter is provided, it overrides the client's settings
         if async == None:
             async = self.async
@@ -363,54 +367,52 @@ def flush(self, async=None):
 
                 if self._flush_thread_is_free():
 
-                    log('debug', 'Attempting asynchronous flush ...')
-
-                    batches = self._get_batches()
-                    if len(batches) > 0:
-
-                        self.flushing_thread = FlushThread(self,
-                            url, batches)
+                    log('debug', 'Initiating asynchronous flush ..')
 
-                        self.flushing_thread.start()
+                    self.flushing_thread = FlushThread(self)
+                    self.flushing_thread.start()
 
-                        flushing = True
+                    flushing = True
 
                 else:
-                    log('debug', 'The flushing thread is still active, ' +
-                        'cant flush right now')
+                    log('debug', 'The flushing thread is still active.')
         else:
 
             # Flushes on this thread
-            log('debug', 'Starting synchronous flush ...')
-
-            batches = self._get_batches()
-            if len(batches) > 0:
-                for data in batches:
-                    request(self, url, data)
-                flushing = True
-
-            log('debug', 'Finished synchronous flush.')
+            log('debug', 'Initiating synchronous flush ..')
+            self._sync_flush()
+            flushing = True
 
         if flushing:
             self.last_flushed = datetime.datetime.now()
             self.stats.flushes += 1
 
         return flushing
 
-    def _get_batches(self):
+    def _sync_flush(self):
+
+        log('debug', 'Starting flush ..')
+
+        successful = 0
+        failed = 0
 
-        batches = []
+        url = options.host + options.endpoints['batch']
 
         while len(self.queue) > 0:
+
             batch = []
             for i in range(self.max_flush_size):
                 if len(self.queue) == 0:
                     break
+
                 batch.append(self.queue.pop())
 
-            batches.append({
-                'batch':          batch,
-                'secret':         self.secret
-            })
+            payload = {'batch': batch, 'secret': self.secret}
+
+            if request(self, url, payload):
+                successful += len(batch)
+            else:
+                failed += len(batch)
 
-        return batches
+        log('debug', 'Successfully flushed ' + str(successful) + ' items [' +
+            str(failed) + ' failed].')
diff --git a/analytics/errors.py b/analytics/errors.py
@@ -11,15 +11,3 @@ def __repr__(self):
 
     def __str__(self):
         return repr(self.message)
-
-
-class BatchError(Exception):
-
-    def __init__(self, errors):
-        self.errors = errors
-
-    def __repr__(self):
-        return self.__str__()
-
-    def __str__(self):
-        return repr(self.errors)
diff --git a/setup.py b/setup.py
@@ -13,7 +13,7 @@
 
 setup(
     name='analytics-python',
-    version='0.2.8',
+    version='0.3.0',
     url='https://github.com/segmentio/analytics-python',
     author='Ilya Volodarsky',
     author_email='ilya@segment.io',
diff --git a/test.py b/test.py
@@ -45,7 +45,7 @@ def test_timezone_utils(self):
 
         shouldnt_be_edited = analytics.utils.guess_timezone(utcnow)
 
-        self.assertTrue(utcnow == shouldnt_be_edited)
+        self.assertEqual(utcnow, shouldnt_be_edited)
 
     def test_clean(self):
         supported = {
@@ -67,7 +67,7 @@ def test_clean(self):
 
         analytics.default_client._clean(combined)
 
-        self.assertTrue(combined == supported)
+        self.assertEqual(combined, supported)
 
     def test_async_basic_identify(self):
         # flush after every message
@@ -83,20 +83,20 @@ def test_async_basic_identify(self):
             "Friends": 30
         })
 
-        self.assertTrue(analytics.stats.identifies == last_identifies + 1)
+        self.assertEqual(analytics.stats.identifies, last_identifies + 1)
 
         # this should flush because we set the flush_at to 1
-        self.assertTrue(analytics.stats.flushes == last_flushes + 1)
+        self.assertEqual(analytics.stats.flushes, last_flushes + 1)
 
         # this should do nothing, as the async thread is currently active
         analytics.flush()
 
         # we should see no more flushes here
-        self.assertTrue(analytics.stats.flushes == last_flushes + 1)
+        self.assertEqual(analytics.stats.flushes, last_flushes + 1)
 
         sleep(1)
 
-        self.assertTrue(analytics.stats.successful == last_successful + 1)
+        self.assertEqual(analytics.stats.successful, last_successful + 1)
 
     def test_async_basic_track(self):
 
@@ -111,13 +111,13 @@ def test_async_basic_track(self):
             "Song": "Eleanor Rigby"
         })
 
-        self.assertTrue(analytics.stats.tracks == last_tracks + 1)
+        self.assertEqual(analytics.stats.tracks, last_tracks + 1)
 
         analytics.flush()
 
         sleep(1)
 
-        self.assertTrue(analytics.stats.successful == last_successful + 1)
+        self.assertEqual(analytics.stats.successful, last_successful + 1)
 
     def test_async_full_identify(self):
 
@@ -147,11 +147,11 @@ def test_async_full_identify(self):
         analytics.identify('ilya@analytics.io', traits,
             context=context, timestamp=datetime.now())
 
-        self.assertTrue(analytics.stats.identifies == last_identifies + 1)
+        self.assertEqual(analytics.stats.identifies, last_identifies + 1)
 
         sleep(1)
 
-        self.assertTrue(analytics.stats.successful == last_successful + 1)
+        self.assertEqual(analytics.stats.successful, last_successful + 1)
 
     def test_async_full_track(self):
 
@@ -169,11 +169,11 @@ def test_async_full_track(self):
         analytics.track('ilya@analytics.io', 'Played a Song',
             properties, timestamp=datetime.now())
 
-        self.assertTrue(analytics.stats.tracks == last_tracks + 1)
+        self.assertEqual(analytics.stats.tracks, last_tracks + 1)
 
         sleep(1)
 
-        self.assertTrue(analytics.stats.successful == last_successful + 1)
+        self.assertEqual(analytics.stats.successful, last_successful + 1)
 
     def test_blocking_flush(self):
 
@@ -191,8 +191,8 @@ def test_blocking_flush(self):
         analytics.track('ilya@analytics.io', 'Played a Song',
             properties, timestamp=datetime.today())
 
-        self.assertTrue(analytics.stats.tracks == last_tracks + 1)
-        self.assertTrue(analytics.stats.successful == last_successful + 1)
+        self.assertEqual(analytics.stats.tracks, last_tracks + 1)
+        self.assertEqual(analytics.stats.successful, last_successful + 1)
 
     def test_time_policy(self):
 
@@ -221,7 +221,7 @@ def test_time_policy(self):
         })
 
         # that shouldn't of triggered a flush
-        self.assertTrue(analytics.stats.flushes == last_flushes)
+        self.assertEqual(analytics.stats.flushes, last_flushes)
 
         # sleep past the time-flush policy
         sleep(1.2)
@@ -232,7 +232,7 @@ def test_time_policy(self):
             "Song": "Eleanor Rigby"
         })
 
-        self.assertTrue(analytics.stats.flushes == last_flushes + 1)
+        self.assertEqual(analytics.stats.flushes, last_flushes + 1)
 
     def test_performance(self):
 
@@ -243,7 +243,7 @@ def test_performance(self):
         analytics.default_client.async = True
         analytics.default_client.flush_at = 200
         analytics.default_client.max_flush_size = 50
-        analytics.default_client.set_log_level(logging.WARN)
+        analytics.default_client.set_log_level(logging.DEBUG)
 
         for i in range(to_send):
             analytics.track('ilya@analytics.io', 'Played a Song', {