Skip to content

Commit 89deb62

Browse files
committed
Improve efficiency of activity deduplication algorithm.
1 parent 1de30ea commit 89deb62

File tree

3 files changed

+41
-9
lines changed

3 files changed

+41
-9
lines changed

tapiriik/services/interchange.py

+21
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,27 @@ def __eq__(self, other):
332332
def __ne__(self, other):
333333
return not self.__eq__(other)
334334

335+
# We define ascending as most-recent first.
336+
# For simplicity, we compare without timezones.
337+
# In the only place this ordering is (intentionally...) used, ignoring timezones doesn't matter.
338+
def __gt__(self, other):
339+
try:
340+
return self.StartTime.replace(tzinfo=None) < other.StartTime.replace(tzinfo=None)
341+
except AttributeError:
342+
return self.StartTime.replace(tzinfo=None) < other.replace(tzinfo=None)
343+
344+
def __ge__(self, other):
345+
try:
346+
return self.StartTime.replace(tzinfo=None) <= other.StartTime.replace(tzinfo=None)
347+
except AttributeError:
348+
return self.StartTime.replace(tzinfo=None) <= other.replace(tzinfo=None)
349+
350+
def __lt__(self, other):
351+
return not self.__ge__(other)
352+
353+
def __le__(self, other):
354+
return not self.__gt__(other)
355+
335356

336357
class UploadedActivity (Activity):
337358
pass # will contain list of which service instances contain this activity - not really merited

tapiriik/sync/sync.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
import pytz
1818
import kombu
1919
import json
20+
import bisect
2021

21-
# Set this up seperate from the logger used in this scope, so services logging messages are caught and logged into user's files.
22+
# Set this up separate from the logger used in this scope, so services logging messages are caught and logged into user's files.
2223
_global_logger = logging.getLogger("tapiriik")
2324

2425
_global_logger.setLevel(logging.DEBUG)
@@ -439,8 +440,13 @@ def _accumulateActivities(self, conn, svcActivities, no_add=False):
439440
if act.TZ and not hasattr(act.TZ, "localize"):
440441
raise ValueError("Got activity with TZ type " + str(type(act.TZ)) + " instead of a pytz timezone")
441442
# Used to ensureTZ() right here - doubt it's needed any more?
442-
existElsewhere = [
443-
x for x in self._activities if
443+
# Binsearch to find which activities actually need individual attention.
444+
# Otherwise it's O(mn^2).
445+
# self._activities is sorted most recent first
446+
relevantActivitiesStart = bisect.bisect_left(self._activities, act.StartTime + timezoneErrorPeriod)
447+
relevantActivitiesEnd = bisect.bisect_right(self._activities, act.StartTime - timezoneErrorPeriod, lo=relevantActivitiesStart)
448+
extantActIter = (
449+
x for x in (self._activities[idx] for idx in range(relevantActivitiesStart, relevantActivitiesEnd)) if
444450
(
445451
# Identical
446452
x.UID == act.UID
@@ -481,9 +487,14 @@ def _accumulateActivities(self, conn, svcActivities, no_add=False):
481487
and
482488
# Prevents closely-spaced activities of known different type from being lumped together - esp. important for manually-entered ones
483489
(x.Type == ActivityType.Other or act.Type == ActivityType.Other or x.Type == act.Type or ActivityType.AreVariants([act.Type, x.Type]))
484-
]
485-
if len(existElsewhere) > 0:
486-
existingActivity = existElsewhere[0]
490+
)
491+
492+
try:
493+
existingActivity = next(extantActIter)
494+
except StopIteration:
495+
existingActivity = None
496+
497+
if existingActivity:
487498
# we don't merge the exclude values here, since at this stage the services have the option of just not returning those activities
488499
if act.TZ is not None and existingActivity.TZ is None:
489500
existingActivity.TZ = act.TZ
@@ -522,7 +533,7 @@ def _accumulateActivities(self, conn, svcActivities, no_add=False):
522533
act.UIDs = existingActivity.UIDs # stop the circular inclusion, not that it matters
523534
continue
524535
if not no_add:
525-
self._activities.append(act)
536+
bisect.insort_left(self._activities, act)
526537

527538
def _determineEligibleRecipientServices(self, activity, recipientServices):
528539
from tapiriik.auth import User

tapiriik/testing/sync.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,8 @@ def test_activity_coalesce(self):
393393
s._accumulateActivities(recB, [copy.deepcopy(actB)])
394394

395395
self.assertEqual(len(s._activities), 2)
396-
act = s._activities[0]
397-
self.assertEqual(act.Type, actA.Type)
396+
self.assertEqual(s._activities[0].Type, actB.Type)
397+
self.assertEqual(s._activities[1].Type, actA.Type)
398398

399399
def test_eligibility_excluded(self):
400400
user = TestTools.create_mock_user()

0 commit comments

Comments
 (0)