review-heatmap/src/review_heatmap/activity.py at 7c505b981ba8f458d2cf3a05df3c1b94b4bb88e6 · glutanimate/review-heatmap · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# -*- coding: utf-8 -*-

# Review Heatmap Add-on for Anki
#
# Copyright (C) 2016-2019  Aristotelis P. <https://glutanimate.com/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version, with the additions
# listed at the end of the accompanied license file.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# NOTE: This program is subject to certain additional terms pursuant to
# Section 7 of the GNU Affero General Public License.  You should have
# received a copy of these additional terms immediately following the
# terms and conditions of the GNU Affero General Public License which
# accompanied this program.
#
# If not, please request a copy through one of the means of contact
# listed here: <https://glutanimate.com/contact/>.
#
# Any modifications to this file must keep this entire header intact.

"""
Components related to gathering and analyzing user activity
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

import time
import datetime

from aqt import mw

from anki.utils import ids2str

from .libaddon.platform import ANKI20
from .libaddon.debug import logger, isDebuggingOn

__all__ = ["ActivityReporter"]


class ActivityReporter(object):
    def __init__(self, col, config, whole=False):
        self.col = col
        self.config = config
        # NOTE: Refactor the following instance variables if we
        # ever decide to persist ActivityReporter objects across
        # multiple invocations (e.g. to cache results)
        self.whole = whole
        self.offset = self._getColOffset()
        self.today = self._getToday(self.offset)

    # Public API
    #########################################################################

    def getData(self, limhist=None, limfcst=None, mode="reviews"):
        time_limits = self._getTimeLimits(limhist, limfcst)

        if mode == "reviews":
            return self._getActivity(**self._reviewsData(time_limits))
        else:
            raise NotImplementedError("activity mode {} not implemented".format(mode))

    # Activity calculations
    #########################################################################

    # General

    def _getActivity(self, history, forecast={}):
        if not history:
            return None

        first_day = history[0][0] if history else None
        last_day = forecast[-1][0] if forecast else None

        # Stats: cumulative activity and streaks

        streak_max = streak_cur = streak_last = 0
        current = total = total_time_spent = 0

        for idx, item in enumerate(history):
            current += 1
            timestamp, activity, time_spent = item

            try:
                next_timestamp = history[idx + 1][0]
            except IndexError:  # last item
                streak_last = current
                next_timestamp = None

            if timestamp + 86400 != next_timestamp:  # >1 day gap. streak over.
                if current > streak_max:
                    streak_max = current
                current = 0

            total += activity
            total_time_spent += time_spent

        days_learned = idx + 1

        # Stats: current streak
        if history[-1][0] in (self.today, self.today - 86400):
            # last recorded date today or yesterday?
            streak_cur = streak_last

        # Stats: average count on days with activity
        avg_cur = int(round(total / max(days_learned, 1)))
        # Stats: average time spent per days reviewing cards
        time_spent_daily_avg = int(round(total_time_spent / max(days_learned, 1)))

        # Stats: percentage of days with activity
        #
        # NOTE: days_total is based on first recorded revlog entry, i.e. it is
        # not the grand total of days since collection creation date / whatever
        # history limits the user might have set. This value seems more
        # desirable and motivating than the raw percentage of days learned
        # in the date inclusion period.
        days_total = (self.today - first_day) / 86400 + 1
        if days_total == 1:
            pdays = 100  # review history only extends to yesterday
        else:
            pdays = int(round((days_learned / days_total) * 100))

        # Compose activity data (remove time spent data to match forecast size)
        activity = dict([i[:2] for i in history] + forecast)
        if history[-1][0] == self.today:  # history takes precedence for today
            activity[self.today] = history[-1][1]

        return {
            "activity": activity,
            # individual cal-heatmap dates need to be in ms:
            "start": first_day * 1000 if first_day else None,
            "stop": last_day * 1000 if last_day else None,
            "today": self.today * 1000,
            "offset": self.offset,
            "stats": {
                "streak_max": {"type": "streak", "value": streak_max},
                "streak_cur": {"type": "streak", "value": streak_cur},
                "pct_days_active": {"type": "percentage", "value": pdays},
                "activity_daily_avg": {"type": "cards", "value": avg_cur},
                "time_spent_max": {"type": "time_day", "value": total_time_spent,},
                "time_spent_daily_avg": {
                    "type": "time_minute",
                    "value": time_spent_daily_avg,
                },
            },
        }

    # Mode-specific

    def _reviewsData(self, time_limits):
        return {
            "history": self._cardsDone(start=time_limits[0]),
            "forecast": self._cardsDue(start=self.today, stop=time_limits[1]),
        }

    # Collection properties
    #########################################################################

    def _getColOffset(self):
        """
        Return the hour of the day at which a new scheduling day begins

        On non-2.0 Anki versions with scheduler version 2 this is the
        "rollover" collection setting (default: 4). Otherwise it is the
        hour of the collection's creation timestamp in local time.
        """
        uses_rollover = not ANKI20 and self.col.schedVer() == 2
        if not uses_rollover:
            created = datetime.datetime.fromtimestamp(self.col.crt)
            return created.hour
        return self.col.conf.get("rollover", 4)

    @staticmethod
    def daystartEpoch(timestr, is_timestamp=True, offset=0):
        """
        Return the unix timestamp of the start of the day of a given time

        Arguments:
            timestr: time value understood by SQLite's STRFTIME, e.g. a
                unix timestamp or "now"
            is_timestamp: whether timestr is a unix timestamp (adds the
                'unixepoch' modifier to the query)
            offset: number of hours to subtract before truncating to the
                start of the day. Falsy values add no modifier.
        """
        # Use db query instead of Python time-related modules to guarantee
        # consistency with rest of activity data (also: Anki does not seem
        # to ship 'pytz' by default, and 'calendar' might be removed from
        # packaging at some point, as Anki's code does not directly depend
        # on it)
        #
        # NOTE(review): timestr is interpolated into the SQL text as-is,
        # so callers are expected to pass trusted values only
        offset_modifier = " '-{} hours', ".format(offset) if offset else ""
        epoch_modifier = " 'unixepoch', " if is_timestamp else ""

        template = (
            "\n"
            "SELECT CAST(STRFTIME('%s', '{timestr}', {unixepoch} {offset}\n"
            "'localtime', 'start of day') AS int)"
        )
        query = template.format(
            timestr=timestr, unixepoch=epoch_modifier, offset=offset_modifier
        )
        return mw.col.db.scalar(query)

    def _getToday(self, offset):
        """
        Return unix epoch timestamp in seconds for today (00:00 UTC)
        """
        return self.daystartEpoch("now", is_timestamp=False, offset=offset)

    # Time limits
    #########################################################################

    def _getTimeLimits(self, limhist=None, limfcst=None):
        conf = self.config["synced"]

        if limhist is not None:
            history_start = self._daysFromToday(-limhist)
        else:
            history_start = self._getConfHistoryLimit(conf["limhist"], conf["limdate"])

        if limfcst is not None:
            forecast_stop = self._daysFromToday(limfcst)
        else:
            forecast_stop = self._getConfForecastLimit(conf["limfcst"])

        return (history_start, forecast_stop)

    def _getConfHistoryLimit(self, limit_days, limit_date):
        if limit_days is None and limit_date is None:
            return None

        if limit_days:
            limit_days_date = self._daysFromToday(-limit_days)
        else:
            limit_days_date = 0

        limit_date = self.daystartEpoch(limit_date) if limit_date else None

        if not limit_date or limit_date == self.daystartEpoch(self.col.crt):
            # ignore zero value or default value
            limit_date = 0
        else:
            limit_date = limit_date

        # choose most restricting limit
        return max(limit_days_date, limit_date) or None

    def _getConfForecastLimit(self, limit_days):
        if not limit_days:
            return None
        return self._daysFromToday(limit_days)

    def _daysFromToday(self, days):
        return self.today + 86400 * days

    # Deck limits
    #########################################################################

    def _validDecks(self, excluded):
        all_excluded = []

        for did in excluded:
            children = [d[1] for d in self.col.decks.children(did)]
            all_excluded.extend(children)

        all_excluded.extend(excluded)

        return [d["id"] for d in self.col.decks.all() if d["id"] not in all_excluded]

    def _didLimit(self):
        """
        Return the deck ids to query, formatted by ids2str

        When reporting on the whole collection, all decks are used minus
        the ones excluded via the "limdecks" setting. Otherwise only the
        collection's active decks are used.
        """
        excluded = self.config["synced"]["limdecks"]

        if not self.whole:
            dids = self.col.decks.active()
        elif excluded:
            dids = self._validDecks(excluded)
        else:
            dids = [deck["id"] for deck in self.col.decks.all()]

        return ids2str(dids)

    def _revlogLimit(self):
        excluded_dids = self.config["synced"]["limdecks"]
        ignore_deleted = self.config["synced"]["limcdel"]
        if self.whole:
            if excluded_dids:
                dids = self._validDecks(excluded_dids)
            elif ignore_deleted:
                # Limiting log entries to cids with assigned dids automatically
                # excludes deleted entries. In cases where we do not use a deck
                # limit we specify the following instead:
                return "cid IN (SELECT id FROM cards)"
            else:
                return ""
        else:
            dids = self.col.decks.active()
        return "cid IN (SELECT id FROM cards WHERE did IN %s)" % ids2str(dids)

    # Database queries for user activity
    #########################################################################

    def _cardsDue(self, start=None, stop=None):
        # start, stop: timestamps in seconds. Set to None for unlimited.
        # start: inclusive; stop: exclusive

        lim = ""
        if start is not None:
            lim += " AND day >= {}".format(start)
        if stop is not None:
            lim += " AND day < {}".format(stop)
        cmd = """
SELECT
STRFTIME('%s', 'now', '-{} hours', 'localtime', 'start of day')
    + (due - :today) * 86400
AS day, -COUNT(), due -- nsegative to support heatmap legend
FROM cards
WHERE did IN {} AND queue IN (2,3)
{}
GROUP BY day ORDER BY day""".format(
            self.offset, self._didLimit(), lim
        )

        res = self.col.db.all(cmd, today=self.col.sched.today)

        if isDebuggingOn():
            if not ANKI20 and mw.col.schedVer() == 2:
                offset = mw.col.conf.get("rollover", 4)
                schedver = 2
            else:
                startDate = datetime.datetime.fromtimestamp(mw.col.crt)
                offset = startDate.hour
                schedver = 1

            logger.debug(cmd)
            logger.debug(self.col.sched.today)
            logger.debug("Anki20 %s, Scheduler version %s", ANKI20, schedver)
            logger.debug("Day starts at setting: %s hours", offset)
            logger.debug(
                time.strftime(
                    "dayCutoff: %Y-%m-%d %H:%M", time.localtime(mw.col.sched.dayCutoff),
                )
            )
            logger.debug(
                time.strftime("local now: %Y-%m-%d %H:%M", time.localtime(time.time()))
            )
            logger.debug(
                time.strftime(
                    "Col today: %Y-%m-%d",
                    time.localtime(mw.col.crt + 86400 * mw.col.sched.today),
                )
            )
            logger.debug("Col days: %s", mw.col.sched.today)
            logger.debug(res)

        return [i[:-1] for i in res]

    def _cardsDone(self, start=None):
        """
        start: timestamp in seconds to start reporting from

        Group revlog entries by day while taking local timezone and DST
        settings into account. Return as unix timestamps of UTC day start
        (00:00:00 UTC+0 of each day)

        We perform the grouping here instead of passing the raw data on to
        cal-heatmap because of performance reasons (user revlogs can easily
        reach >100K entries).

        Grouping-by-day needs to be timezone-aware to assign the recorded
        timestamps to the correct day. For that reason we include the
        'localtime' strftime modifier, even though it does come at a
        performance penalty
        """
        offset = self.offset * 3600

        lims = []
        if start is not None:
            lims.append("day >= {}".format(start))

        deck_limit = self._revlogLimit()
        if deck_limit:
            lims.append(deck_limit)

        lim = "WHERE " + " AND ".join(lims) if lims else ""

        cmd = """
SELECT CAST(STRFTIME('%s', id / 1000 - {}, 'unixepoch',
                     'localtime', 'start of day') AS int)
AS day, COUNT(), SUM(time) as time_spent
FROM revlog {}
GROUP BY day ORDER BY day""".format(
            offset, lim
        )

        return self.col.db.all(cmd)