Skip to content

Commit

Permalink
Merge pull request #5097 from kobotoolbox/delete-access-logs-config
Browse files Browse the repository at this point in the history
Delete expired access logs
  • Loading branch information
noliveleger authored Oct 22, 2024
2 parents 580e202 + 8b42059 commit e0637af
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dependencies/pip/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ modilabs-python-utils==0.1.5
# via -r dependencies/pip/requirements.in
mongomock==4.1.2
# via -r dependencies/pip/dev_requirements.in
more-itertools==10.4.0
# via -r dependencies/pip/requirements.in
multidict==6.0.5
# via
# aiohttp
Expand Down
1 change: 1 addition & 0 deletions dependencies/pip/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ jsonfield
jsonschema
kombu
lxml
more-itertools
oauthlib
openpyxl
#py-gfm # Incompatible with markdown 3.x
Expand Down
2 changes: 2 additions & 0 deletions dependencies/pip/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ markupsafe==2.1.5
# via werkzeug
modilabs-python-utils==0.1.5
# via -r dependencies/pip/requirements.in
more-itertools==10.4.0
# via -r dependencies/pip/requirements.in
multidict==6.0.5
# via
# aiohttp
Expand Down
45 changes: 45 additions & 0 deletions kobo/apps/audit_log/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import timedelta

from constance import config
from django.conf import settings
from django.utils import timezone
from more_itertools import chunked

from kobo.apps.audit_log.models import (
AccessLog,
AuditLog,
)
from kobo.celery import celery_app
from kpi.utils.log import logging


@celery_app.task()
def batch_delete_audit_logs_by_id(ids):
logs = AuditLog.objects.filter(id__in=ids)
count, _ = logs.delete()
logging.info(f'Deleted {count} audit logs from database')


@celery_app.task()
def spawn_access_log_cleaning_tasks():
"""
Enqueue tasks to delete access logs older than ACCESS_LOG_LIFESPAN days old.
ACCESS_LOG_LIFESPAN is configured via constance.
Ids are batched into multiple tasks.
"""

expiration_date = timezone.now() - timedelta(
days=config.ACCESS_LOG_LIFESPAN
)

expired_logs = (
AccessLog.objects.filter(date_created__lt=expiration_date)
.values_list('id', flat=True)
.iterator()
)
for id_batch in chunked(
expired_logs, settings.ACCESS_LOG_DELETION_BATCH_SIZE
):
# queue up a new task for each batch of expired ids
batch_delete_audit_logs_by_id.delay(ids=id_batch)
96 changes: 96 additions & 0 deletions kobo/apps/audit_log/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from datetime import timedelta
from unittest.mock import patch

from constance.test import override_config
from django.test import override_settings
from django.utils import timezone

from kobo.apps.audit_log.models import AccessLog
from kobo.apps.audit_log.tasks import (
batch_delete_audit_logs_by_id,
spawn_access_log_cleaning_tasks,
)
from kobo.apps.kobo_auth.shortcuts import User
from kpi.tests.base_test_case import BaseTestCase


@override_config(ACCESS_LOG_LIFESPAN=1)
class AuditLogTasksTestCase(BaseTestCase):

fixtures = ['test_data']

def test_spawn_deletion_task_identifies_expired_logs(self):
"""
Test the spawning task correctly identifies which logs to delete.
Separated for easier debugging of the spawning vs deleting steps
"""
user = User.objects.get(username='someuser')
old_log = AccessLog.objects.create(
user=user,
date_created=timezone.now() - timedelta(days=1, hours=1),
)
older_log = AccessLog.objects.create(
user=user,
date_created=timezone.now() - timedelta(days=2)
)
new_log = AccessLog.objects.create(user=user)

with patch(
'kobo.apps.audit_log.tasks.batch_delete_audit_logs_by_id.delay'
) as patched_spawned_task:
spawn_access_log_cleaning_tasks()

# get the list of ids passed for any call to the actual deletion task
id_lists = [kwargs['ids'] for _, _, kwargs in patched_spawned_task.mock_calls]
# flatten the list
all_deleted_ids = [log_id for id_list in id_lists for log_id in id_list]
self.assertIn(old_log.id, all_deleted_ids)
self.assertIn(older_log.id, all_deleted_ids)
self.assertNotIn(new_log.id, all_deleted_ids)

@override_settings(ACCESS_LOG_DELETION_BATCH_SIZE=2)
def test_spawn_task_batches_ids(self):
three_days_ago = timezone.now() - timedelta(days=3)
user = User.objects.get(username='someuser')
old_log_1 = AccessLog.objects.create(
user=user, date_created=three_days_ago
)
old_log_2 = AccessLog.objects.create(
user=user, date_created=three_days_ago
)
old_log_3 = AccessLog.objects.create(
user=user, date_created=three_days_ago
)

with patch(
'kobo.apps.audit_log.tasks.batch_delete_audit_logs_by_id.delay'
) as patched_spawned_task:
spawn_access_log_cleaning_tasks()

# Should be 2 batches
self.assertEqual(patched_spawned_task.call_count, 2)
# make sure all batches were <= ACCESS_LOG_DELETION_BATCH_SIZE
all_deleted_ids = []
for task_call in patched_spawned_task.mock_calls:
_, _, kwargs = task_call
id_list = kwargs['ids']
self.assertLessEqual(len(id_list), 2)
all_deleted_ids.extend(id_list)

# make sure we queued everything for deletion
self.assertIn(old_log_1.id, all_deleted_ids)
self.assertIn(old_log_2.id, all_deleted_ids)
self.assertIn(old_log_3.id, all_deleted_ids)

def test_batch_delete_audit_logs_by_id(self):
user = User.objects.get(username='someuser')
log_1 = AccessLog.objects.create(user=user)
log_2 = AccessLog.objects.create(user=user)
log_3 = AccessLog.objects.create(user=user)
self.assertEqual(AccessLog.objects.count(), 3)

batch_delete_audit_logs_by_id(ids=[log_1.id, log_2.id])
# only log_3 should remain
self.assertEqual(AccessLog.objects.count(), 1)
self.assertEqual(AccessLog.objects.first().id, log_3.id)
13 changes: 13 additions & 0 deletions kobo/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,11 @@
),
'Email message to sent to admins on failure.',
),
'ACCESS_LOG_LIFESPAN': (
60,
'Length of time in days to keep access logs.',
'positive_int'
)
}

CONSTANCE_ADDITIONAL_FIELDS = {
Expand Down Expand Up @@ -649,6 +654,7 @@
'EXPOSE_GIT_REV',
'FRONTEND_MIN_RETRY_TIME',
'FRONTEND_MAX_RETRY_TIME',
'ACCESS_LOG_LIFESPAN',
),
'Rest Services': (
'ALLOW_UNSECURED_HOOK_ENDPOINTS',
Expand Down Expand Up @@ -1219,6 +1225,11 @@ def dj_stripe_request_callback_method():
'schedule': crontab(minute=0, hour=0),
'options': {'queue': 'kpi_low_priority_queue'}
},
'delete-expired-access-logs': {
'task': 'kobo.apps.audit_log.tasks.spawn_access_log_cleaning_tasks',
'schedule': crontab(minute=0, hour=0),
'options': {'queue': 'kpi_low_priority_queue'}
}
}


Expand Down Expand Up @@ -1769,6 +1780,8 @@ def dj_stripe_request_callback_method():
'application/x-zip-compressed'
]

ACCESS_LOG_DELETION_BATCH_SIZE = 1000

# Silence Django Guardian warning. Authentication backend is hooked, but
# Django Guardian does not recognize it because it is extended
SILENCED_SYSTEM_CHECKS = ['guardian.W001']
Expand Down

0 comments on commit e0637af

Please sign in to comment.