Skip to content

Commit 3edb2b0

Browse files
authored
perf(assets): prefetch related XForm objects to reduce database queries DEV-880 (#6487)
### 📣 Summary Improve performance of asset queries by prefetching related XForm objects. ### 📖 Description This change optimizes asset retrieval by prefetching related XForm objects, reducing the number of database queries executed when listing or accessing assets. Previously, each access to related XForm data triggered additional queries, which impacted performance on endpoints returning multiple assets. With prefetching, related data is loaded efficiently in a single query, improving response times and lowering database load without altering API behavior or response content.
1 parent f36dd0a commit 3edb2b0

File tree

4 files changed

+73
-32
lines changed

4 files changed

+73
-32
lines changed

kpi/serializers/v2/asset.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from django.db.models import F
1111
from django.utils.translation import gettext as t
1212
from django.utils.translation import ngettext as nt
13-
from django_request_cache import cache_for_request
1413
from drf_spectacular.types import OpenApiTypes
1514
from drf_spectacular.utils import extend_schema_field
1615
from rest_framework import exceptions, serializers
@@ -1280,13 +1279,3 @@ def _get_user_detail(obj, attr: str) -> str:
12801279
def _get_view(self) -> str:
12811280
request = self.context['request']
12821281
return request.parser_context['kwargs']['uid_project_view']
1283-
1284-
# FIXME Remove this method, seems to not be used anywhere
1285-
@cache_for_request
1286-
def _user_has_asset_perms(self, obj: Asset, perm: str) -> bool:
1287-
request = self.context.get('request')
1288-
user = get_database_user(request.user)
1289-
self._set_asset_ids_cache(obj)
1290-
if obj.owner == user or obj.has_perm(user, perm):
1291-
return True
1292-
return False

kpi/tests/api/v1/test_api_assets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
from urllib.parse import unquote_plus
55

66
from django.urls import reverse
7-
from formpack.utils.expand_content import SCHEMA_VERSION
87
from rest_framework import status
98
from rest_framework.authtoken.models import Token
109

10+
from formpack.utils.expand_content import SCHEMA_VERSION
1111
from kobo.apps.kobo_auth.shortcuts import User
1212
from kpi.constants import ASSET_TYPE_COLLECTION
1313
from kpi.models import Asset, SubmissionExportTask
@@ -61,13 +61,13 @@ def test_query_counts(self):
6161
# expected query counts are different in v1 and v2 so override the test here
6262
self.create_asset()
6363

64-
with self.assertNumQueries(FuzzyInt(28, 37)):
64+
with self.assertNumQueries(FuzzyInt(28, 40)):
6565
self.client.get(self.list_url)
6666
# test query count does not increase with more assets
6767
self.create_asset()
6868
self.create_asset()
6969
self.create_asset()
70-
with self.assertNumQueries(FuzzyInt(28, 37)):
70+
with self.assertNumQueries(FuzzyInt(28, 40)):
7171
self.client.get(self.list_url)
7272

7373

kpi/tests/api/v2/test_api_assets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,18 +419,18 @@ def test_creator_permissions_on_import(self):
419419
def test_query_counts(self):
420420
self.create_asset()
421421
# 45 when stripe is disabled, 46 when enabled
422-
with self.assertNumQueries(FuzzyInt(42, 51)):
422+
with self.assertNumQueries(FuzzyInt(36, 45)):
423423
self.client.get(self.list_url)
424424
# test query count does not increase with more assets
425425
# add several assets so the fuzziness of the count doesn't hide an O(n) addition
426426
self.create_asset()
427427
self.create_asset()
428428
self.create_asset()
429-
with self.assertNumQueries(FuzzyInt(42, 51)):
429+
with self.assertNumQueries(FuzzyInt(36, 45)):
430430
self.client.get(self.list_url)
431431

432432
# test query counts with search filter
433-
with self.assertNumQueries(FuzzyInt(42, 51)):
433+
with self.assertNumQueries(FuzzyInt(36, 45)):
434434
self.client.get(self.list_url, data={'q': 'asset_type:survey'})
435435

436436

kpi/views/v2/asset.py

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from kobo.apps.audit_log.base_views import AuditLoggedModelViewSet
1717
from kobo.apps.audit_log.models import AuditType
18+
from kobo.apps.openrosa.apps.logger.models.xform import XForm
1819
from kpi.constants import (
1920
ASSET_TYPE_ARG_NAME,
2021
ASSET_TYPE_SURVEY,
@@ -372,6 +373,10 @@ class AssetViewSet(
372373
]
373374
log_type = AuditType.PROJECT_HISTORY
374375

376+
def __init__(self, *args, **kwargs):
377+
super().__init__(*args, **kwargs)
378+
self._serializer_context = {}
379+
375380
@action(
376381
detail=False,
377382
methods=['POST'],
@@ -694,8 +699,9 @@ def get_serializer_context(self):
694699
"""
695700
Extra context provided to the serializer class.
696701
"""
697-
698702
context_ = super().get_serializer_context()
703+
context_.update(self._serializer_context)
704+
699705
if self.action == 'list':
700706
# To avoid making a triple join-query for each asset in the list
701707
# to retrieve related objects, we populated dicts key-ed by asset
@@ -705,34 +711,37 @@ def get_serializer_context(self):
705711
# The serializer will be able to pick what it needs from that dict
706712
# and narrow down data according to users' permissions.
707713

708-
# self.__filtered_queryset is set in the `list()` method that
714+
# self._filtered_queryset is set in the `list()` method that
709715
# DRF automatically calls and is overridden below. This is
710716
# to prevent double calls to `filter_queryset()` as described in
711717
# the issue here: https://github.com/kobotoolbox/kpi/issues/2576
712-
queryset = self.__filtered_queryset
718+
queryset = self._filtered_queryset
713719

714720
# 1) Retrieve all asset IDs of the current list
715-
asset_ids = AssetPagination.get_all_asset_ids_from_queryset(
716-
queryset
717-
)
721+
if 'asset_ids_cache' not in context_:
722+
asset_ids = AssetPagination.get_all_asset_ids_from_queryset(queryset)
723+
context_['asset_ids_cache'] = asset_ids
724+
else:
725+
asset_ids = context_['asset_ids_cache']
718726

719727
# 2) Get object permissions per asset
720728
context_[
721729
'object_permissions_per_asset'
722730
] = self.cache_all_assets_perms(asset_ids)
723-
context_['asset_ids_cache'] = asset_ids
724731

725732
# 3) Get the collection subscriptions per asset
726733
subscriptions_queryset = (
727734
UserAssetSubscription.objects.values('asset_id', 'user_id')
728735
.distinct()
736+
.filter(asset_id__in=asset_ids)
729737
.order_by('asset_id')
730738
)
731739

732740
user_subscriptions_per_asset = defaultdict(list)
733741
for record in subscriptions_queryset:
734742
user_subscriptions_per_asset[record['asset_id']].append(
735-
record['user_id'])
743+
record['user_id']
744+
)
736745

737746
context_['user_subscriptions_per_asset'] = user_subscriptions_per_asset
738747

@@ -770,17 +779,26 @@ def get_serializer_context(self):
770779
def list(self, request, *args, **kwargs):
771780
# assigning global filtered query set to prevent additional,
772781
# unnecessary calls to `filter_queryset`
773-
self.__filtered_queryset = self.filter_queryset(self.get_queryset())
782+
self._filtered_queryset = self.filter_queryset(self.get_queryset())
783+
784+
page = self.paginate_queryset(self._filtered_queryset)
774785

775-
page = self.paginate_queryset(self.__filtered_queryset)
776786
if page is not None:
777-
serializer = self.get_serializer(page, many=True)
787+
self._serializer_context['asset_ids_cache'] = []
788+
self._serializer_context['asset_uids_cache'] = []
789+
for asset in page:
790+
self._serializer_context['asset_ids_cache'].append(asset.pk)
791+
self._serializer_context['asset_uids_cache'].append(asset.uid)
792+
793+
serializer = self.get_serializer(
794+
self._attach_xforms_to_assets(page), many=True
795+
)
778796
metadata = None
779797
if request.GET.get('metadata') == 'on':
780-
metadata = self.get_metadata(self.__filtered_queryset)
798+
metadata = self.get_metadata(self._filtered_queryset)
781799
return self.get_paginated_response(serializer.data, metadata)
782800

783-
serializer = self.get_serializer(self.__filtered_queryset, many=True)
801+
serializer = self.get_serializer(self._filtered_queryset, many=True)
784802
return Response(serializer.data)
785803

786804
@action(detail=False, methods=['GET'])
@@ -861,8 +879,9 @@ def reports(self, request, *args, **kwargs):
861879
asset = self.get_object()
862880
if not asset.has_deployment:
863881
raise Http404
864-
serializer = ReportsDetailSerializer(asset,
865-
context=self.get_serializer_context())
882+
serializer = ReportsDetailSerializer(
883+
asset, context=self.get_serializer_context()
884+
)
866885
return Response(serializer.data)
867886

868887
@extend_schema(tags=['Form content'])
@@ -904,6 +923,39 @@ def xform(self, request, *args, **kwargs):
904923
def xls(self, request, *args, **kwargs):
905924
return self.table_view(self, request, *args, **kwargs)
906925

926+
def _attach_xforms_to_assets(self, assets: list):
927+
"""
928+
Attach the related XForm to each Asset and yield them to
929+
stay memory-efficient.
930+
"""
931+
932+
asset_uids = self._serializer_context['asset_uids_cache']
933+
xform_qs = (
934+
XForm.all_objects.filter(kpi_asset_uid__in=asset_uids)
935+
.only(
936+
'id_string',
937+
'num_of_submissions',
938+
'attachment_storage_bytes',
939+
'require_auth',
940+
'uuid',
941+
'mongo_uuid',
942+
'encrypted',
943+
'last_submission_time',
944+
'kpi_asset_uid',
945+
)
946+
.order_by()
947+
)
948+
949+
xforms_by_uid = {xf.kpi_asset_uid: xf for xf in xform_qs}
950+
951+
# Single pass over assets: attach _xform then yield
952+
for asset in assets:
953+
if asset.has_deployment:
954+
xf = xforms_by_uid.get(asset.uid, None)
955+
xf.user = asset.owner
956+
asset.deployment._xform = xf
957+
yield asset
958+
907959
def _bulk_asset_actions(self, data: dict) -> dict:
908960
params = {
909961
'data': data,

0 commit comments

Comments
 (0)