From bc30d188c54f167fea05bc41c29b3dbe3b0ea297 Mon Sep 17 00:00:00 2001 From: Alex Dusenbery Date: Tue, 12 Dec 2023 10:14:35 -0500 Subject: [PATCH] fix: compare emails case-insensitively when setting assignment lms_user_ids ENT-8129 | The email stored on assignment records might be only a case-insensitive match to the corresponding email in the core.User records, so do case-insensitive matching during the assignment flow and during the post-user-save signal that updates assignment ``lms_user_ids``. --- .../apps/content_assignments/api.py | 23 +++++++++++++------ .../apps/content_assignments/signals.py | 2 +- .../content_assignments/tests/test_api.py | 2 +- .../content_assignments/tests/test_signals.py | 4 ++-- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/enterprise_access/apps/content_assignments/api.py b/enterprise_access/apps/content_assignments/api.py index 4881d78a..fb919563 100644 --- a/enterprise_access/apps/content_assignments/api.py +++ b/enterprise_access/apps/content_assignments/api.py @@ -8,7 +8,8 @@ from typing import Iterable from django.db import transaction -from django.db.models import Sum +from django.db.models import Q, Sum +from django.db.models.functions import Lower from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.locator import CourseLocator @@ -33,7 +34,7 @@ # * Divide result by 10 in case we are off by an order of magnitude. # # Batch size derivation formula: ((1 MB) / (258 B)) / 10 ≈ 350 -USER_EMAIL_READ_BATCH_SIZE = 350 +USER_EMAIL_READ_BATCH_SIZE = 100 class AllocationException(Exception): @@ -392,14 +393,22 @@ def _try_populate_assignments_lms_user_id(assignments): for assignment_chunk in chunks(assignments_with_empty_lms_user_id, USER_EMAIL_READ_BATCH_SIZE): emails = [assignment.learner_email for assignment in assignment_chunk] # Construct a list of tuples containing (email, lms_user_id) for every assignment in this chunk. - email_lms_user_id = User.objects.filter( - email__in=emails, # this is the part that could exceed max statement length if batch size is too large. - lms_user_id__isnull=False, - ).values_list('email', 'lms_user_id') + # this is the part that could exceed max statement length if batch size is too large. + # There's no case-insensitive IN query in Django, so we have to build up a big + # OR type of query. + email_filter = Q() + for assignment_email in emails: + email_filter |= Q(email__iexact=assignment_email) + + email_lms_user_id = User.objects.filter(email_filter, lms_user_id__isnull=False).annotate( + email_lower=Lower('email'), + ).values_list('email_lower', 'lms_user_id') + # dict() on a list of 2-tuples treats the first elements as keys and second elements as values. lms_user_id_by_email = dict(email_lms_user_id) + for assignment in assignment_chunk: - lms_user_id = lms_user_id_by_email.get(assignment.learner_email) + lms_user_id = lms_user_id_by_email.get(assignment.learner_email.lower()) if lms_user_id: assignment.lms_user_id = lms_user_id assignments_to_save.append(assignment) diff --git a/enterprise_access/apps/content_assignments/signals.py b/enterprise_access/apps/content_assignments/signals.py index 95a3b40b..f5aae4d5 100644 --- a/enterprise_access/apps/content_assignments/signals.py +++ b/enterprise_access/apps/content_assignments/signals.py @@ -20,7 +20,7 @@ def update_assignment_lms_user_id_from_user_email(sender, **kwargs): # pylint: user = kwargs['instance'] if user.lms_user_id: assignments_to_update = LearnerContentAssignment.objects.filter( - learner_email=user.email, + learner_email__iexact=user.email, lms_user_id=None, ) diff --git a/enterprise_access/apps/content_assignments/tests/test_api.py b/enterprise_access/apps/content_assignments/tests/test_api.py index c06c4f32..cd31a0e1 100644 --- a/enterprise_access/apps/content_assignments/tests/test_api.py +++ b/enterprise_access/apps/content_assignments/tests/test_api.py @@ -467,7 +467,7 @@ def test_allocate_assignments_set_lms_user_id( } if user_exists: - UserFactory(username='alice', email=learner_email, lms_user_id=lms_user_id) + UserFactory(username='alice', email=learner_email.upper(), lms_user_id=lms_user_id) assignment = None if existing_assignment_state: diff --git a/enterprise_access/apps/content_assignments/tests/test_signals.py b/enterprise_access/apps/content_assignments/tests/test_signals.py index 744db686..723b2960 100644 --- a/enterprise_access/apps/content_assignments/tests/test_signals.py +++ b/enterprise_access/apps/content_assignments/tests/test_signals.py @@ -41,8 +41,8 @@ def test_update_assignment_lms_user_id_from_user_email_login(self): test_user = UserFactory(email=TEST_EMAIL) # Simulate creating asignments for the test learner AFTER user creation. assignments_post_register = [ - LearnerContentAssignmentFactory(learner_email=TEST_EMAIL, lms_user_id=None), - LearnerContentAssignmentFactory(learner_email=TEST_EMAIL, lms_user_id=None), + LearnerContentAssignmentFactory(learner_email=TEST_EMAIL.upper(), lms_user_id=None), + LearnerContentAssignmentFactory(learner_email=TEST_EMAIL.upper(), lms_user_id=None), ] # Simulate the learner logging in. test_user.last_login = timezone.now()