Skip to content

Commit

Permalink
Fix part of oppia#19636: Added beam jobs for user bio validation (oppia#19993)
Browse files Browse the repository at this point in the history

* added beam job

* linting

* Small Changes

* linting

* minor fixes

* use username instead user ID

* lint checks

* license date correction
  • Loading branch information
yashwardhan-jyani authored Apr 17, 2024
1 parent d21272a commit 71643d2
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 0 deletions.
91 changes: 91 additions & 0 deletions core/jobs/batch_jobs/user_validation_jobs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# coding: utf-8
#
# Copyright 2024 The Oppia Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Validation Jobs for user models."""

from __future__ import annotations

from core.jobs import base_jobs
from core.jobs.io import ndb_io
from core.jobs.transforms import job_result_transforms
from core.jobs.types import job_run_result
from core.platform import models

import apache_beam as beam

MYPY = False
if MYPY: # pragma: no cover
from mypy_imports import user_models

(user_models,) = models.Registry.import_models([models.Names.USER])


class GetUsersWithInvalidBioJob(base_jobs.JobBase):
    """Audit job that reports users whose bio is missing or too long.

    A bio is considered invalid when it is not a string (e.g. None) or
    when it has more than 2000 characters.
    """

    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Builds the pipeline that finds and reports invalid user bios.

        Returns:
            PCollection. The flattened job results: the count report of
            all users examined, the count report of users with an invalid
            bio, and one stderr result (username and bio) for each user
            with an invalid bio.
        """
        all_user_settings = (
            self.pipeline
            | 'Get all UserSettingsModels' >> ndb_io.GetModels(
                user_models.UserSettingsModel.get_all())
        )

        # Only the username and the bio are needed downstream, so each
        # model is reduced to a (username, bio) pair.
        username_bio_pairs = (
            all_user_settings
            | 'Extract username and bio from model' >> beam.Map(
                lambda model: (model.username, model.user_bio))
        )

        # A pair is kept unless its bio is a string of at most 2000
        # characters.
        invalid_pairs = (
            username_bio_pairs
            | 'Get users with null bio or bio with length greater than 2000'
            >> beam.Filter(
                lambda pair: not (
                    isinstance(pair[1], str) and len(pair[1]) <= 2000))
        )

        per_user_report = (
            invalid_pairs
            | 'Report info on each invalid user bio' >> beam.Map(
                lambda pair: job_run_result.JobRunResult.as_stderr(
                    'The username of user is "%s" and their bio is "%s"'
                    % (pair[0], pair[1])))
        )

        total_users_report = (
            username_bio_pairs
            | 'Report count of user models' >> (
                job_result_transforms.CountObjectsToJobRunResult(
                    'CountTotalUsers'))
        )

        invalid_users_report = (
            invalid_pairs
            | 'Report count of invalid user models' >> (
                job_result_transforms.CountObjectsToJobRunResult(
                    'CountInvalidUserBios'))
        )

        all_reports = (
            total_users_report,
            invalid_users_report,
            per_user_report,
        )
        return all_reports | 'Combine reported results' >> beam.Flatten()
99 changes: 99 additions & 0 deletions core/jobs/batch_jobs/user_validation_jobs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# coding: utf-8
#
# Copyright 2024 The Oppia Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for jobs.batch_jobs.user_validation_jobs."""

from __future__ import annotations

from core.jobs import job_test_utils
from core.jobs.batch_jobs import user_validation_jobs
from core.jobs.types import job_run_result
from core.platform import models

from typing import Final, Type

MYPY = False
if MYPY:
from mypy_imports import user_models

(user_models,) = models.Registry.import_models([models.Names.USER])


class GetUsersWithInvalidBioJobTests(job_test_utils.JobTestBase):
    """Tests for GetUsersWithInvalidBioJob."""

    JOB_CLASS: Type[
        user_validation_jobs.GetUsersWithInvalidBioJob
    ] = user_validation_jobs.GetUsersWithInvalidBioJob

    USER_USERNAME_1: Final = 'user_1'
    USER_USERNAME_2: Final = 'user_2'
    USER_USERNAME_3: Final = 'user_3'
    USER_USERNAME_4: Final = 'user_4'

    # The job flags bios longer than 2000 characters. Only the length of
    # the bio matters to the job, so the fixtures are generated instead of
    # being spelled out as (duplicated) multi-thousand-character literals.
    MAX_VALID_BIO_LENGTH: Final = 2000
    LONGEST_VALID_BIO: Final = 'a' * MAX_VALID_BIO_LENGTH
    TOO_LONG_BIO: Final = 'a' * (MAX_VALID_BIO_LENGTH + 1)

    def _put_user(self, username: str, **properties: str) -> None:
        """Stores a UserSettingsModel with the given username.

        Args:
            username: str. The username of the user. It is also used to
                build the user's email address.
            **properties: str. Extra model properties (e.g. user_bio).
                A property that is not passed is left unset on the model.
        """
        user = self.create_model(
            user_models.UserSettingsModel,
            username=username,
            email='%s@example.com' % username,
            **properties
        )
        user.update_timestamps()
        self.put_multi([user])

    def test_empty_storage(self) -> None:
        self.assert_job_output_is_empty()

    def test_user_with_null_bio(self) -> None:
        # No user_bio is passed, so the bio is left unset on the model.
        self._put_user(self.USER_USERNAME_1)

        self.assert_job_output_is([
            job_run_result.JobRunResult(
                stderr='The username of user is "user_1"' +
                ' and their bio is "None"'),
            job_run_result.JobRunResult(
                stdout='CountInvalidUserBios SUCCESS: 1'),
            job_run_result.JobRunResult(
                stdout='CountTotalUsers SUCCESS: 1')
        ])

    def test_user_with_valid_bio(self) -> None:
        self._put_user(self.USER_USERNAME_2, user_bio='Test Bio')

        self.assert_job_output_is([
            job_run_result.JobRunResult(
                stdout='CountTotalUsers SUCCESS: 1')
        ])

    def test_user_with_bio_of_maximum_length(self) -> None:
        # Boundary case: a bio of exactly 2000 characters is not flagged,
        # because the job only rejects lengths greater than 2000.
        self._put_user(
            self.USER_USERNAME_4, user_bio=self.LONGEST_VALID_BIO)

        self.assert_job_output_is([
            job_run_result.JobRunResult(
                stdout='CountTotalUsers SUCCESS: 1')
        ])

    def test_user_with_too_long_bio(self) -> None:
        self._put_user(self.USER_USERNAME_3, user_bio=self.TOO_LONG_BIO)

        self.assert_job_output_is([
            job_run_result.JobRunResult(
                stderr=(
                    'The username of user is "user_3" and their bio is "%s"'
                    % self.TOO_LONG_BIO)),
            job_run_result.JobRunResult(
                stdout='CountInvalidUserBios SUCCESS: 1'),
            job_run_result.JobRunResult(
                stdout='CountTotalUsers SUCCESS: 1')
        ])
1 change: 1 addition & 0 deletions core/jobs/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
# of every job.
from core.jobs.batch_jobs import blog_post_search_indexing_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import blog_validation_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import user_validation_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import collection_info_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import email_deletion_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import exp_migration_jobs # pylint: disable=unused-import # isort: skip
Expand Down
1 change: 1 addition & 0 deletions scripts/backend_test_shards.json
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@
"core.jobs.batch_jobs.blog_validation_jobs_test",
"core.jobs.transforms.validation.blog_validation_test",
"core.jobs.transforms.validation.skill_validation_test",
"core.jobs.batch_jobs.user_validation_jobs_test",
"core.jobs.transforms.validation.story_validation_test",
"core.tests.load_tests.feedback_thread_summaries_test",
"core.tests.test_utils_test",
Expand Down
1 change: 1 addition & 0 deletions scripts/linters/test_files/valid_job_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from __future__ import annotations
from core.jobs.batch_jobs import blog_post_search_indexing_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import blog_validation_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import user_validation_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import collection_info_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import email_deletion_jobs # pylint: disable=unused-import # isort: skip
from core.jobs.batch_jobs import exp_migration_jobs # pylint: disable=unused-import # isort: skip
Expand Down

0 comments on commit 71643d2

Please sign in to comment.