Skip to content

Commit

Permalink
Add test case for duplicate submission with an attachment and improve…
Browse files Browse the repository at this point in the history
… in logic to extract the UUID from xml
  • Loading branch information
rajpatel24 committed Aug 29, 2024
1 parent 7630294 commit bf597d5
Show file tree
Hide file tree
Showing 22 changed files with 109 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def test_post_submission_authenticated_bad_json(self):
self.assertTrue('error' in rendered_response.data)
self.assertTrue(
rendered_response.data['error'].startswith(
'Received empty submission'
'Instance ID is required'
)
)
self.assertTrue(rendered_response.status_code == 400)
Expand Down Expand Up @@ -513,7 +513,7 @@ def test_post_concurrent_same_submissions(self):
results[result] += 1

assert results[status.HTTP_201_CREATED] == 1
assert results[status.HTTP_409_CONFLICT] == DUPLICATE_SUBMISSIONS_COUNT - 1
assert results[status.HTTP_202_ACCEPTED] == DUPLICATE_SUBMISSIONS_COUNT - 1


def submit_data(identifier, survey_, username_, live_server_url, token_):
Expand Down
9 changes: 5 additions & 4 deletions kobo/apps/openrosa/apps/logger/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ class BuildDbQueriesNoConfirmationProvidedError(Exception):
pass


class ConflictingXMLHashInstanceError(Exception):
pass


class DuplicateInstanceError(Exception):
def __init__(self, message='Duplicate Instance'):
super().__init__(message)
Expand All @@ -37,6 +33,11 @@ def __init__(self, message='Could not determine the user'):
super().__init__(message)


class InstanceIdMissingError(Exception):
def __init__(self, message='Could not determine the instance ID'):
super().__init__(message)


class InstanceMultipleNodeError(Exception):
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie</web_browsers>
<meta>
<instanceID>uuid:364f173c688e482486a48661700466gg</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie</web_browsers>
<meta>
<instanceID>uuid:364f173c688e482486a48661700466gg</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><survey_names id="survey_names"><start>2012-08-17T11:24:53.254+03</start><name>HD</name><end>2012-08-17T11:24:57.897+03</end></survey_names>
<?xml version='1.0' ?><survey_names id="survey_names"><start>2012-08-17T11:24:53.254+03</start><name>HD</name><end>2012-08-17T11:24:57.897+03</end><meta><instanceID>uuid:729f173c688e482486a48661700455ff</instanceID></meta></survey_names>
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@
<has_children>0</has_children>
<gps>-1.2836198 36.8795437 0.0 1044.0</gps>
<web_browsers>firefox chrome safari</web_browsers>
<meta>
<instanceID>uuid:364f173c688e482486a48661700466gg</instanceID>
</meta>
</tutorial>
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@
<has_children>0</has_children>
<gps>-1.2836198 36.8795437 0.0 1044.0</gps>
<web_browsers>firefox chrome safari</web_browsers>
<meta>
<instanceID>uuid:639f173c688e482486a48661700456ty</instanceID>
</meta>
</tutorial>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version='1.0' ?>
<tutorial id="tutorial">
<name>Larry
Again
</name><!-- newline to make sure we can handle it -->
<age>23</age>
<picture>1335783522563.jpg</picture>
<has_children>0</has_children>
<gps>-1.2836198 36.8795437 0.0 1044.0</gps>
<web_browsers>firefox chrome safari</web_browsers>
<meta>
<instanceID>uuid:729f173c688e482486a48661700455ff</instanceID>
</meta>
</tutorial>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><Water id="Water_2011_03_17"><research_assistant_name>Andrew</research_assistant_name><photo>1300375832136.jpg</photo><location><gps>9.045921564102173 7.526530623435974 572.0 48.0</gps><zone>northwest</zone><state_in_northwest>kebbi</state_in_northwest><lga_in_kebbi>bunza</lga_in_kebbi><ward>R</ward><community>Y</community></location><respondent><name>R</name><position>R</position><phone_number>4</phone_number></respondent><locality>T</locality><nearest_school_or_clinic>R</nearest_school_or_clinic><pre_existing_id>n/a</pre_existing_id><type>borehole</type><distribution_type>single_point</distribution_type><lift_mechanism>diesel</lift_mechanism><developed_by>federal_government</developed_by><managed_by>federal_government state_government</managed_by><managing_organizations><federal_government><name>T</name></federal_government><state_government><name>Y</name></state_government><local_government /><community /><individual /><international_development_partner /><private_organization /></managing_organizations><pay_for_water>no</pay_for_water><used_today>no</used_today><reasons_not_used>no_diesel missing_stolen_parts</reasons_not_used><physical_state>well_maintained</physical_state><other_close_water>yes</other_close_water><times_used>year_round</times_used><help_completed>nobody</help_completed><thank_you /></Water>
<?xml version='1.0' ?><Water id="Water_2011_03_17"><research_assistant_name>Andrew</research_assistant_name><photo>1300375832136.jpg</photo><location><gps>9.045921564102173 7.526530623435974 572.0 48.0</gps><zone>northwest</zone><state_in_northwest>kebbi</state_in_northwest><lga_in_kebbi>bunza</lga_in_kebbi><ward>R</ward><community>Y</community></location><respondent><name>R</name><position>R</position><phone_number>4</phone_number></respondent><locality>T</locality><nearest_school_or_clinic>R</nearest_school_or_clinic><pre_existing_id>n/a</pre_existing_id><type>borehole</type><distribution_type>single_point</distribution_type><lift_mechanism>diesel</lift_mechanism><developed_by>federal_government</developed_by><managed_by>federal_government state_government</managed_by><managing_organizations><federal_government><name>T</name></federal_government><state_government><name>Y</name></state_government><local_government /><community /><individual /><international_development_partner /><private_organization /></managing_organizations><pay_for_water>no</pay_for_water><used_today>no</used_today><reasons_not_used>no_diesel missing_stolen_parts</reasons_not_used><physical_state>well_maintained</physical_state><other_close_water>yes</other_close_water><times_used>year_round</times_used><help_completed>nobody</help_completed><meta><instanceID>uuid:435f173c688e482486a48661700467gh</instanceID></meta><thank_you /></Water>
Binary file not shown.
2 changes: 1 addition & 1 deletion kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_date_created_override(self):
"""
xml_file_path = os.path.join(
settings.OPENROSA_APP_DIR, "apps", "logger", "fixtures",
"tutorial", "instances", "tutorial_2012-06-27_11-27-53.xml")
"tutorial", "instances", "tutorial_2012-06-27_11-27-53_w_uuid.xml")
xml_file = django_file(
xml_file_path, field_name="xml_file", content_type="text/xml")
media_files = []
Expand Down
44 changes: 38 additions & 6 deletions kobo/apps/openrosa/apps/logger/tests/test_form_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile
from kobo.apps.openrosa.apps.main.tests.test_base import TestBase
from kobo.apps.openrosa.apps.logger.models import Instance
from kobo.apps.openrosa.apps.logger.models import Instance, Attachment
from kobo.apps.openrosa.apps.logger.models.instance import InstanceHistory
from kobo.apps.openrosa.apps.logger.xform_instance_parser import clean_and_parse_xml
from kobo.apps.openrosa.apps.viewer.models.parsed_instance import ParsedInstance
Expand All @@ -36,7 +36,7 @@ def test_form_post(self):
"""
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)

self._make_submission(xml_submission_file_path)
Expand Down Expand Up @@ -114,7 +114,7 @@ def test_submission_to_not_required_auth_as_anonymous_user(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)

# Anonymous should be able to submit data
Expand Down Expand Up @@ -154,7 +154,7 @@ def test_submission_to_require_auth_with_perm(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)
self._make_submission(xml_submission_file_path, auth=auth)
self.assertEqual(self.response.status_code, 201)
Expand Down Expand Up @@ -252,6 +252,38 @@ def test_duplicate_submission_with_different_content(self):
another_inst = Instance.objects.order_by('pk').last()
self.assertEqual(inst.xml, another_inst.xml)

def test_duplicate_submission_with_same_content_but_with_attachment(self):
"""
Test that submitting the same XML content twice,
first without and then with an attachment,
results in a single instance with the attachment added.
"""
xml_submission_file_path = os.path.join(
os.path.dirname(__file__),
"../fixtures/tutorial/instances/tutorial_with_attachment",
"tutorial_2012-06-27_11-27-53_w_attachment.xml"
)
media_file_path = os.path.join(
os.path.dirname(__file__),
"../fixtures/tutorial/instances/tutorial_with_attachment",
"1335783522563.jpg"
)
initial_instance_count = Instance.objects.count()

# Test submission with XML file
self._make_submission(xml_submission_file_path)
initial_instance = Instance.objects.last()
self.assertEqual(self.response.status_code, 201)
self.assertEqual(Instance.objects.count(), initial_instance_count + 1)

# Test duplicate submission with attachment
with open(media_file_path, 'rb') as media_file:
self._make_submission(xml_submission_file_path, media_file=media_file)
self.assertEqual(self.response.status_code, 201)
self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
self.assertEqual(Attachment.objects.filter(instance=initial_instance).count(), 1)


def test_owner_can_edit_submissions(self):
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
Expand Down Expand Up @@ -393,7 +425,7 @@ def test_submission_when_requires_auth(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)
auth = DigestAuth('alice', 'alice')
self._make_submission(
Expand All @@ -410,7 +442,7 @@ def test_submission_linked_to_reporter(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)
auth = DigestAuth('alice', 'alice')
self._make_submission(
Expand Down
8 changes: 6 additions & 2 deletions kobo/apps/openrosa/apps/logger/tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def test_parse_xform_nested_repeats(self):
'has_kids': '1'
},
'web_browsers': 'chrome ie',
'gps': '-1.2627557 36.7926442 0.0 30.0'
'gps': '-1.2627557 36.7926442 0.0 30.0',
'meta': {
'instanceID': 'uuid:364f173c688e482486a48661700466gg'
}
}
}
self.assertEqual(dict_, expected_dict)
Expand All @@ -83,7 +86,8 @@ def test_parse_xform_nested_repeats(self):
'kids/has_kids': '1',
'info/age': '80',
'web_browsers': 'chrome ie',
'info/name': 'Adam'
'info/name': 'Adam',
'meta/instanceID': 'uuid:364f173c688e482486a48661700466gg'
}
self.assertEqual(flat_dict, expected_flat_dict)

Expand Down
15 changes: 6 additions & 9 deletions kobo/apps/openrosa/apps/logger/xform_instance_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,12 @@ def get_meta_from_xml(xml_str: str, meta_name: str) -> str:

def get_uuid_from_xml(xml):

def _uuid_only(uuid, regex):
matches = regex.match(uuid)
if matches and len(matches.groups()) > 0:
return matches.groups()[0]
return None
def _uuid_only(uuid):
return re.sub(r'^uuid:', '', uuid)

uuid = get_meta_from_xml(xml, "instanceID")
regex = re.compile(r"uuid:(.*)")
if uuid:
return _uuid_only(uuid, regex)
return _uuid_only(uuid)
# check in survey_node attributes
xml = clean_and_parse_xml(xml)
children = xml.childNodes
Expand All @@ -74,12 +71,12 @@ def _uuid_only(uuid, regex):
survey_node = children[0]
uuid = survey_node.getAttribute('instanceID')
if uuid != '':
return _uuid_only(uuid, regex)
return _uuid_only(uuid)
return None


def get_root_uuid_from_xml(xml):
root_uuid = get_meta_from_xml(xml, "rootUuid")
root_uuid = get_meta_from_xml(xml, 'rootUuid')
if root_uuid:
return root_uuid

Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><gps id="gps"><location>40.81101715564728 -73.96446704864502 -152.0 16.0</location></gps>
<?xml version='1.0' ?><gps id="gps"><location>40.81101715564728 -73.96446704864502 -152.0 16.0</location><meta><instanceID>uuid:729f173c688e482486a48661700455ff</instanceID></meta></gps>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><gps id="gps"><location>40.811086893081665 -73.96449387073517 -113.0 16.0</location></gps>
<?xml version='1.0' ?><gps id="gps"><location>40.811086893081665 -73.96449387073517 -113.0 16.0</location><meta><instanceID>uuid:435f173c688e482486a48661700467gh</instanceID></meta></gps>
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
<gps>-1.2627107 36.7925771 0.0 37.5</gps>
</gps_group>
<web_browsers>chrome safari</web_browsers>
<meta>
<instanceID>uuid:435f173c688e482486a486617004534df</instanceID>
</meta>
</grouped_gps>
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie</web_browsers>
<meta>
<instanceID>uuid:435f173c688e482463a486617004534df</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie firefox</web_browsers>
<meta>
<instanceID>uuid:32sd3c688e482486a48661700455ff</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def test_csv_columns_for_gps_within_groups(self):
'gps_group/_gps_longitude',
'gps_group/_gps_altitude',
'gps_group/_gps_precision',
'meta/instanceID',
'web_browsers/firefox',
'web_browsers/chrome',
'web_browsers/ie',
Expand Down Expand Up @@ -246,6 +247,7 @@ def test_format_mongo_data_for_csv(self):
'kids/kids_details[1]/kids_age': '50',
'kids/kids_details[2]/kids_name': 'Cain',
'kids/kids_details[2]/kids_age': '76',
'meta/instanceID': 'uuid:435f173c688e482463a486617004534df',
'web_browsers/chrome': True,
'web_browsers/ie': True,
'web_browsers/safari': False,
Expand Down
19 changes: 10 additions & 9 deletions kobo/apps/openrosa/libs/utils/logger_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
from wsgiref.util import FileWrapper

from kobo.apps.openrosa.apps.logger.exceptions import (
ConflictingXMLHashInstanceError,
DuplicateUUIDError,
FormInactiveError,
InstanceIdMissingError,
TemporarilyUnavailableError,
)
from kobo.apps.openrosa.apps.logger.models import Attachment, Instance, XForm
Expand Down Expand Up @@ -168,10 +168,12 @@ def create_instance(

# get new and deprecated uuid's
new_uuid = get_uuid_from_xml(xml)
if not new_uuid:
raise InstanceIdMissingError()

with get_instance_lock(xml_hash, new_uuid, xform.id) as lock_acquired:
if not lock_acquired:
raise ConflictingXMLHashInstanceError()
raise DuplicateInstanceError()
# Dorey's rule from 2012 (commit 890a67aa):
# Ignore submission as a duplicate IFF
# * a submission's XForm collects start time
Expand All @@ -184,13 +186,13 @@ def create_instance(
# and still exactly matches an existing submission, it's certainly a
# duplicate (https://docs.opendatakit.org/openrosa-metadata/#fields).

if xform.has_start_time or new_uuid is not None:
if xform.has_start_time or new_uuid:
# XML matches are identified by identical content hash OR, when a
# content hash is not present, by string comparison of the full
# content, which is slow! Use the management command
# `populate_xml_hashes_for_instances` to hash existing submissions
existing_instance = Instance.objects.filter(
Q(xml_hash=xml_hash) | Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
xml_hash=xml_hash,
xform__user=xform.user,
).first()
else:
Expand Down Expand Up @@ -591,6 +593,10 @@ def safe_create_instance(
error = OpenRosaResponseBadRequest(
t("Received empty submission. No instance was created")
)
except InstanceIdMissingError:
error = OpenRosaResponseBadRequest(
t("Instance ID is required")
)
except FormInactiveError:
error = OpenRosaResponseNotAllowed(t("Form is not active"))
except TemporarilyUnavailableError:
Expand All @@ -601,11 +607,6 @@ def safe_create_instance(
)
except ExpatError as e:
error = OpenRosaResponseBadRequest(t("Improperly formatted XML."))
except ConflictingXMLHashInstanceError:
response = OpenRosaResponse(t('Conflict with already existing instance'))
response.status_code = 409
response['Location'] = request.build_absolute_uri(request.path)
error = response
except DuplicateInstanceError:
response = OpenRosaResponse(t('Duplicate instance'))
response.status_code = 202
Expand Down

0 comments on commit bf597d5

Please sign in to comment.