generated from CDCgov/template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into fix/199-concate-given-name
- Loading branch information
Showing 2 changed files with 158 additions and 64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,8 +52,8 @@ def test_model_construct(self): | |
"type": "DL", | ||
"value": "D1234567", | ||
"authority": "VA", | ||
} | ||
] | ||
}, | ||
], | ||
} | ||
record = pii.PIIRecord.model_construct(**data) | ||
assert record.birth_date == "1980-2-1" | ||
|
@@ -72,7 +72,7 @@ def test_model_construct(self): | |
assert record.address[1].postal_code == "98765-4321" | ||
assert record.address[1].county == "county2" | ||
|
||
#identifiers | ||
# identifiers | ||
assert str(record.identifiers[0].type) == "MR" | ||
assert record.identifiers[0].value == "99" | ||
|
||
|
@@ -129,14 +129,14 @@ def test_parse_sex(self): | |
def test_parse_ssn(self): | ||
record = pii.PIIRecord(identifiers=[pii.Identifier(type="SS", value="123-45-6789")]) | ||
assert record.identifiers[0].value == "123-45-6789" | ||
#testing extra spaces | ||
# testing extra spaces | ||
record = pii.PIIRecord(identifiers=[pii.Identifier(type="SS", value=" 123-45-6789 ")]) | ||
assert record.identifiers[0].value == "123-45-6789" | ||
#testing no dashes | ||
# testing no dashes | ||
record = pii.PIIRecord(identifiers=[pii.Identifier(type="SS", value="123456789")]) | ||
assert record.identifiers[0].value == "123-45-6789" | ||
record = pii.PIIRecord(identifiers=[pii.Identifier(type="SS", value="1-2-3")]) | ||
assert record.identifiers[0].value == '' | ||
assert record.identifiers[0].value == "" | ||
record = pii.PIIRecord() | ||
assert record.identifiers == [] | ||
|
||
|
@@ -228,49 +228,91 @@ def test_feature_iter(self): | |
"type": "DL", | ||
"value": "D1234567", | ||
"authority": "VA", | ||
} | ||
}, | ||
], | ||
) | ||
|
||
with pytest.raises(ValueError): | ||
list(record.feature_iter("external_id")) | ||
|
||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.BIRTHDATE))) == ["1980-02-01"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.BIRTHDATE))) == [ | ||
"1980-02-01" | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.SEX))) == ["M"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.ADDRESS))) == ["123 Main St", "456 Elm St"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.CITY))) == ["Anytown", "Somecity"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.STATE))) == ["NY", "CA"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.ZIP))) == ["12345", "98765"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.GIVEN_NAME))) == ["John L", "Jane"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.FIRST_NAME))) == ["John", "Jane"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.LAST_NAME))) == ["Doe", "Smith"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.ADDRESS))) == [ | ||
"123 Main St", | ||
"456 Elm St", | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.CITY))) == [ | ||
"Anytown", | ||
"Somecity", | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.STATE))) == [ | ||
"NY", | ||
"CA", | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.ZIP))) == [ | ||
"12345", | ||
"98765", | ||
] | ||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.GIVEN_NAME)) | ||
) == ["John L", "Jane"] | ||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.FIRST_NAME)) | ||
) == ["John", "Jane"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.LAST_NAME))) == [ | ||
"Doe", | ||
"Smith", | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == [] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.TELECOM))) == [ | ||
"555-123-4567", | ||
"(555) 987-6543", | ||
"[email protected]", | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.PHONE))) == ["5559876543"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.EMAIL))) == ["[email protected]"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.SUFFIX))) == ["suffix", "suffix2"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.COUNTY))) == ["county"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.IDENTIFIER))) == ["MR::123456", "SS::123-45-6789", "DL:VA:D1234567"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.PHONE))) == [ | ||
"5559876543" | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.EMAIL))) == [ | ||
"[email protected]" | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.SUFFIX))) == [ | ||
"suffix", | ||
"suffix2", | ||
] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.COUNTY))) == [ | ||
"county" | ||
] | ||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.IDENTIFIER)) | ||
) == ["123456::MR", "123-45-6789::SS", "D1234567:VA:DL"] | ||
|
||
# IDENTIFIER with suffix | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.IDENTIFIER, suffix="MR"))) == ["MR::123456"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.IDENTIFIER, suffix="SS"))) == ["SS::123-45-6789"] | ||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.IDENTIFIER, suffix="MR")) | ||
) == ["123456::MR"] | ||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.IDENTIFIER, suffix="SS")) | ||
) == ["123-45-6789::SS"] | ||
|
||
# Other fields work okay; a few more checks on different race yield values | ||
record = pii.PIIRecord(race="asked unknown") | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == [] | ||
record = pii.PIIRecord(race="asked but unknown") | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == [] | ||
record = pii.PIIRecord(race="asian") | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == ["ASIAN"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == [ | ||
"ASIAN" | ||
] | ||
record = pii.PIIRecord(race="african american") | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == ["BLACK"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == [ | ||
"BLACK" | ||
] | ||
record = pii.PIIRecord(race="white") | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == ["WHITE"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.RACE))) == [ | ||
"WHITE" | ||
] | ||
|
||
def test_feature_iter_given_name(self): | ||
record = pii.PIIRecord( | ||
|
@@ -280,9 +322,12 @@ def test_feature_iter_given_name(self): | |
], | ||
) | ||
|
||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.GIVEN_NAME))) == ["John L", "Jon Lewis Doe"] | ||
assert list(record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.FIRST_NAME))) == ["John", "Jon"] | ||
|
||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.GIVEN_NAME)) | ||
) == ["John L", "Jon Lewis Doe"] | ||
assert list( | ||
record.feature_iter(pii.Feature(attribute=pii.FeatureAttribute.FIRST_NAME)) | ||
) == ["John", "Jon"] | ||
|
||
def test_blocking_keys_invalid(self): | ||
rec = pii.PIIRecord() | ||
|
@@ -312,17 +357,19 @@ def test_blocking_keys_mrn_last_four(self): | |
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == set() | ||
rec = pii.PIIRecord(**{"identifiers": []}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == set() | ||
rec = pii.PIIRecord(**{"identifiers": [{ "type": "MR", "value": "123456789" }]}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"MR::6789"} | ||
rec = pii.PIIRecord(**{"identifiers": [{ "type": "MR", "value": "89" }]}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"MR::89"} | ||
|
||
#test multiple identifiers return correctly | ||
rec = pii.PIIRecord(identifiers=[ | ||
pii.Identifier(type="MR", value="123456789"), | ||
pii.Identifier(type="SS", value="123456789"), | ||
]) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"MR::6789", "SS::6789"} | ||
rec = pii.PIIRecord(**{"identifiers": [{"type": "MR", "value": "123456789"}]}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789::MR"} | ||
rec = pii.PIIRecord(**{"identifiers": [{"type": "MR", "value": "89"}]}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"89::MR"} | ||
|
||
# test multiple identifiers return correctly | ||
rec = pii.PIIRecord( | ||
identifiers=[ | ||
pii.Identifier(type="MR", value="123456789"), | ||
pii.Identifier(type="SS", value="123456789"), | ||
] | ||
) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789::MR", "6789::SS"} | ||
|
||
def test_blocking_keys_sex(self): | ||
rec = pii.PIIRecord(**{"gender": "M"}) | ||
|
@@ -398,30 +445,62 @@ def test_blocking_keys_phone_last_four(self): | |
assert rec.blocking_keys(BlockingKey.PHONE) == set() | ||
rec = pii.PIIRecord(**{"telecom": [{"value": "(555) 123-4567", "system": "phone"}]}) | ||
assert rec.blocking_keys(BlockingKey.PHONE) == {"4567"} | ||
rec = pii.PIIRecord(**{"telecom": [{"value": "555.123.4567", "system": "phone"}, {"value": "555-987-6543 ext 123", "system": "phone"}]}) | ||
rec = pii.PIIRecord( | ||
**{ | ||
"telecom": [ | ||
{"value": "555.123.4567", "system": "phone"}, | ||
{"value": "555-987-6543 ext 123", "system": "phone"}, | ||
] | ||
} | ||
) | ||
assert rec.blocking_keys(BlockingKey.PHONE) == {"4567", "6543"} | ||
rec = pii.PIIRecord(**{"telecom": [{"value": "555.123.4567", "system": "phone"}, {"value": "555-987-6543", "system": "fax"}]}) | ||
rec = pii.PIIRecord( | ||
**{ | ||
"telecom": [ | ||
{"value": "555.123.4567", "system": "phone"}, | ||
{"value": "555-987-6543", "system": "fax"}, | ||
] | ||
} | ||
) | ||
assert rec.blocking_keys(BlockingKey.PHONE) == {"4567"} | ||
|
||
def test_blocking_keys_email_first_four(self): | ||
rec = pii.PIIRecord(**{"email": "[email protected]"}) | ||
assert rec.blocking_keys(BlockingKey.EMAIL) == set() | ||
rec = pii.PIIRecord(**{"telecom": [{"value": "[email protected]", "system": "email"}]}) | ||
assert rec.blocking_keys(BlockingKey.EMAIL) == {"test"} | ||
rec = pii.PIIRecord(**{"telecom": [{"value": "[email protected]", "system": "email"}, {"value": "[email protected]", "system": "email"}]}) | ||
rec = pii.PIIRecord( | ||
**{ | ||
"telecom": [ | ||
{"value": "[email protected]", "system": "email"}, | ||
{"value": "[email protected]", "system": "email"}, | ||
] | ||
} | ||
) | ||
assert rec.blocking_keys(BlockingKey.EMAIL) == {"test", "bob@"} | ||
rec = pii.PIIRecord(**{"telecom": [{"value": "[email protected]", "system": "email"}, {"value": "[email protected]", "system": "other"}]}) | ||
rec = pii.PIIRecord( | ||
**{ | ||
"telecom": [ | ||
{"value": "[email protected]", "system": "email"}, | ||
{"value": "[email protected]", "system": "other"}, | ||
] | ||
} | ||
) | ||
assert rec.blocking_keys(BlockingKey.EMAIL) == {"t@gm"} | ||
|
||
def test_blocking_keys_identifier(self): | ||
rec = pii.PIIRecord(**{"identifiers": []}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == set() | ||
rec = pii.PIIRecord(**{"identifiers": [{"type": "MR", "value": "123456789", "authority": "NY"}]}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"MR:NY:6789"} | ||
rec = pii.PIIRecord( | ||
**{"identifiers": [{"type": "MR", "value": "123456789", "authority": "NY"}]} | ||
) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:NY:MR"} | ||
|
||
#test only get first 2 characters of authority for blocking | ||
rec = pii.PIIRecord(**{"identifiers": [{"type": "MR", "value": "123456789", "authority": "DMV"}]}) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"MR:DM:6789"} | ||
# test only get first 2 characters of authority for blocking | ||
rec = pii.PIIRecord( | ||
**{"identifiers": [{"type": "MR", "value": "123456789", "authority": "DMV"}]} | ||
) | ||
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:DM:MR"} | ||
|
||
def test_blocking_values(self): | ||
rec = pii.PIIRecord( | ||
|
@@ -436,7 +515,7 @@ def test_blocking_values(self): | |
if key == BlockingKey.BIRTHDATE: | ||
assert val == "1980-01-01" | ||
elif key == BlockingKey.IDENTIFIER: | ||
assert val == "MR::3456" | ||
assert val == "3456::MR" | ||
elif key == BlockingKey.FIRST_NAME: | ||
assert val == "John" | ||
elif key == BlockingKey.LAST_NAME: | ||
|