Skip to content

Commit

Permalink
Removed first 2 chars of authority from IDENTIFIER blocking key (#227)
Browse files Browse the repository at this point in the history
## Description
Change IDENTIFIER blocking keys to the `{value[-4:]}:{type}` format;
the authority is no longer present in the keys.

BREAKING CHANGE: The format of IDENTIFIER blocking keys has changed, and
databases should be reset before upgrading.

## Related Issues
closes #226
  • Loading branch information
ericbuckley authored Feb 28, 2025
1 parent c54ff62 commit 1027b73
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 14 deletions.
2 changes: 1 addition & 1 deletion docs/site/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ patient data and used during query retrieval. The following blocking key types a

`IDENTIFIER` (ID: **10**)

: A colon separated string of the identifier type, first 2 characters of the authority and last 4 characters of the value.
: A colon-separated string of the last 4 characters of the value and the identifier type.


### Evaluation Functions
Expand Down
10 changes: 3 additions & 7 deletions src/recordlinker/schemas/pii.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,13 +394,9 @@ def blocking_keys(self, key: models.BlockingKey) -> set[str]:
# NOTE: we could optimize here and remove the dashes from the date
vals.update(self.feature_iter(Feature(attribute=FeatureAttribute.BIRTHDATE)))
elif key == models.BlockingKey.IDENTIFIER:
vals.update(
{
f"{value_part[-4:]}:{authority_part[:2]}:{type_part}"
for x in self.feature_iter(Feature(attribute=FeatureAttribute.IDENTIFIER))
for value_part, authority_part, type_part in [x.split(":", 2)]
}
)
for ident in self.feature_iter(Feature(attribute=FeatureAttribute.IDENTIFIER)):
_value, _, _type = ident.split(":", 2)
vals.add(f"{_value[-4:]}:{_type}")
elif key == models.BlockingKey.SEX:
vals.update(self.feature_iter(Feature(attribute=FeatureAttribute.SEX)))
elif key == models.BlockingKey.ZIP:
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/schemas/test_pii.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,9 @@ def test_blocking_keys_mrn_last_four(self):
rec = pii.PIIRecord(**{"identifiers": []})
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == set()
rec = pii.PIIRecord(**{"identifiers": [{"type": "MR", "value": "123456789"}]})
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789::MR"}
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:MR"}
rec = pii.PIIRecord(**{"identifiers": [{"type": "MR", "value": "89"}]})
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"89::MR"}
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"89:MR"}

# test multiple identifiers return correctly
rec = pii.PIIRecord(
Expand All @@ -369,7 +369,7 @@ def test_blocking_keys_mrn_last_four(self):
pii.Identifier(type="SS", value="123456789"),
]
)
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789::MR", "6789::SS"}
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:MR", "6789:SS"}

def test_blocking_keys_sex(self):
rec = pii.PIIRecord(**{"gender": "M"})
Expand Down Expand Up @@ -494,13 +494,13 @@ def test_blocking_keys_identifier(self):
rec = pii.PIIRecord(
**{"identifiers": [{"type": "MR", "value": "123456789", "authority": "NY"}]}
)
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:NY:MR"}
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:MR"}

# test that the authority is excluded from the blocking key
rec = pii.PIIRecord(
**{"identifiers": [{"type": "MR", "value": "123456789", "authority": "DMV"}]}
)
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:DM:MR"}
assert rec.blocking_keys(BlockingKey.IDENTIFIER) == {"6789:MR"}

def test_blocking_values(self):
rec = pii.PIIRecord(
Expand All @@ -515,7 +515,7 @@ def test_blocking_values(self):
if key == BlockingKey.BIRTHDATE:
assert val == "1980-01-01"
elif key == BlockingKey.IDENTIFIER:
assert val == "3456::MR"
assert val == "3456:MR"
elif key == BlockingKey.FIRST_NAME:
assert val == "John"
elif key == BlockingKey.LAST_NAME:
Expand Down

0 comments on commit 1027b73

Please sign in to comment.