Skip to content

Commit

Permalink
adding optimization to short-circuit get_block_data early if blocking …
Browse files Browse the repository at this point in the history
…key value is missing
  • Loading branch information
ericbuckley committed Sep 24, 2024
1 parent 9f11ead commit 0b123ae
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/recordlinker/linkage/simple_mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def get_block_data(
key = models.BlockingKey[key_name]
# Get all the possible values from the data for this key
vals = [v for v in key.to_value(data)]
# If there are no values for a blocking key in the pass, we can skip
# the query and return an empty list; this is just an optimization.
if not vals:
return []
# Create a dynamic alias for the Blocking Value table using the index.
# This is necessary since we are potentially joining the same table
# multiple times with different conditions.
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_simple_mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,9 @@ def test_block_on_multiple_names(self, session, prime_index):
algo_config = {"blocks": [{"value": "first_name"}, {"value": "last_name"}]}
matches = simple_mpi.get_block_data(session, data, algo_config)
assert len(matches) == 4

def test_block_missing_keys(self, session, prime_index):
    """Blocking on a key the record has no value for yields no matches."""
    # The record only carries a birthdate, while the config also blocks on
    # "last_name", which is absent from the record.
    record = {"birthdate": "01/01/1980"}
    blocks = [{"value": "birthdate"}, {"value": "last_name"}]
    algo_config = {"blocks": blocks}
    matches = simple_mpi.get_block_data(session, record, algo_config)
    assert len(matches) == 0

0 comments on commit 0b123ae

Please sign in to comment.