Skip to content

Commit 73b35cc

Browse files
authored
[Core] Improve hash collision avoidance in prefix caching (#12621)
Signed-off-by: Russell Bryant <[email protected]>
1 parent 5095e96 commit 73b35cc

File tree

3 files changed

+45
-10
lines changed

3 files changed

+45
-10
lines changed

Diff for: tests/core/block/test_prefix_caching_block.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ def test_nth_block_has_correct_content_hash(seed: int, block_size: int,
6565

6666
previous_block = MagicMock(spec=PrefixCachingBlock)
6767
prev_block_hash = random.randint(0, 1000)
68-
previous_block.content_hash = (prev_block_hash
69-
if prev_block_has_hash else None)
68+
previous_block.content_hash = (prev_block_hash if prev_block_has_hash
69+
else hash('None'))
7070

7171
num_to_fill = block_size if is_curr_block_full else random.randint(
7272
0, block_size - 1)

Diff for: vllm/core/block/prefix_caching_block.py

+34 -8
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ class PrefixCachingBlockAllocator(BlockAllocator):
6565
from 0 to num_blocks - 1.
6666
"""
6767

68+
# Note that we use 'None' as a string here instead of None because
69+
# as of Python 3.12, hash(None) returns a constant predictable value.
70+
# This could possibly make it easier to find and exploit hash
71+
# collisions. 'None' as a string will be hashed differently per process,
72+
# but consistently within the same process. This is the same as the
73+
# behavior of None prior to Python 3.12.
74+
_none_hash: int = hash('None')
75+
76+
# Implements Block.Factory.
6877
def __init__(
6978
self,
7079
num_blocks: int,
@@ -122,7 +131,6 @@ def __init__(
122131

123132
self.metric_data = CacheMetricData()
124133

125-
# Implements Block.Factory.
126134
def _create_block(
127135
self,
128136
prev_block: Optional[Block],
@@ -737,6 +745,14 @@ class PrefixCachingBlock(Block):
737745
such as adapters that influence the block, apart from the token_ids.
738746
"""
739747

748+
# Note that we use 'None' as a string here instead of None because
749+
# as of Python 3.12, hash(None) returns a constant predictable value.
750+
# This could possibly make it easier to find and exploit hash
751+
# collisions. 'None' as a string will be hashed differently per process,
752+
# but consistently within the same process. This is the same as the
753+
# behavior of None prior to Python 3.12.
754+
_none_hash: int = hash('None')
755+
740756
def __init__(
741757
self,
742758
prev_block: Optional[Block],
@@ -891,13 +907,13 @@ def content_hash(self) -> Optional[int]:
891907

892908
is_first_block = self._prev_block is None
893909
prev_block_hash = (
894-
None if is_first_block else
910+
self._none_hash if is_first_block else
895911
self._prev_block.content_hash # type: ignore
896912
)
897913

898914
# Previous block exists but does not yet have a hash.
899915
# Return no hash in this case.
900-
if prev_block_hash is None and not is_first_block:
916+
if prev_block_hash == self._none_hash and not is_first_block:
901917
return None
902918

903919
self._cached_content_hash = PrefixCachingBlock.hash_block_tokens(
@@ -907,8 +923,9 @@ def content_hash(self) -> Optional[int]:
907923
extra_hash=self._extra_hash)
908924
return self._cached_content_hash
909925

910-
@staticmethod
911-
def hash_block_tokens(is_first_block: bool,
926+
@classmethod
927+
def hash_block_tokens(cls,
928+
is_first_block: bool,
912929
prev_block_hash: Optional[int],
913930
cur_block_token_ids: List[int],
914931
extra_hash: Optional[int] = None) -> int:
@@ -929,7 +946,8 @@ def hash_block_tokens(is_first_block: bool,
929946
Returns:
930947
- int: The computed hash value for the block.
931948
"""
932-
assert (prev_block_hash is None) == is_first_block
949+
if is_first_block and prev_block_hash is None:
950+
prev_block_hash = cls._none_hash
933951
return hash((is_first_block, prev_block_hash, *cur_block_token_ids,
934952
extra_hash))
935953

@@ -949,6 +967,14 @@ class ComputedBlocksTracker:
949967
cached block hashes in the allocator.
950968
"""
951969

970+
# Note that we use 'None' as a string here instead of None because
971+
# as of Python 3.12, hash(None) returns a constant predictable value.
972+
# This could possibly make it easier to find and exploit hash
973+
# collisions. 'None' as a string will be hashed differently per process,
974+
# but consistently within the same process. This is the same as the
975+
# behavior of None prior to Python 3.12.
976+
_none_hash: int = hash('None')
977+
952978
def __init__(
953979
self,
954980
allocator: DeviceAwareBlockAllocator,
@@ -994,7 +1020,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None:
9941020
# We need to know the hash of the previous block to compute the hash of
9951021
# the current block so that blocks could be uniquely identified across
9961022
# sequences of prefixes.
997-
prev_block_hash = (None if cur_num_blocks_recorded == 0 else
1023+
prev_block_hash = (self._none_hash if cur_num_blocks_recorded == 0 else
9981024
block_hashes_recorded[-1])
9991025
# Only update the computed block hashes for the new blocks
10001026
for i in range(cur_num_blocks_recorded, num_computed_blocks):
@@ -1009,7 +1035,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None:
10091035
# This has to be kept in sync with the allocator's hash
10101036
# calculation.
10111037
block_hash = PrefixCachingBlock.hash_block_tokens(
1012-
is_first_block=prev_block_hash is None,
1038+
is_first_block=prev_block_hash == self._none_hash,
10131039
prev_block_hash=prev_block_hash,
10141040
cur_block_token_ids=block_token_ids,
10151041
extra_hash=extra_hash,

Diff for: vllm/v1/core/kv_cache_utils.py

+9
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,15 @@ def hash_block_tokens(
263263
The hash value of the block and the token ids in the block.
264264
The entire tuple is used as the hash key of the block.
265265
"""
266+
if not parent_block_hash:
267+
# Note that we use 'None' as a string here instead of None because
268+
# as of Python 3.12, hash(None) returns a constant predictable value.
269+
# This could possibly make it easier to find and exploit hash
270+
# collisions. 'None' as a string will be hashed differently per process,
271+
# but consistently within the same process. This is the same as the
272+
# behavior of None prior to Python 3.12.
273+
parent_block_hash = hash('None')
274+
266275
curr_block_token_ids_tuple = tuple(curr_block_token_ids)
267276
return BlockHashType(
268277
hash((parent_block_hash, curr_block_token_ids_tuple, extra_keys)),

0 commit comments

Comments
 (0)