@@ -65,6 +65,15 @@ class PrefixCachingBlockAllocator(BlockAllocator):
65
65
from 0 to num_blocks - 1.
66
66
"""
67
67
68
+ # Note that we use 'None' as a string here instead of None because
69
+ # as of Python 3.12, hash(None) returns a constant predictable value.
70
+ # This could possibly make it easier to find and exploit hash
71
+ # collisions. With str hash randomization enabled (the default, i.e.
72
+ # PYTHONHASHSEED unset or "random"), hash('None') differs per process
73
+ # but is stable within a process, like hash(None) before Python 3.12.
74
+ _none_hash : int = hash ('None' )
75
+
76
+ # Implements Block.Factory.
68
77
def __init__ (
69
78
self ,
70
79
num_blocks : int ,
@@ -122,7 +131,6 @@ def __init__(
122
131
123
132
self .metric_data = CacheMetricData ()
124
133
125
- # Implements Block.Factory.
126
134
def _create_block (
127
135
self ,
128
136
prev_block : Optional [Block ],
@@ -737,6 +745,14 @@ class PrefixCachingBlock(Block):
737
745
such as adapters that influence the block, apart from the token_ids.
738
746
"""
739
747
748
+ # Note that we use 'None' as a string here instead of None because
749
+ # as of Python 3.12, hash(None) returns a constant predictable value.
750
+ # This could possibly make it easier to find and exploit hash
751
+ # collisions. With str hash randomization enabled (the default, i.e.
752
+ # PYTHONHASHSEED unset or "random"), hash('None') differs per process
753
+ # but is stable within a process, like hash(None) before Python 3.12.
754
+ _none_hash : int = hash ('None' )
755
+
740
756
def __init__ (
741
757
self ,
742
758
prev_block : Optional [Block ],
@@ -891,13 +907,13 @@ def content_hash(self) -> Optional[int]:
891
907
892
908
is_first_block = self ._prev_block is None
893
909
prev_block_hash = (
894
- None if is_first_block else
910
+ self . _none_hash if is_first_block else
895
911
self ._prev_block .content_hash # type: ignore
896
912
)
897
913
898
914
# Previous block exists but does not yet have a hash.
899
915
# Return no hash in this case.
900
- if prev_block_hash is None and not is_first_block :
916
+ if prev_block_hash == self . _none_hash and not is_first_block :
901
917
return None
902
918
903
919
self ._cached_content_hash = PrefixCachingBlock .hash_block_tokens (
@@ -907,8 +923,9 @@ def content_hash(self) -> Optional[int]:
907
923
extra_hash = self ._extra_hash )
908
924
return self ._cached_content_hash
909
925
910
- @staticmethod
911
- def hash_block_tokens (is_first_block : bool ,
926
+ @classmethod
927
+ def hash_block_tokens (cls ,
928
+ is_first_block : bool ,
912
929
prev_block_hash : Optional [int ],
913
930
cur_block_token_ids : List [int ],
914
931
extra_hash : Optional [int ] = None ) -> int :
@@ -929,7 +946,8 @@ def hash_block_tokens(is_first_block: bool,
929
946
Returns:
930
947
- int: The computed hash value for the block.
931
948
"""
932
- assert (prev_block_hash is None ) == is_first_block
949
+ if is_first_block and prev_block_hash is None :
950
+ prev_block_hash = cls ._none_hash
933
951
return hash ((is_first_block , prev_block_hash , * cur_block_token_ids ,
934
952
extra_hash ))
935
953
@@ -949,6 +967,14 @@ class ComputedBlocksTracker:
949
967
cached block hashes in the allocator.
950
968
"""
951
969
970
+ # Note that we use 'None' as a string here instead of None because
971
+ # as of Python 3.12, hash(None) returns a constant predictable value.
972
+ # This could possibly make it easier to find and exploit hash
973
+ # collisions. With str hash randomization enabled (the default, i.e.
974
+ # PYTHONHASHSEED unset or "random"), hash('None') differs per process
975
+ # but is stable within a process, like hash(None) before Python 3.12.
976
+ _none_hash : int = hash ('None' )
977
+
952
978
def __init__ (
953
979
self ,
954
980
allocator : DeviceAwareBlockAllocator ,
@@ -994,7 +1020,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None:
994
1020
# We need to know the hash of the previous block to compute the hash of
995
1021
# the current block so that blocks could be uniquely identified across
996
1022
# sequences of prefixes.
997
- prev_block_hash = (None if cur_num_blocks_recorded == 0 else
1023
+ prev_block_hash = (self . _none_hash if cur_num_blocks_recorded == 0 else
998
1024
block_hashes_recorded [- 1 ])
999
1025
# Only update the computed block hashes for the new blocks
1000
1026
for i in range (cur_num_blocks_recorded , num_computed_blocks ):
@@ -1009,7 +1035,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None:
1009
1035
# This has to be kept in sync with the allocator's hash
1010
1036
# calculation.
1011
1037
block_hash = PrefixCachingBlock .hash_block_tokens (
1012
- is_first_block = prev_block_hash is None ,
1038
+ is_first_block = prev_block_hash == self . _none_hash ,
1013
1039
prev_block_hash = prev_block_hash ,
1014
1040
cur_block_token_ids = block_token_ids ,
1015
1041
extra_hash = extra_hash ,
0 commit comments