@@ -86,11 +86,13 @@ pub struct S3CompatibleObjectStorage {
86
86
s3_client : S3Client ,
87
87
uri : Uri ,
88
88
bucket : String ,
89
- prefix : PathBuf ,
89
+ prefix : String ,
90
90
multipart_policy : MultiPartPolicy ,
91
91
retry_params : RetryParams ,
92
92
disable_multi_object_delete : bool ,
93
93
disable_multipart_upload : bool ,
94
+ // If 0 or 1, no hash prefix is prepended to the object keys.
95
+ hash_prefix_cardinality : usize ,
94
96
}
95
97
96
98
impl fmt:: Debug for S3CompatibleObjectStorage {
@@ -99,6 +101,7 @@ impl fmt::Debug for S3CompatibleObjectStorage {
99
101
. debug_struct ( "S3CompatibleObjectStorage" )
100
102
. field ( "bucket" , & self . bucket )
101
103
. field ( "prefix" , & self . prefix )
104
+ . field ( "hash_prefix_cardinality" , & self . hash_prefix_cardinality )
102
105
. finish ( )
103
106
}
104
107
}
@@ -181,19 +184,20 @@ impl S3CompatibleObjectStorage {
181
184
s3_client,
182
185
uri : uri. clone ( ) ,
183
186
bucket,
184
- prefix,
187
+ prefix : prefix . to_string_lossy ( ) . to_string ( ) ,
185
188
multipart_policy : MultiPartPolicy :: default ( ) ,
186
189
retry_params,
187
190
disable_multi_object_delete,
188
191
disable_multipart_upload,
192
+ hash_prefix_cardinality : s3_storage_config. hash_prefix_cardinality ,
189
193
} )
190
194
}
191
195
192
196
/// Sets a specific prefix for all buckets.
193
197
///
194
198
/// This method overrides any existing prefix. (It does NOT
195
199
/// append the argument to any existing prefix.)
196
- pub fn with_prefix ( self , prefix : PathBuf ) -> Self {
200
+ pub fn with_prefix ( self , prefix : String ) -> Self {
197
201
Self {
198
202
s3_client : self . s3_client ,
199
203
uri : self . uri ,
@@ -203,6 +207,7 @@ impl S3CompatibleObjectStorage {
203
207
retry_params : self . retry_params ,
204
208
disable_multi_object_delete : self . disable_multi_object_delete ,
205
209
disable_multipart_upload : self . disable_multipart_upload ,
210
+ hash_prefix_cardinality : self . hash_prefix_cardinality ,
206
211
}
207
212
}
208
213
@@ -262,12 +267,49 @@ async fn compute_md5<T: AsyncRead + std::marker::Unpin>(mut read: T) -> io::Resu
262
267
}
263
268
}
264
269
}
270
+ const HEX_ALPHABET : [ u8 ; 16 ] = * b"0123456789abcdef" ;
271
+ const UNINITIALIZED_HASH_PREFIX : & str = "00000000" ;
272
+
273
+ fn build_key ( prefix : & str , relative_path : & str , hash_prefix_cardinality : usize ) -> String {
274
+ let mut key = String :: with_capacity (
275
+ UNINITIALIZED_HASH_PREFIX . len ( ) + 1 + prefix. len ( ) + 1 + relative_path. len ( ) ,
276
+ ) ;
277
+ if hash_prefix_cardinality > 1 {
278
+ key. push_str ( UNINITIALIZED_HASH_PREFIX ) ;
279
+ key. push ( '/' ) ;
280
+ }
281
+ key. push_str ( prefix) ;
282
+ if key. as_bytes ( ) . last ( ) . copied ( ) != Some ( b'/' ) {
283
+ key. push ( '/' ) ;
284
+ }
285
+ key. push_str ( relative_path) ;
286
+ // We then set up the prefix.
287
+ if hash_prefix_cardinality > 1 {
288
+ let key_without_prefix = & key. as_bytes ( ) [ UNINITIALIZED_HASH_PREFIX . len ( ) + 1 ..] ;
289
+ let mut prefix_hash: usize =
290
+ murmurhash32:: murmurhash3 ( key_without_prefix) as usize % hash_prefix_cardinality;
291
+ unsafe {
292
+ let prefix_buf: & mut [ u8 ] = & mut key. as_bytes_mut ( ) [ ..UNINITIALIZED_HASH_PREFIX . len ( ) ] ;
293
+ for prefix_byte in prefix_buf {
294
+ let hex: u8 = HEX_ALPHABET [ ( prefix_hash % 16 ) as usize ] ;
295
+ * prefix_byte = hex;
296
+ if prefix_hash < 16 {
297
+ break ;
298
+ }
299
+ prefix_hash /= 16 ;
300
+ }
301
+ }
302
+ }
303
+ key
304
+ }
265
305
266
306
impl S3CompatibleObjectStorage {
267
307
fn key ( & self , relative_path : & Path ) -> String {
268
- // FIXME: This may not work on Windows.
269
- let key_path = self . prefix . join ( relative_path) ;
270
- key_path. to_string_lossy ( ) . to_string ( )
308
+ build_key (
309
+ & self . prefix ,
310
+ relative_path. to_string_lossy ( ) . as_ref ( ) ,
311
+ self . hash_prefix_cardinality ,
312
+ )
271
313
}
272
314
273
315
fn relative_path ( & self , key : & str ) -> PathBuf {
@@ -945,13 +987,13 @@ mod tests {
945
987
let s3_client = S3Client :: new ( & sdk_config) ;
946
988
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
947
989
let bucket = "bucket" . to_string ( ) ;
948
- let prefix = PathBuf :: new ( ) ;
949
990
950
991
let mut s3_storage = S3CompatibleObjectStorage {
951
992
s3_client,
952
993
uri,
953
994
bucket,
954
- prefix,
995
+ prefix : String :: new ( ) ,
996
+ hash_prefix_cardinality : 0 ,
955
997
multipart_policy : MultiPartPolicy :: default ( ) ,
956
998
retry_params : RetryParams :: for_test ( ) ,
957
999
disable_multi_object_delete : false ,
@@ -962,7 +1004,7 @@ mod tests {
962
1004
PathBuf :: from( "indexes/foo" )
963
1005
) ;
964
1006
965
- s3_storage. prefix = PathBuf :: from ( "indexes" ) ;
1007
+ s3_storage. prefix = "indexes" . to_string ( ) ;
966
1008
967
1009
assert_eq ! (
968
1010
s3_storage. relative_path( "indexes/foo" ) ,
@@ -1000,13 +1042,13 @@ mod tests {
1000
1042
let s3_client = S3Client :: from_conf ( config) ;
1001
1043
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1002
1044
let bucket = "bucket" . to_string ( ) ;
1003
- let prefix = PathBuf :: new ( ) ;
1004
1045
1005
1046
let s3_storage = S3CompatibleObjectStorage {
1006
1047
s3_client,
1007
1048
uri,
1008
1049
bucket,
1009
- prefix,
1050
+ prefix : String :: new ( ) ,
1051
+ hash_prefix_cardinality : 0 ,
1010
1052
multipart_policy : MultiPartPolicy :: default ( ) ,
1011
1053
retry_params : RetryParams :: for_test ( ) ,
1012
1054
disable_multi_object_delete : true ,
@@ -1041,13 +1083,13 @@ mod tests {
1041
1083
let s3_client = S3Client :: from_conf ( config) ;
1042
1084
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1043
1085
let bucket = "bucket" . to_string ( ) ;
1044
- let prefix = PathBuf :: new ( ) ;
1045
1086
1046
1087
let s3_storage = S3CompatibleObjectStorage {
1047
1088
s3_client,
1048
1089
uri,
1049
1090
bucket,
1050
- prefix,
1091
+ prefix : String :: new ( ) ,
1092
+ hash_prefix_cardinality : 0 ,
1051
1093
multipart_policy : MultiPartPolicy :: default ( ) ,
1052
1094
retry_params : RetryParams :: for_test ( ) ,
1053
1095
disable_multi_object_delete : false ,
@@ -1123,13 +1165,13 @@ mod tests {
1123
1165
let s3_client = S3Client :: from_conf ( config) ;
1124
1166
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1125
1167
let bucket = "bucket" . to_string ( ) ;
1126
- let prefix = PathBuf :: new ( ) ;
1127
1168
1128
1169
let s3_storage = S3CompatibleObjectStorage {
1129
1170
s3_client,
1130
1171
uri,
1131
1172
bucket,
1132
- prefix,
1173
+ prefix : String :: new ( ) ,
1174
+ hash_prefix_cardinality : 0 ,
1133
1175
multipart_policy : MultiPartPolicy :: default ( ) ,
1134
1176
retry_params : RetryParams :: for_test ( ) ,
1135
1177
disable_multi_object_delete : false ,
@@ -1216,13 +1258,13 @@ mod tests {
1216
1258
let s3_client = S3Client :: from_conf ( config) ;
1217
1259
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1218
1260
let bucket = "bucket" . to_string ( ) ;
1219
- let prefix = PathBuf :: new ( ) ;
1220
1261
1221
1262
let s3_storage = S3CompatibleObjectStorage {
1222
1263
s3_client,
1223
1264
uri,
1224
1265
bucket,
1225
- prefix,
1266
+ prefix : String :: new ( ) ,
1267
+ hash_prefix_cardinality : 0 ,
1226
1268
multipart_policy : MultiPartPolicy :: default ( ) ,
1227
1269
retry_params : RetryParams :: for_test ( ) ,
1228
1270
disable_multi_object_delete : false ,
@@ -1233,4 +1275,19 @@ mod tests {
1233
1275
. await
1234
1276
. unwrap ( ) ;
1235
1277
}
1278
+
1279
/// Checks `build_key` with and without a trailing `/` on the prefix, for
/// cardinalities that disable (0, 1) and enable (2, 16, 17, 70) the hash prefix.
#[test]
fn test_build_key() {
    // (prefix, relative path, hash prefix cardinality, expected key)
    let cases: &[(&str, &str, usize, &str)] = &[
        ("hello", "coucou", 0, "hello/coucou"),
        ("hello/", "coucou", 0, "hello/coucou"),
        ("hello/", "coucou", 1, "hello/coucou"),
        ("hello", "coucou", 1, "hello/coucou"),
        ("hello/", "coucou", 2, "10000000/hello/coucou"),
        ("hello", "coucou", 2, "10000000/hello/coucou"),
        ("hello/", "coucou", 16, "d0000000/hello/coucou"),
        ("hello", "coucou", 16, "d0000000/hello/coucou"),
        ("hello/", "coucou", 17, "50000000/hello/coucou"),
        ("hello", "coucou", 17, "50000000/hello/coucou"),
        ("hello/", "coucou", 70, "f0000000/hello/coucou"),
    ];
    for &(prefix, relative_path, cardinality, expected_key) in cases.iter() {
        assert_eq!(
            build_key(prefix, relative_path, cardinality),
            expected_key
        );
    }
}
1236
1293
}
0 commit comments