@@ -86,11 +86,13 @@ pub struct S3CompatibleObjectStorage {
86
86
s3_client : S3Client ,
87
87
uri : Uri ,
88
88
bucket : String ,
89
- prefix : PathBuf ,
89
+ prefix : String ,
90
90
multipart_policy : MultiPartPolicy ,
91
91
retry_params : RetryParams ,
92
92
disable_multi_object_delete : bool ,
93
93
disable_multipart_upload : bool ,
94
+ // If 0 or 1, no hash prefix is prepended to the object keys.
95
+ hash_prefix_cardinality : usize ,
94
96
}
95
97
96
98
impl fmt:: Debug for S3CompatibleObjectStorage {
@@ -99,6 +101,7 @@ impl fmt::Debug for S3CompatibleObjectStorage {
99
101
. debug_struct ( "S3CompatibleObjectStorage" )
100
102
. field ( "bucket" , & self . bucket )
101
103
. field ( "prefix" , & self . prefix )
104
+ . field ( "hash_prefix_cardinality" , & self . hash_prefix_cardinality )
102
105
. finish ( )
103
106
}
104
107
}
@@ -181,19 +184,20 @@ impl S3CompatibleObjectStorage {
181
184
s3_client,
182
185
uri : uri. clone ( ) ,
183
186
bucket,
184
- prefix,
187
+ prefix : prefix . to_string_lossy ( ) . to_string ( ) ,
185
188
multipart_policy : MultiPartPolicy :: default ( ) ,
186
189
retry_params,
187
190
disable_multi_object_delete,
188
191
disable_multipart_upload,
192
+ hash_prefix_cardinality : s3_storage_config. hash_prefix_cardinality ,
189
193
} )
190
194
}
191
195
192
196
/// Sets a specific prefix for all buckets.
193
197
///
194
198
/// This method overrides any existing prefix. (It does NOT
195
199
/// append the argument to any existing prefix.)
196
- pub fn with_prefix ( self , prefix : PathBuf ) -> Self {
200
+ pub fn with_prefix ( self , prefix : String ) -> Self {
197
201
Self {
198
202
s3_client : self . s3_client ,
199
203
uri : self . uri ,
@@ -203,6 +207,7 @@ impl S3CompatibleObjectStorage {
203
207
retry_params : self . retry_params ,
204
208
disable_multi_object_delete : self . disable_multi_object_delete ,
205
209
disable_multipart_upload : self . disable_multipart_upload ,
210
+ hash_prefix_cardinality : self . hash_prefix_cardinality ,
206
211
}
207
212
}
208
213
@@ -262,12 +267,47 @@ async fn compute_md5<T: AsyncRead + std::marker::Unpin>(mut read: T) -> io::Resu
262
267
}
263
268
}
264
269
}
270
+ const HEX_ALPHABET : [ u8 ; 16 ] = * b"0123456789abcdef" ;
271
+ const UNINITIALIZED_HASH_PREFIX : & str = "00000000" ;
272
+
273
+ fn build_key ( prefix : & str , relative_path : & str , hash_prefix_cardinality : usize ) -> String {
274
+ let mut key = String :: with_capacity ( UNINITIALIZED_HASH_PREFIX . len ( ) + 1 + prefix. len ( ) + 1 + relative_path. len ( ) ) ;
275
+ if hash_prefix_cardinality > 1 {
276
+ key. push_str ( UNINITIALIZED_HASH_PREFIX ) ;
277
+ key. push ( '/' ) ;
278
+ }
279
+ key. push_str ( prefix) ;
280
+ if key. as_bytes ( ) . last ( ) . copied ( ) != Some ( b'/' ) {
281
+ key. push ( '/' ) ;
282
+ }
283
+ key. push_str ( relative_path) ;
284
+ // We then set up the prefix.
285
+ if hash_prefix_cardinality > 1 {
286
+ let key_without_prefix = & key. as_bytes ( ) [ UNINITIALIZED_HASH_PREFIX . len ( ) + 1 ..] ;
287
+ let mut prefix_hash: usize =
288
+ murmurhash32:: murmurhash3 ( key_without_prefix) as usize % hash_prefix_cardinality;
289
+ unsafe {
290
+ let prefix_buf: & mut [ u8 ] = & mut key. as_bytes_mut ( ) [ ..UNINITIALIZED_HASH_PREFIX . len ( ) ] ;
291
+ for prefix_byte in prefix_buf. iter_mut ( ) {
292
+ let hex: u8 = HEX_ALPHABET [ ( prefix_hash % 16 ) as usize ] ;
293
+ * prefix_byte = hex;
294
+ if prefix_hash < 16 {
295
+ break ;
296
+ }
297
+ prefix_hash /= 16 ;
298
+ }
299
+ }
300
+ }
301
+ key
302
+ }
265
303
266
304
impl S3CompatibleObjectStorage {
267
305
fn key ( & self , relative_path : & Path ) -> String {
268
- // FIXME: This may not work on Windows.
269
- let key_path = self . prefix . join ( relative_path) ;
270
- key_path. to_string_lossy ( ) . to_string ( )
306
+ build_key (
307
+ & self . prefix ,
308
+ relative_path. to_string_lossy ( ) . as_ref ( ) ,
309
+ self . hash_prefix_cardinality ,
310
+ )
271
311
}
272
312
273
313
fn relative_path ( & self , key : & str ) -> PathBuf {
@@ -945,13 +985,13 @@ mod tests {
945
985
let s3_client = S3Client :: new ( & sdk_config) ;
946
986
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
947
987
let bucket = "bucket" . to_string ( ) ;
948
- let prefix = PathBuf :: new ( ) ;
949
988
950
989
let mut s3_storage = S3CompatibleObjectStorage {
951
990
s3_client,
952
991
uri,
953
992
bucket,
954
- prefix,
993
+ prefix : String :: new ( ) ,
994
+ hash_prefix_cardinality : 0 ,
955
995
multipart_policy : MultiPartPolicy :: default ( ) ,
956
996
retry_params : RetryParams :: for_test ( ) ,
957
997
disable_multi_object_delete : false ,
@@ -962,7 +1002,7 @@ mod tests {
962
1002
PathBuf :: from( "indexes/foo" )
963
1003
) ;
964
1004
965
- s3_storage. prefix = PathBuf :: from ( "indexes" ) ;
1005
+ s3_storage. prefix = "indexes" . to_string ( ) ;
966
1006
967
1007
assert_eq ! (
968
1008
s3_storage. relative_path( "indexes/foo" ) ,
@@ -1000,13 +1040,13 @@ mod tests {
1000
1040
let s3_client = S3Client :: from_conf ( config) ;
1001
1041
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1002
1042
let bucket = "bucket" . to_string ( ) ;
1003
- let prefix = PathBuf :: new ( ) ;
1004
1043
1005
1044
let s3_storage = S3CompatibleObjectStorage {
1006
1045
s3_client,
1007
1046
uri,
1008
1047
bucket,
1009
- prefix,
1048
+ prefix : String :: new ( ) ,
1049
+ hash_prefix_cardinality : 0 ,
1010
1050
multipart_policy : MultiPartPolicy :: default ( ) ,
1011
1051
retry_params : RetryParams :: for_test ( ) ,
1012
1052
disable_multi_object_delete : true ,
@@ -1041,13 +1081,13 @@ mod tests {
1041
1081
let s3_client = S3Client :: from_conf ( config) ;
1042
1082
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1043
1083
let bucket = "bucket" . to_string ( ) ;
1044
- let prefix = PathBuf :: new ( ) ;
1045
1084
1046
1085
let s3_storage = S3CompatibleObjectStorage {
1047
1086
s3_client,
1048
1087
uri,
1049
1088
bucket,
1050
- prefix,
1089
+ prefix : String :: new ( ) ,
1090
+ hash_prefix_cardinality : 0 ,
1051
1091
multipart_policy : MultiPartPolicy :: default ( ) ,
1052
1092
retry_params : RetryParams :: for_test ( ) ,
1053
1093
disable_multi_object_delete : false ,
@@ -1123,13 +1163,13 @@ mod tests {
1123
1163
let s3_client = S3Client :: from_conf ( config) ;
1124
1164
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1125
1165
let bucket = "bucket" . to_string ( ) ;
1126
- let prefix = PathBuf :: new ( ) ;
1127
1166
1128
1167
let s3_storage = S3CompatibleObjectStorage {
1129
1168
s3_client,
1130
1169
uri,
1131
1170
bucket,
1132
- prefix,
1171
+ prefix : String :: new ( ) ,
1172
+ hash_prefix_cardinality : 0 ,
1133
1173
multipart_policy : MultiPartPolicy :: default ( ) ,
1134
1174
retry_params : RetryParams :: for_test ( ) ,
1135
1175
disable_multi_object_delete : false ,
@@ -1216,13 +1256,13 @@ mod tests {
1216
1256
let s3_client = S3Client :: from_conf ( config) ;
1217
1257
let uri = Uri :: for_test ( "s3://bucket/indexes" ) ;
1218
1258
let bucket = "bucket" . to_string ( ) ;
1219
- let prefix = PathBuf :: new ( ) ;
1220
1259
1221
1260
let s3_storage = S3CompatibleObjectStorage {
1222
1261
s3_client,
1223
1262
uri,
1224
1263
bucket,
1225
- prefix,
1264
+ prefix : String :: new ( ) ,
1265
+ hash_prefix_cardinality : 0 ,
1226
1266
multipart_policy : MultiPartPolicy :: default ( ) ,
1227
1267
retry_params : RetryParams :: for_test ( ) ,
1228
1268
disable_multi_object_delete : false ,
@@ -1233,4 +1273,19 @@ mod tests {
1233
1273
. await
1234
1274
. unwrap ( ) ;
1235
1275
}
1276
+
1277
/// Checks `build_key` against a table of
/// `(prefix, relative_path, hash_prefix_cardinality, expected_key)` cases.
#[test]
fn test_build_key() {
    let cases: [(&str, &str, usize, &str); 11] = [
        // Cardinalities 0 and 1: no hash prefix, with or without a trailing '/'.
        ("hello", "coucou", 0, "hello/coucou"),
        ("hello/", "coucou", 0, "hello/coucou"),
        ("hello/", "coucou", 1, "hello/coucou"),
        ("hello", "coucou", 1, "hello/coucou"),
        // Cardinality > 1: an 8-character hex hash prefix is prepended, and it
        // is the same whether or not the prefix ends with '/'.
        ("hello/", "coucou", 2, "10000000/hello/coucou"),
        ("hello", "coucou", 2, "10000000/hello/coucou"),
        ("hello/", "coucou", 16, "d0000000/hello/coucou"),
        ("hello", "coucou", 16, "d0000000/hello/coucou"),
        ("hello/", "coucou", 17, "50000000/hello/coucou"),
        ("hello", "coucou", 17, "50000000/hello/coucou"),
        ("hello/", "coucou", 70, "f0000000/hello/coucou"),
    ];
    for &(prefix, relative_path, hash_prefix_cardinality, expected_key) in cases.iter() {
        assert_eq!(
            build_key(prefix, relative_path, hash_prefix_cardinality),
            expected_key
        );
    }
}
1236
1291
}
0 commit comments