@@ -27,7 +27,7 @@ use futures::{Future, StreamExt};
 use itertools::Itertools;
 use quickwit_common::metrics::IntCounter;
 use quickwit_common::pretty::PrettySample;
-use quickwit_common::Progress;
+use quickwit_common::{rate_limited_info, Progress};
 use quickwit_metastore::{
     ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitInfo,
     SplitMetadata, SplitState,
@@ -122,8 +122,8 @@ pub async fn run_garbage_collect(
 
     let index_uids: Vec<IndexUid> = indexes.keys().cloned().collect();
 
-    let Some(list_splits_query_for_index_uids) =
-        ListSplitsQuery::try_from_index_uids(index_uids.clone())
+    // TODO maybe we want to do a ListSplitsQuery::for_all_indexes and post-filter ourselves here
+    let Some(list_splits_query_for_index_uids) = ListSplitsQuery::try_from_index_uids(index_uids)
     else {
         return Ok(SplitRemovalInfo::default());
     };
@@ -187,7 +187,6 @@ pub async fn run_garbage_collect(
         OffsetDateTime::now_utc().unix_timestamp() - deletion_grace_period.as_secs() as i64;
 
     Ok(delete_splits_marked_for_deletion_several_indexes(
-        index_uids,
         updated_before_timestamp,
         metastore,
         indexes,
@@ -221,20 +220,15 @@ async fn delete_splits(
                         )
                         .await
                     } else {
-                        error!(
-                            "we are trying to GC without knowing the storage, this shouldn't \
-                             happen"
+                        // in practice this can happen if the index was created between the start
+                        // of the run and now and one of its splits has already expired (implying
+                        // a very long GC run), or when GC is run on a single index from the CLI
+                        quickwit_common::rate_limited_warn!(
+                            limit_per_min = 2,
+                            index_uid=%index_uid,
+                            "we are trying to GC without knowing the storage",
                         );
-                        Err(DeleteSplitsError {
-                            successes: Vec::new(),
-                            storage_error: None,
-                            storage_failures: splits_metadata_to_delete
-                                .into_iter()
-                                .map(|split| split.as_split_info())
-                                .collect(),
-                            metastore_error: None,
-                            metastore_failures: Vec::new(),
-                        })
+                        Ok(Vec::new())
                     }
                 }
             })
@@ -304,11 +298,12 @@ async fn list_splits_metadata(
 /// Removes any splits marked for deletion which haven't been
 /// updated after `updated_before_timestamp` in batches of 1000 splits.
 ///
+/// Only splits from index_uids in the `storages` map will be deleted.
+///
 /// The aim of this is to spread the load out across a longer period
 /// rather than short, heavy bursts on the metastore and storage system itself.
-#[instrument(skip(index_uids, storages, metastore, progress_opt, metrics), fields(num_indexes=%index_uids.len()))]
+#[instrument(skip(storages, metastore, progress_opt, metrics), fields(num_indexes=%storages.len()))]
 async fn delete_splits_marked_for_deletion_several_indexes(
-    index_uids: Vec<IndexUid>,
     updated_before_timestamp: i64,
     metastore: MetastoreServiceClient,
     storages: HashMap<IndexUid, Arc<dyn Storage>>,
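
The `#[instrument]` change keeps the span in sync with the new signature: with the `index_uids` parameter gone, the `num_indexes` field is derived from `storages` instead. A minimal sketch of this `tracing` pattern, assuming the `tracing` and `tracing-subscriber` crates and a hypothetical `run_gc` function:

use std::collections::HashMap;

use tracing::instrument;

// `skip` keeps the (potentially large) map out of the span; `fields` records
// just the derived count, rendered with `%` via its Display impl.
#[instrument(skip(storages), fields(num_indexes = %storages.len()))]
fn run_gc(storages: HashMap<String, ()>) {
    // This event inherits the span, so it carries `num_indexes`.
    tracing::info!("starting garbage collection");
}

fn main() {
    tracing_subscriber::fmt().init();
    run_gc(HashMap::from([("index-1".to_string(), ())]));
}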
@@ -317,18 +312,22 @@ async fn delete_splits_marked_for_deletion_several_indexes(
 ) -> SplitRemovalInfo {
     let mut split_removal_info = SplitRemovalInfo::default();
 
-    let Some(list_splits_query) = ListSplitsQuery::try_from_index_uids(index_uids) else {
-        error!("failed to create list splits query. this should never happen");
-        return split_removal_info;
-    };
+    // we ask for all indexes because the query is more efficient and we almost always want all
+    // indexes anyway. The exception is when garbage collecting a single index from the command
+    // line, in which case we will log a bunch of warnings. I (trinity) consider this worth the
+    // more generic code, which needs fewer special cases while testing, but we could check the
+    // length of index_uids if we think that's a better idea.
+    let list_splits_query = ListSplitsQuery::for_all_indexes();
 
     let mut list_splits_query = list_splits_query
         .with_split_state(SplitState::MarkedForDeletion)
         .with_update_timestamp_lte(updated_before_timestamp)
         .with_limit(DELETE_SPLITS_BATCH_SIZE)
         .sort_by_index_uid();
 
-    loop {
+    let mut splits_to_delete_possibly_remaining = true;
+
+    while splits_to_delete_possibly_remaining {
         let splits_metadata_to_delete: Vec<SplitMetadata> = match protect_future(
             progress_opt,
             list_splits_metadata(&metastore, &list_splits_query),
@@ -342,19 +341,32 @@ async fn delete_splits_marked_for_deletion_several_indexes(
             }
         };
 
+        // We page through the list of splits to delete using a limit and a `search_after` trick.
+        // To detect if this is the last page, we check if the number of splits is less than the
+        // limit.
+        assert!(splits_metadata_to_delete.len() <= DELETE_SPLITS_BATCH_SIZE);
+        splits_to_delete_possibly_remaining =
+            splits_metadata_to_delete.len() == DELETE_SPLITS_BATCH_SIZE;
+
         // set split after which to search for the next loop
         let Some(last_split_metadata) = splits_metadata_to_delete.last() else {
             break;
         };
         list_splits_query = list_splits_query.after_split(last_split_metadata);
 
-        let num_splits_to_delete = splits_metadata_to_delete.len();
+        let mut splits_metadata_to_delete_per_index: HashMap<IndexUid, Vec<SplitMetadata>> =
+            HashMap::with_capacity(storages.len());
 
-        let splits_metadata_to_delete_per_index: HashMap<IndexUid, Vec<SplitMetadata>> =
-            splits_metadata_to_delete
-                .into_iter()
-                .map(|meta| (meta.index_uid.clone(), meta))
-                .into_group_map();
+        for meta in splits_metadata_to_delete {
+            if !storages.contains_key(&meta.index_uid) {
+                rate_limited_info!(limit_per_min=6, index_uid=?meta.index_uid, "split not listed in storage map: skipping");
+                continue;
+            }
+            splits_metadata_to_delete_per_index
+                .entry(meta.index_uid.clone())
+                .or_default()
+                .push(meta);
+        }
 
         // ignore return we continue either way
         let _: Result<(), ()> = delete_splits(
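
The explicit loop replaces itertools' `into_group_map()` so that splits whose index is missing from `storages` can be filtered out (with a rate-limited log) in the same pass as the grouping. The grouping itself is the standard `HashMap` entry-API pattern; a minimal self-contained sketch with placeholder key/value types:

use std::collections::HashMap;

/// Groups values by key; `or_default()` creates the Vec the first time a key
/// is seen, so no pre-insertion check is needed.
fn group_by_key(pairs: Vec<(String, u64)>) -> HashMap<String, Vec<u64>> {
    let mut groups: HashMap<String, Vec<u64>> = HashMap::new();
    for (key, value) in pairs {
        groups.entry(key).or_default().push(value);
    }
    groups
}

fn main() {
    let groups = group_by_key(vec![
        ("a".to_string(), 1),
        ("b".to_string(), 2),
        ("a".to_string(), 3),
    ]);
    assert_eq!(groups["a"], vec![1, 3]);
    assert_eq!(groups["b"], vec![2]);
}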
@@ -366,12 +378,6 @@ async fn delete_splits_marked_for_deletion_several_indexes(
             &mut split_removal_info,
         )
         .await;
-
-        if num_splits_to_delete < DELETE_SPLITS_BATCH_SIZE {
-            // stop the gc if this was the last batch
-            // we are guaranteed to make progress due to .after_split()
-            break;
-        }
     }
 
     split_removal_info
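
Taken together, the last two hunks move the termination check from a trailing `break` to a flag computed at the top of the loop: a page shorter than `DELETE_SPLITS_BATCH_SIZE` must be the last one, and advancing the cursor with `after_split` guarantees forward progress. A distilled, runnable sketch of this limit-plus-`search_after` paging pattern, with a hypothetical `fetch_page` standing in for the metastore call:

const BATCH_SIZE: usize = 1000;

#[derive(Clone)]
struct Item {
    sort_key: u64,
}

/// Stand-in for `list_splits_metadata`: returns up to BATCH_SIZE items with a
/// sort key strictly greater than `after`, assuming `items` is sorted.
fn fetch_page(after: Option<u64>, items: &[Item]) -> Vec<Item> {
    items
        .iter()
        .filter(|item| after.map_or(true, |key| item.sort_key > key))
        .take(BATCH_SIZE)
        .cloned()
        .collect()
}

fn drain_all(items: &[Item]) -> usize {
    let mut processed = 0;
    let mut after = None;
    let mut possibly_remaining = true;
    while possibly_remaining {
        let page = fetch_page(after, items);
        // A page shorter than the limit must be the last one. An empty page
        // (e.g. when the total is an exact multiple of the batch size) exits
        // through the let-else below instead.
        possibly_remaining = page.len() == BATCH_SIZE;
        let Some(last) = page.last() else { break };
        // Moving the cursor past the last item seen guarantees progress.
        after = Some(last.sort_key);
        processed += page.len();
    }
    processed
}

fn main() {
    let items: Vec<Item> = (0..2_500u64).map(|sort_key| Item { sort_key }).collect();
    assert_eq!(drain_all(&items), 2_500);
}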