@@ -141,7 +141,7 @@ class DuckLakeLogicalCompaction : public LogicalExtensionOperator {
141141class DuckLakeCompactor {
142142public:
143143 DuckLakeCompactor (ClientContext &context, DuckLakeCatalog &catalog, DuckLakeTransaction &transaction,
144- Binder &binder, TableIndex table_id);
144+ Binder &binder, TableIndex table_id, uint64_t max_files );
145145 DuckLakeCompactor (ClientContext &context, DuckLakeCatalog &catalog, DuckLakeTransaction &transaction,
146146 Binder &binder, TableIndex table_id, double delete_threshold);
147147 void GenerateCompactions (DuckLakeTableEntry &table, vector<unique_ptr<LogicalOperator>> &compactions);
@@ -154,14 +154,15 @@ class DuckLakeCompactor {
154154 Binder &binder;
155155 TableIndex table_id;
156156 double delete_threshold = 0.95 ;
157+ optional_idx max_files;
157158
158159 CompactionType type;
159160};
160161
// Constructor used for the merge-adjacent-files compaction mode (the changed signature adds max_files).
// Binds the context, catalog, transaction and binder references, stores table_id and max_files,
// and sets type to CompactionType::MERGE_ADJACENT_TABLES.
// NOTE(review): max_files is an optional_idx member that GenerateCompactions reads through GetIndex();
// the initializer list of the delete_threshold overload is not visible here - confirm it also sets
// max_files, otherwise that path may read an unset value.
161162DuckLakeCompactor::DuckLakeCompactor (ClientContext &context, DuckLakeCatalog &catalog, DuckLakeTransaction &transaction,
162- Binder &binder, TableIndex table_id)
163+ Binder &binder, TableIndex table_id, uint64_t max_files )
163164 : context(context), catalog(catalog), transaction(transaction), binder(binder), table_id(table_id),
164- type(CompactionType::MERGE_ADJACENT_TABLES) {
165+ max_files(max_files), type(CompactionType::MERGE_ADJACENT_TABLES) {
165166}
166167
167168DuckLakeCompactor::DuckLakeCompactor (ClientContext &context, DuckLakeCatalog &catalog, DuckLakeTransaction &transaction,
@@ -249,6 +250,7 @@ void DuckLakeCompactor::GenerateCompactions(DuckLakeTableEntry &table,
249250 }
250251 // we have gathered all the candidate files per compaction group
251252 // iterate over them to generate actual compaction commands
253+ uint64_t compacted_files = 0 ;
252254 for (auto &entry : candidates) {
253255 auto &candidate_list = entry.second .candidate_files ;
254256 if (candidate_list.size () <= 1 ) {
@@ -285,13 +287,21 @@ void DuckLakeCompactor::GenerateCompactions(DuckLakeTableEntry &table,
285287
286288 if (start_idx < compaction_idx) {
287289 idx_t compaction_file_count = compaction_idx - start_idx;
290+ if (compaction_file_count == 1 ) {
291+ // A single file has nothing to be merged with, so there is nothing to compact
292+ continue ;
293+ }
288294 vector<DuckLakeCompactionFileEntry> compaction_files;
289295 for (idx_t i = start_idx; i < compaction_idx; i++) {
290296 compaction_files.push_back (std::move (files[candidate_list[i]]));
291297 }
292298 compactions.push_back (GenerateCompactionCommand (std::move (compaction_files)));
293299 start_idx += compaction_file_count - 1 ;
294300 }
301+ compacted_files++;
302+ if (compacted_files >= max_files.GetIndex ()) {
303+ break ;
304+ }
295305 }
296306 }
297307}
@@ -494,10 +504,11 @@ static unique_ptr<LogicalOperator> GenerateCompactionOperator(TableFunctionBindI
494504static void GenerateCompaction (ClientContext &context, DuckLakeTransaction &transaction,
495505 DuckLakeCatalog &ducklake_catalog, TableFunctionBindInput &input,
496506 DuckLakeTableEntry &cur_table, CompactionType type, double delete_threshold,
497- vector<unique_ptr<LogicalOperator>> &compactions) {
507+ uint64_t max_files, vector<unique_ptr<LogicalOperator>> &compactions) {
498508 switch (type) {
499509 case CompactionType::MERGE_ADJACENT_TABLES: {
500- DuckLakeCompactor compactor (context, ducklake_catalog, transaction, *input.binder , cur_table.GetTableId ());
510+ DuckLakeCompactor compactor (context, ducklake_catalog, transaction, *input.binder , cur_table.GetTableId (),
511+ max_files);
501512 compactor.GenerateCompactions (cur_table, compactions);
502513 break ;
503514 }
@@ -532,6 +543,17 @@ unique_ptr<LogicalOperator> BindCompaction(ClientContext &context, TableFunction
532543 }
533544
534545 vector<unique_ptr<LogicalOperator>> compactions;
546+ uint64_t max_files = NumericLimits<uint64_t >::Maximum () - 1 ;
547+ auto max_files_entry = input.named_parameters .find (" max_compacted_files" );
548+ if (max_files_entry != input.named_parameters .end ()) {
549+ if (max_files_entry->second .IsNull ()) {
550+ throw BinderException (" The max_compacted_files option must be a non-null integer." );
551+ }
552+ max_files = UBigIntValue::Get (max_files_entry->second );
553+ if (max_files == 0 ) {
554+ throw BinderException (" The max_compacted_files option must be greater than zero." );
555+ }
556+ }
535557 if (input.inputs .size () == 1 ) {
536558 if (schema.empty () && table.empty ()) {
537559 // No default schema/table, we will perform rewrites on deletes in the whole database
@@ -541,7 +563,7 @@ unique_ptr<LogicalOperator> BindCompaction(ClientContext &context, TableFunction
541563 if (entry.type == CatalogType::TABLE_ENTRY) {
542564 auto &cur_table = entry.Cast <DuckLakeTableEntry>();
543565 GenerateCompaction (context, transaction, ducklake_catalog, input, cur_table, type,
544- delete_threshold, compactions);
566+ delete_threshold, max_files, compactions);
545567 }
546568 });
547569 }
@@ -554,7 +576,7 @@ unique_ptr<LogicalOperator> BindCompaction(ClientContext &context, TableFunction
554576 if (entry.type == CatalogType::TABLE_ENTRY) {
555577 auto &cur_table = entry.Cast <DuckLakeTableEntry>();
556578 GenerateCompaction (context, transaction, ducklake_catalog, input, cur_table, type, delete_threshold,
557- compactions);
579+ max_files, compactions);
558580 }
559581 });
560582 return GenerateCompactionOperator (input, bind_index, compactions);
@@ -568,10 +590,11 @@ unique_ptr<LogicalOperator> BindCompaction(ClientContext &context, TableFunction
568590 if (schema_entry != input.named_parameters .end ()) {
569591 schema = StringValue::Get (schema_entry->second );
570592 }
571- EntryLookupInfo table_lookup (CatalogType::TABLE_ENTRY, table, nullptr , QueryErrorContext ());
593+
594+ const EntryLookupInfo table_lookup (CatalogType::TABLE_ENTRY, table, nullptr , QueryErrorContext ());
572595 auto table_entry = catalog.GetEntry (context, schema, table_lookup, OnEntryNotFound::THROW_EXCEPTION);
573596 auto &ducklake_table = table_entry->Cast <DuckLakeTableEntry>();
574- GenerateCompaction (context, transaction, ducklake_catalog, input, ducklake_table, type, delete_threshold,
597+ GenerateCompaction (context, transaction, ducklake_catalog, input, ducklake_table, type, delete_threshold, max_files,
575598 compactions);
576599
577600 return GenerateCompactionOperator (input, bind_index, compactions);
@@ -591,6 +614,7 @@ TableFunctionSet DuckLakeMergeAdjacentFilesFunction::GetFunctions() {
591614 function.bind_operator = MergeAdjacentFilesBind;
592615 if (type.size () == 2 ) {
593616 function.named_parameters [" schema" ] = LogicalType::VARCHAR;
617+ function.named_parameters [" max_compacted_files" ] = LogicalType::UBIGINT;
594618 }
595619 set.AddFunction (function);
596620 }
0 commit comments