1919#include < cstring>
2020#include < limits>
2121#include < memory>
22+ #include < optional>
2223#include < utility>
2324
2425#include " fmt/format.h"
@@ -246,6 +247,10 @@ class ConfigParser {
246247 return Status::OK ();
247248 }
248249
250+ bool ContainsKey (const std::string& key) const {
251+ return config_map_.find (key) != config_map_.end ();
252+ }
253+
249254 private:
250255 const std::map<std::string, std::string> config_map_;
251256};
@@ -254,9 +259,8 @@ class ConfigParser {
254259// storing various configurable fields and their default values.
255260struct CoreOptions ::Impl {
256261 int64_t page_size = 64 * 1024 ;
257- int64_t target_file_size = 256 * 1024 * 1024 ; // TODO(xinyu.lxy): target_file_size of primary
258- // key table is 128 MB and append table is 256 MB
259- int64_t blob_target_file_size = 256 * 1024 * 1024 ;
262+ std::optional<int64_t > target_file_size;
263+ std::optional<int64_t > blob_target_file_size;
260264 int64_t source_split_target_size = 128 * 1024 * 1024 ;
261265 int64_t source_split_open_file_cost = 4 * 1024 * 1024 ;
262266 int64_t manifest_target_file_size = 8 * 1024 * 1024 ;
@@ -293,6 +297,7 @@ struct CoreOptions::Impl {
293297 int32_t read_batch_size = 1024 ;
294298 int32_t write_batch_size = 1024 ;
295299 int32_t commit_max_retries = 10 ;
300+ int32_t compaction_min_file_num = 5 ;
296301
297302 SortOrder sequence_field_sort_order = SortOrder::ASCENDING;
298303 MergeEngine merge_engine = MergeEngine::DEDUPLICATE;
@@ -303,6 +308,7 @@ struct CoreOptions::Impl {
303308 int32_t file_compression_zstd_level = 1 ;
304309
305310 bool ignore_delete = false ;
311+ bool write_only = false ;
306312 bool deletion_vectors_enabled = false ;
307313 bool force_lookup = false ;
308314 bool partial_update_remove_record_on_delete = false ;
@@ -313,6 +319,8 @@ struct CoreOptions::Impl {
313319 bool data_evolution_enabled = false ;
314320 bool legacy_partition_name_enabled = true ;
315321 bool global_index_enabled = true ;
322+ bool commit_force_compact = false ;
323+ bool compaction_force_rewrite_all_files = false ;
316324 std::optional<std::string> global_index_external_path;
317325
318326 std::optional<std::string> scan_tag_name;
@@ -348,11 +356,17 @@ Result<CoreOptions> CoreOptions::FromMap(
348356
349357 // Parse memory size configurations
350358 PAIMON_RETURN_NOT_OK (parser.ParseMemorySize (Options::PAGE_SIZE, &impl->page_size ));
351- PAIMON_RETURN_NOT_OK (
352- parser.ParseMemorySize (Options::TARGET_FILE_SIZE, &impl->target_file_size ));
353- impl->blob_target_file_size = impl->target_file_size ;
354- PAIMON_RETURN_NOT_OK (
355- parser.ParseMemorySize (Options::BLOB_TARGET_FILE_SIZE, &impl->blob_target_file_size ));
359+ if (parser.ContainsKey (Options::TARGET_FILE_SIZE)) {
360+ int64_t target_file_size;
361+ PAIMON_RETURN_NOT_OK (parser.ParseMemorySize (Options::TARGET_FILE_SIZE, &target_file_size));
362+ impl->target_file_size = target_file_size;
363+ }
364+ if (parser.ContainsKey (Options::BLOB_TARGET_FILE_SIZE)) {
365+ int64_t blob_target_file_size;
366+ PAIMON_RETURN_NOT_OK (
367+ parser.ParseMemorySize (Options::BLOB_TARGET_FILE_SIZE, &blob_target_file_size));
368+ impl->blob_target_file_size = blob_target_file_size;
369+ }
356370 PAIMON_RETURN_NOT_OK (parser.ParseMemorySize (Options::MANIFEST_TARGET_FILE_SIZE,
357371 &impl->manifest_target_file_size ));
358372 PAIMON_RETURN_NOT_OK (
@@ -411,6 +425,9 @@ Result<CoreOptions> CoreOptions::FromMap(
411425 // Parse ignore delete
412426 PAIMON_RETURN_NOT_OK (parser.Parse <bool >(Options::IGNORE_DELETE, &impl->ignore_delete ));
413427
428+ // Parse write-only
429+ PAIMON_RETURN_NOT_OK (parser.Parse <bool >(Options::WRITE_ONLY, &impl->write_only ));
430+
414431 // Parse default agg function
415432 std::string field_default_func;
416433 PAIMON_RETURN_NOT_OK (parser.ParseString (Options::FIELDS_DEFAULT_AGG_FUNC, &field_default_func));
@@ -490,6 +507,18 @@ Result<CoreOptions> CoreOptions::FromMap(
490507 impl->scan_tag_name = scan_tag_name;
491508 }
492509
510+ // Parse commit.force-compact
511+ PAIMON_RETURN_NOT_OK (
512+ parser.Parse <bool >(Options::COMMIT_FORCE_COMPACT, &impl->commit_force_compact ));
513+
514+ // Parse compaction.min.file-num
515+ PAIMON_RETURN_NOT_OK (
516+ parser.Parse (Options::COMPACTION_MIN_FILE_NUM, &impl->compaction_min_file_num ));
517+
518+ // Parse compaction.force-rewrite-all-files
519+ PAIMON_RETURN_NOT_OK (parser.Parse <bool >(Options::COMPACTION_FORCE_REWRITE_ALL_FILES,
520+ &impl->compaction_force_rewrite_all_files ));
521+
493522 return options;
494523}
495524
@@ -531,12 +560,25 @@ int64_t CoreOptions::GetPageSize() const {
531560 return impl_->page_size ;
532561}
533562
534- int64_t CoreOptions::GetTargetFileSize () const {
535- return impl_->target_file_size ;
563+ int64_t CoreOptions::GetTargetFileSize (bool has_primary_key) const {
564+ if (impl_->target_file_size == std::nullopt ) {
565+ return has_primary_key ? 128 * 1024 * 1024 : 256 * 1024 * 1024 ;
566+ }
567+ return impl_->target_file_size .value ();
536568}
537569
538570int64_t CoreOptions::GetBlobTargetFileSize () const {
539- return impl_->blob_target_file_size ;
571+ if (impl_->blob_target_file_size == std::nullopt ) {
572+ return GetTargetFileSize (/* has_primary_key=*/ false );
573+ }
574+ return impl_->blob_target_file_size .value ();
575+ }
576+
577+ int64_t CoreOptions::GetCompactionFileSize (bool has_primary_key) const {
578+ // file size to join the compaction, we don't process on middle file size to avoid
579+ // compact a same file twice (the compression is not calculate so accurately. the output
580+ // file maybe be less than target file generated by rolling file write).
581+ return GetTargetFileSize (has_primary_key) / 10 * 7 ;
540582}
541583
542584std::string CoreOptions::GetPartitionDefaultName () const {
@@ -594,6 +636,10 @@ int64_t CoreOptions::GetWriteBufferSize() const {
594636 return impl_->write_buffer_size ;
595637}
596638
639+ bool CoreOptions::CommitForceCompact () const {
640+ return impl_->commit_force_compact ;
641+ }
642+
597643int64_t CoreOptions::GetCommitTimeout () const {
598644 return impl_->commit_timeout ;
599645}
@@ -602,6 +648,10 @@ int32_t CoreOptions::GetCommitMaxRetries() const {
602648 return impl_->commit_max_retries ;
603649}
604650
651+ int32_t CoreOptions::GetCompactionMinFileNum () const {
652+ return impl_->compaction_min_file_num ;
653+ }
654+
605655const ExpireConfig& CoreOptions::GetExpireConfig () const {
606656 return impl_->expire_config ;
607657}
@@ -626,6 +676,10 @@ bool CoreOptions::IgnoreDelete() const {
626676 return impl_->ignore_delete ;
627677}
628678
679+ bool CoreOptions::WriteOnly () const {
680+ return impl_->write_only ;
681+ }
682+
629683std::optional<std::string> CoreOptions::GetFieldsDefaultFunc () const {
630684 return impl_->field_default_func ;
631685}
@@ -674,6 +728,10 @@ bool CoreOptions::NeedLookup() const {
674728 impl_->force_lookup ;
675729}
676730
731+ bool CoreOptions::CompactionForceRewriteAllFiles () const {
732+ return impl_->compaction_force_rewrite_all_files ;
733+ }
734+
677735std::map<std::string, std::string> CoreOptions::GetFieldsSequenceGroups () const {
678736 auto raw_options = impl_->raw_options ;
679737 std::map<std::string, std::string> sequence_groups;
0 commit comments