@@ -95,15 +95,6 @@ Result<std::unique_ptr<MergeFileSplitRead>> MergeFileSplitRead::Create(
9595 PAIMON_RETURN_NOT_OK (GenerateKeyValueReadSchema (
9696 *table_schema, core_options, context->GetReadSchema (), &value_schema, &read_schema,
9797 &key_comparator, &interval_partition_comparator, &user_defined_seq_comparator));
98- PAIMON_ASSIGN_OR_RAISE (std::unique_ptr<MergeFunction> merge_function,
99- PrimaryKeyTableUtils::CreateMergeFunction (
100- value_schema, table_schema->PrimaryKeys (), core_options));
101- if (core_options.NeedLookup () && core_options.GetMergeEngine () != MergeEngine::FIRST_ROW) {
102- // don't wrap first row, it is already OK
103- merge_function = std::make_unique<LookupMergeFunction>(std::move (merge_function));
104- }
105- auto merge_function_wrapper =
106- std::make_shared<ReducerMergeFunctionWrapper>(std::move (merge_function));
10798
10899 PAIMON_ASSIGN_OR_RAISE (std::shared_ptr<Predicate> predicate_for_keys,
109100 GenerateKeyPredicates (context->GetPredicate (), *table_schema));
@@ -120,7 +111,7 @@ Result<std::unique_ptr<MergeFileSplitRead>> MergeFileSplitRead::Create(
120111 path_factory, context,
121112 std::make_unique<SchemaManager>(core_options.GetFileSystem (), context->GetPath (),
122113 context->GetCoreOptions ().GetBranch ()),
123- key_arity, value_schema, read_schema, projection, merge_function_wrapper, key_comparator,
114+ key_arity, value_schema, read_schema, projection, key_comparator,
124115 interval_partition_comparator, user_defined_seq_comparator, predicate_for_keys, memory_pool,
125116 executor));
126117}
@@ -144,11 +135,34 @@ Result<std::unique_ptr<BatchReader>> MergeFileSplitRead::CreateReader(
144135 CreateNoMergeReader (data_split, /* only_filter_key=*/ data_split->IsStreaming (),
145136 data_file_path_factory));
146137 } else {
138+ if (!merge_function_wrapper_) {
139+ // In deletion vector mode, for a streaming data split, or in postpone bucket mode, the
140+ // merge function is not needed. Even if the merge function configured in CoreOptions is
141+ // not supported, it should not affect data reading. So merge_function_wrapper_ is created
142+ // lazily, to avoid raising errors when MergeFileSplitRead is created at the beginning.
143+ PAIMON_ASSIGN_OR_RAISE (
144+ merge_function_wrapper_,
145+ CreateMergeFunctionWrapper (options_, context_->GetTableSchema (), value_schema_));
146+ }
147147 PAIMON_ASSIGN_OR_RAISE (batch_reader, CreateMergeReader (data_split, data_file_path_factory));
148148 }
149149 return std::make_unique<CompleteRowKindBatchReader>(std::move (batch_reader), pool_);
150150}
151151
152+ Result<std::shared_ptr<MergeFunctionWrapper<KeyValue>>>
153+ MergeFileSplitRead::CreateMergeFunctionWrapper (const CoreOptions& core_options,
154+ const std::shared_ptr<TableSchema>& table_schema,
155+ const std::shared_ptr<arrow::Schema>& value_schema) {
156+ PAIMON_ASSIGN_OR_RAISE (std::unique_ptr<MergeFunction> merge_function,
157+ PrimaryKeyTableUtils::CreateMergeFunction (
158+ value_schema, table_schema->PrimaryKeys (), core_options));
159+ if (core_options.NeedLookup () && core_options.GetMergeEngine () != MergeEngine::FIRST_ROW) {
160+ // don't wrap first row, it is already OK
161+ merge_function = std::make_unique<LookupMergeFunction>(std::move (merge_function));
162+ }
163+ return std::make_shared<ReducerMergeFunctionWrapper>(std::move (merge_function));
164+ }
165+
152166Result<std::unique_ptr<BatchReader>> MergeFileSplitRead::ApplyIndexAndDvReaderIfNeeded (
153167 std::unique_ptr<FileBatchReader>&& file_reader, const std::shared_ptr<DataFileMeta>& file,
154168 const std::shared_ptr<arrow::Schema>& data_schema,
@@ -223,7 +237,6 @@ MergeFileSplitRead::MergeFileSplitRead(
223237 std::unique_ptr<SchemaManager>&& schema_manager, int32_t key_arity,
224238 const std::shared_ptr<arrow::Schema>& value_schema,
225239 const std::shared_ptr<arrow::Schema>& read_schema, const std::vector<int32_t >& projection,
226- const std::shared_ptr<MergeFunctionWrapper<KeyValue>>& merge_function_wrapper,
227240 const std::shared_ptr<FieldsComparator>& key_comparator,
228241 const std::shared_ptr<FieldsComparator>& interval_partition_comparator,
229242 const std::shared_ptr<FieldsComparator>& user_defined_seq_comparator,
@@ -234,7 +247,6 @@ MergeFileSplitRead::MergeFileSplitRead(
234247 value_schema_(value_schema),
235248 read_schema_(read_schema),
236249 projection_(projection),
237- merge_function_wrapper_(merge_function_wrapper),
238250 key_comparator_(key_comparator),
239251 interval_partition_comparator_(interval_partition_comparator),
240252 user_defined_seq_comparator_(user_defined_seq_comparator),
0 commit comments