Skip to content

Commit 91c0fb1

Browse files
committed
Lookup join custom partition
1 parent 68a84fd commit 91c0fb1

File tree

5 files changed

+271
-52
lines changed

5 files changed

+271
-52
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.flink.table.connector.source.lookup.cache;
20+
21+
import org.apache.flink.table.connector.source.LookupTableSource.LookupRuntimeProvider;
22+
import org.apache.flink.table.partitioner.RowDataCustomPartitioner;
23+
24+
/**
* A {@link LookupRuntimeProvider} that pairs a wrapped {@link LookupRuntimeProvider} with a
* {@link RowDataCustomPartitioner}.
*
* <p>NOTE(review): presumably the planner uses the partitioner to distribute the lookup-join
* input rows before they reach the lookup function -- confirm against the planner code.
*/
25+
public interface PartitionedLookupProvider extends LookupRuntimeProvider {
26+
/**
27+
* Builds a {@link PartitionedLookupProvider} whose getters return the given arguments as-is.
* Neither argument is null-checked here.
*
* @param partitioner the partitioner returned by {@link #getPartitioner()}
* @param provider the lookup provider returned by {@link #getProvider()}
* @return an anonymous {@link PartitionedLookupProvider} holding both arguments
28+
*/
29+
static PartitionedLookupProvider of(
30+
RowDataCustomPartitioner partitioner, LookupRuntimeProvider provider) {
31+
return new PartitionedLookupProvider() {
32+
@Override
33+
public RowDataCustomPartitioner getPartitioner() {
34+
return partitioner;
35+
}
36+
37+
@Override
38+
public LookupRuntimeProvider getProvider() {
39+
return provider;
40+
}
41+
};
42+
}
43+
44+
/** Returns the {@link RowDataCustomPartitioner} associated with this provider. */
RowDataCustomPartitioner getPartitioner();
45+
46+
/** Returns the wrapped {@link LookupRuntimeProvider}. */
LookupRuntimeProvider getProvider();
47+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.flink.table.partitioner;
20+
21+
import org.apache.flink.api.common.functions.Function;
22+
import org.apache.flink.table.data.RowData;
23+
24+
/**
* A serializable {@link Function} that assigns a {@link RowData} record to a partition.
*
* <p>NOTE(review): the returned index is presumably expected to lie in
* {@code [0, numPartitions)} -- confirm against the code that invokes this partitioner.
*/
public interface RowDataCustomPartitioner extends java.io.Serializable, Function {
25+
/**
* Computes the partition for the given record.
*
* @param record the row to partition
* @param numPartitions the number of available partitions
* @return the partition index chosen for {@code record}
*/
int partition(RowData record, int numPartitions);
26+
}

flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/common/CommonExecLookupJoin.java

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.flink.streaming.api.operators.SimpleOperatorFactory;
3030
import org.apache.flink.streaming.api.operators.StreamOperatorFactory;
3131
import org.apache.flink.streaming.api.operators.async.AsyncWaitOperatorFactory;
32+
import org.apache.flink.streaming.api.transformations.PartitionTransformation;
3233
import org.apache.flink.table.api.TableException;
3334
import org.apache.flink.table.catalog.DataTypeFactory;
3435
import org.apache.flink.table.connector.ChangelogMode;
@@ -40,6 +41,7 @@
4041
import org.apache.flink.table.functions.TableFunction;
4142
import org.apache.flink.table.functions.UserDefinedFunction;
4243
import org.apache.flink.table.functions.UserDefinedFunctionHelper;
44+
import org.apache.flink.table.partitioner.RowDataCustomPartitioner;
4345
import org.apache.flink.table.planner.calcite.FlinkTypeFactory;
4446
import org.apache.flink.table.planner.codegen.CodeGeneratorContext;
4547
import org.apache.flink.table.planner.codegen.LookupJoinCodeGenerator;
@@ -55,6 +57,7 @@
5557
import org.apache.flink.table.planner.plan.schema.LegacyTableSourceTable;
5658
import org.apache.flink.table.planner.plan.schema.TableSourceTable;
5759
import org.apache.flink.table.planner.plan.utils.LookupJoinUtil;
60+
import org.apache.flink.table.planner.plan.utils.LookupJoinUtil.LookupFunctionAndPartitioner;
5861
import org.apache.flink.table.planner.utils.JavaScalaConversionUtil;
5962
import org.apache.flink.table.planner.utils.ShortcutUtils;
6063
import org.apache.flink.table.runtime.collector.ListenableCollector;
@@ -69,6 +72,7 @@
6972
import org.apache.flink.table.runtime.operators.join.lookup.LookupJoinRunner;
7073
import org.apache.flink.table.runtime.operators.join.lookup.LookupJoinWithCalcRunner;
7174
import org.apache.flink.table.runtime.operators.join.lookup.ResultRetryStrategy;
75+
import org.apache.flink.table.runtime.partitioner.RowDataCustomPartitionerWrapper;
7276
import org.apache.flink.table.runtime.types.PlannerTypeUtils;
7377
import org.apache.flink.table.runtime.types.TypeInfoDataTypeConverter;
7478
import org.apache.flink.table.runtime.typeutils.InternalSerializers;
@@ -98,6 +102,7 @@
98102
import java.util.Optional;
99103

100104
import static org.apache.flink.table.planner.calcite.FlinkTypeFactory.toLogicalType;
105+
import static org.apache.flink.table.planner.plan.nodes.exec.common.CommonExecSink.PARTITIONER_TRANSFORMATION;
101106
import static org.apache.flink.table.planner.utils.ShortcutUtils.unwrapTypeFactory;
102107
import static org.apache.flink.util.Preconditions.checkArgument;
103108
import static org.apache.flink.util.Preconditions.checkNotNull;
@@ -252,23 +257,28 @@ protected Transformation<RowData> createJoinTransformation(
252257
ResultRetryStrategy retryStrategy =
253258
retryOptions != null ? retryOptions.toRetryStrategy() : null;
254259

255-
UserDefinedFunction lookupFunction =
260+
Transformation<RowData> inputTransformation =
261+
(Transformation<RowData>) inputEdge.translateToPlan(planner);
262+
263+
LookupFunctionAndPartitioner lookupFunctionAndPartitioner =
256264
LookupJoinUtil.getLookupFunction(
257265
temporalTable,
258266
lookupKeys.keySet(),
259267
planner.getFlinkContext().getClassLoader(),
260268
isAsyncEnabled,
261269
retryStrategy);
270+
UserDefinedFunction lookupFunction = lookupFunctionAndPartitioner.getUserDefinedFunction();
271+
Optional<RowDataCustomPartitioner> partitioner =
272+
lookupFunctionAndPartitioner.getPartitioner();
262273
UserDefinedFunctionHelper.prepareInstance(config, lookupFunction);
263274

264275
boolean isLeftOuterJoin = joinType == FlinkJoinType.LEFT;
265276
if (isAsyncEnabled) {
266277
assert lookupFunction instanceof AsyncTableFunction;
267278
}
268279

269-
Transformation<RowData> inputTransformation =
270-
(Transformation<RowData>) inputEdge.translateToPlan(planner);
271-
280+
// Upsert-materialize mode expects the input stream to be partitioned by the lookup key. We
281+
// must trade off between correctness and performance.
272282
if (upsertMaterialize) {
273283
// upsertMaterialize only works on sync lookup mode, async lookup is unsupported.
274284
assert !isAsyncEnabled && !inputChangelogMode.containsOnly(RowKind.INSERT);
@@ -286,46 +296,57 @@ protected Transformation<RowData> createJoinTransformation(
286296
isLeftOuterJoin,
287297
planner.getExecEnv().getConfig().isObjectReuseEnabled(),
288298
lookupKeyContainsPrimaryKey);
299+
}
300+
301+
StreamOperatorFactory<RowData> operatorFactory;
302+
if (isAsyncEnabled) {
303+
operatorFactory =
304+
createAsyncLookupJoin(
305+
temporalTable,
306+
config,
307+
planner.getFlinkContext().getClassLoader(),
308+
lookupKeys,
309+
(AsyncTableFunction<Object>) lookupFunction,
310+
planner.createRelBuilder(),
311+
inputRowType,
312+
tableSourceRowType,
313+
resultRowType,
314+
isLeftOuterJoin,
315+
asyncLookupOptions);
289316
} else {
290-
StreamOperatorFactory<RowData> operatorFactory;
291-
if (isAsyncEnabled) {
292-
operatorFactory =
293-
createAsyncLookupJoin(
294-
temporalTable,
295-
config,
296-
planner.getFlinkContext().getClassLoader(),
297-
lookupKeys,
298-
(AsyncTableFunction<Object>) lookupFunction,
299-
planner.createRelBuilder(),
300-
inputRowType,
301-
tableSourceRowType,
302-
resultRowType,
303-
isLeftOuterJoin,
304-
asyncLookupOptions);
305-
} else {
306-
operatorFactory =
307-
createSyncLookupJoin(
308-
temporalTable,
309-
config,
310-
planner.getFlinkContext().getClassLoader(),
311-
lookupKeys,
312-
(TableFunction<Object>) lookupFunction,
313-
planner.createRelBuilder(),
314-
inputRowType,
315-
tableSourceRowType,
316-
resultRowType,
317-
isLeftOuterJoin,
318-
planner.getExecEnv().getConfig().isObjectReuseEnabled());
319-
}
317+
operatorFactory =
318+
createSyncLookupJoin(
319+
temporalTable,
320+
config,
321+
planner.getFlinkContext().getClassLoader(),
322+
lookupKeys,
323+
(TableFunction<Object>) lookupFunction,
324+
planner.createRelBuilder(),
325+
inputRowType,
326+
tableSourceRowType,
327+
resultRowType,
328+
isLeftOuterJoin,
329+
planner.getExecEnv().getConfig().isObjectReuseEnabled());
330+
}
320331

321-
return ExecNodeUtil.createOneInputTransformation(
322-
inputTransformation,
323-
createTransformationMeta(LOOKUP_JOIN_TRANSFORMATION, config),
324-
operatorFactory,
325-
InternalTypeInfo.of(resultRowType),
326-
inputTransformation.getParallelism(),
327-
false);
332+
if (partitioner.isPresent()) {
333+
Transformation<RowData> partitionedTransform =
334+
new PartitionTransformation<>(
335+
inputTransformation,
336+
new RowDataCustomPartitionerWrapper(partitioner.get()));
337+
createTransformationMeta(
338+
PARTITIONER_TRANSFORMATION, "Partitioner", "Partitioner", config)
339+
.fill(partitionedTransform);
340+
partitionedTransform.setParallelism(inputTransformation.getParallelism(), false);
341+
inputTransformation = partitionedTransform;
328342
}
343+
return ExecNodeUtil.createOneInputTransformation(
344+
inputTransformation,
345+
createTransformationMeta(LOOKUP_JOIN_TRANSFORMATION, config),
346+
operatorFactory,
347+
InternalTypeInfo.of(resultRowType),
348+
inputTransformation.getParallelism(),
349+
false);
329350
}
330351

331352
protected abstract Transformation<RowData> createSyncLookupJoinWithState(

0 commit comments

Comments
 (0)