Skip to content

Commit b25b5b7

Browse files
[FLINK-34466] create KafkaDatasetFacet
Signed-off-by: Pawel Leszczynski <[email protected]>
1 parent ca14634 commit b25b5b7

28 files changed

+698
-691
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package org.apache.flink.connector.kafka.lineage;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.connector.kafka.source.KafkaPropertiesUtil;

import javax.annotation.Nullable;

import java.util.Objects;
import java.util.Properties;

/** Default implementation of {@link KafkaDatasetFacet}. */
public class DefaultKafkaDatasetFacet implements KafkaDatasetFacet {

    public static final String KAFKA_FACET_NAME = "kafka";

    // Mutable on purpose: properties can be injected after construction via setProperties.
    private Properties properties;

    @Nullable private final TypeInformation typeInformation;
    private final KafkaDatasetIdentifier topicIdentifier;

    /**
     * Creates a facet with an immediate defensive copy of the given properties.
     *
     * @param topicIdentifier identifier (topic list or pattern) of the Kafka dataset
     * @param properties Kafka client properties; copied, the caller's instance is not retained
     * @param typeInformation type of the records in the dataset, may be null
     */
    public DefaultKafkaDatasetFacet(
            KafkaDatasetIdentifier topicIdentifier,
            Properties properties,
            @Nullable TypeInformation typeInformation) {
        this(topicIdentifier, typeInformation);

        this.properties = copyOf(properties);
    }

    /**
     * Creates a facet without properties; they can be supplied later through {@link
     * #setProperties(Properties)}.
     */
    public DefaultKafkaDatasetFacet(
            KafkaDatasetIdentifier topicIdentifier, @Nullable TypeInformation typeInformation) {
        this.topicIdentifier = topicIdentifier;
        this.typeInformation = typeInformation;
    }

    @Override
    public void setProperties(Properties properties) {
        this.properties = copyOf(properties);
    }

    @Override
    public Properties getProperties() {
        return properties;
    }

    @Override
    public TypeInformation getTypeInformation() {
        return typeInformation;
    }

    @Override
    public KafkaDatasetIdentifier getTopicIdentifier() {
        return topicIdentifier;
    }

    /**
     * Returns a defensive copy of the given properties so later mutations by the caller do not
     * leak into this facet. Single home for the copy logic used by both the constructor and the
     * setter.
     */
    private static Properties copyOf(Properties properties) {
        Properties copy = new Properties();
        KafkaPropertiesUtil.copyProperties(properties, copy);
        return copy;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        DefaultKafkaDatasetFacet that = (DefaultKafkaDatasetFacet) o;
        return Objects.equals(properties, that.properties)
                && Objects.equals(typeInformation, that.typeInformation)
                && Objects.equals(topicIdentifier, that.topicIdentifier);
    }

    @Override
    public int hashCode() {
        return Objects.hash(properties, typeInformation, topicIdentifier);
    }

    @Override
    public String name() {
        return KAFKA_FACET_NAME;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package org.apache.flink.connector.kafka.lineage;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

/** Default implementation of {@link KafkaDatasetIdentifier}. */
public class DefaultKafkaDatasetIdentifier implements KafkaDatasetIdentifier {

    @Nullable private final List<String> topics;
    @Nullable private final Pattern topicPattern;

    /**
     * Creates an identifier from a fixed topic list and/or a topic pattern. Parameters are
     * explicitly nullable: {@link #ofTopics(List)} passes a null pattern.
     */
    public DefaultKafkaDatasetIdentifier(
            @Nullable List<String> fixedTopics, @Nullable Pattern topicPattern) {
        this.topics = fixedTopics;
        this.topicPattern = topicPattern;
    }

    /**
     * Creates an identifier for topics subscribed via a pattern; the fixed topic list is empty.
     *
     * @param pattern pattern matching the subscribed topics
     * @return identifier backed by the given pattern
     */
    public static DefaultKafkaDatasetIdentifier ofPattern(Pattern pattern) {
        return new DefaultKafkaDatasetIdentifier(Collections.emptyList(), pattern);
    }

    /**
     * Creates an identifier for an explicit list of topics; the topic pattern is null.
     *
     * @param fixedTopics the subscribed topics
     * @return identifier backed by the given topic list
     */
    public static DefaultKafkaDatasetIdentifier ofTopics(List<String> fixedTopics) {
        return new DefaultKafkaDatasetIdentifier(fixedTopics, null);
    }

    @Nullable
    public List<String> getTopics() {
        return topics;
    }

    @Nullable
    public Pattern getTopicPattern() {
        return topicPattern;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        DefaultKafkaDatasetIdentifier that = (DefaultKafkaDatasetIdentifier) o;
        return Objects.equals(topics, that.topics)
                && Objects.equals(topicPattern, that.topicPattern);
    }

    @Override
    public int hashCode() {
        return Objects.hash(topics, topicPattern);
    }
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package org.apache.flink.connector.kafka.lineage;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.lineage.LineageDatasetFacet;

import java.util.Properties;

/** Facet definition to contain all Kafka specific information on Kafka sources and sinks. */
public interface KafkaDatasetFacet extends LineageDatasetFacet {
    /** Returns the Kafka client properties associated with the dataset. */
    Properties getProperties();

    /**
     * Returns the type information of the records in the dataset; may be {@code null} (the
     * default implementation stores it as a nullable field).
     */
    TypeInformation getTypeInformation();

    /** Returns the identifier (fixed topic list or topic pattern) of the Kafka dataset. */
    KafkaDatasetIdentifier getTopicIdentifier();

    /** Sets the Kafka client properties for this facet. */
    void setProperties(Properties properties);
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package org.apache.flink.connector.kafka.lineage;

import java.util.Optional;

/** Contains method to extract {@link KafkaDatasetFacet}. */
public interface KafkaDatasetFacetProvider {

    /**
     * Returns a Kafka dataset facet or {@code Optional.empty} in case an implementing class is
     * not able to identify a dataset.
     *
     * @return the extracted Kafka dataset facet, or {@link Optional#empty()} when no dataset can
     *     be identified
     */
    Optional<KafkaDatasetFacet> getKafkaDatasetFacet();
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package org.apache.flink.connector.kafka.lineage;

import javax.annotation.Nullable;

import java.util.List;
import java.util.regex.Pattern;

/** Kafka dataset identifier which can contain either a list of topics or a topic pattern. */
public interface KafkaDatasetIdentifier {
    /** Returns the fixed list of subscribed topics; may be {@code null}. */
    @Nullable
    List<String> getTopics();

    /** Returns the pattern matching the subscribed topics; may be {@code null}. */
    @Nullable
    Pattern getTopicPattern();

    /**
     * Assigns lineage dataset's name which is topic pattern if it is present or comma separated
     * list of topics.
     *
     * @return the topic pattern string if present, otherwise the comma-separated topic list
     * @throws IllegalStateException if neither a topic pattern nor a topic list is present
     */
    default String toLineageName() {
        if (getTopicPattern() != null) {
            return getTopicPattern().toString();
        }
        // Guard against identifiers carrying neither topics nor a pattern; String.join would
        // otherwise fail with an uninformative NullPointerException.
        if (getTopics() == null) {
            throw new IllegalStateException(
                    "Both topic list and topic pattern are null for Kafka dataset identifier");
        }
        return String.join(",", getTopics());
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package org.apache.flink.connector.kafka.lineage;

import java.util.Optional;

/** Contains method which allows extracting topic identifier. */
public interface KafkaDatasetIdentifierProvider {

    /**
     * Gets Kafka dataset identifier or empty in case a class implementing is not able to extract
     * dataset identifier.
     *
     * @return the extracted dataset identifier, or {@link Optional#empty()} when it cannot be
     *     determined
     */
    Optional<DefaultKafkaDatasetIdentifier> getDatasetIdentifier();
}

flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/lineage/LineageFacetProvider.java

Lines changed: 0 additions & 19 deletions
This file was deleted.

flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/lineage/LineageUtil.java

Lines changed: 10 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -20,89 +20,30 @@
2020
package org.apache.flink.connector.kafka.lineage;
2121

2222
import org.apache.flink.api.connector.source.Boundedness;
23-
import org.apache.flink.connector.kafka.lineage.facets.KafkaTopicListFacet;
24-
import org.apache.flink.connector.kafka.lineage.facets.KafkaTopicPatternFacet;
2523
import org.apache.flink.streaming.api.lineage.LineageDataset;
2624
import org.apache.flink.streaming.api.lineage.LineageDatasetFacet;
2725
import org.apache.flink.streaming.api.lineage.LineageVertex;
2826
import org.apache.flink.streaming.api.lineage.SourceLineageVertex;
2927

30-
import java.util.ArrayList;
3128
import java.util.Collection;
3229
import java.util.Collections;
3330
import java.util.List;
3431
import java.util.Map;
35-
import java.util.Optional;
3632
import java.util.Properties;
3733
import java.util.stream.Collectors;
3834

39-
/** Utility class with useful methods for managing dataset facets. */
35+
/** Utility class with useful methods for managing lineage objects. */
4036
public class LineageUtil {
4137

4238
private static final String KAFKA_DATASET_PREFIX = "kafka://";
4339
private static final String COMMA = ",";
4440
private static final String SEMICOLON = ";";
4541

46-
/**
47-
* Loads facet from any object implementing @link{DatasetFacetProvider} interface.
48-
*
49-
* @param object
50-
* @return
51-
*/
52-
public static Collection<LineageDatasetFacet> facetsFrom(Object object) {
53-
return Optional.of(object)
54-
.filter(LineageFacetProvider.class::isInstance)
55-
.map(LineageFacetProvider.class::cast)
56-
.map(LineageFacetProvider::getDatasetFacets)
57-
.orElse(Collections.emptyList());
58-
}
59-
60-
/**
61-
* Creates dataset from a list of facets. Uses {@link KafkaTopicListFacet} to extract dataset
62-
* name from. Dataset per each element of topic list is created
63-
*
64-
* @param facets
65-
* @return
66-
*/
67-
public static Collection<LineageDataset> datasetsFrom(
68-
String namespace, Collection<LineageDatasetFacet> facets) {
69-
// Check if topic list facet is available -> if so explode the list of facets
70-
Optional<KafkaTopicListFacet> topicList =
71-
facets.stream()
72-
.filter(KafkaTopicListFacet.class::isInstance)
73-
.map(KafkaTopicListFacet.class::cast)
74-
.findAny();
75-
76-
List<LineageDataset> datasets = new ArrayList<>();
77-
78-
// Explode list of other facets
79-
if (topicList.isPresent()) {
80-
List<LineageDatasetFacet> facetsWithoutTopicList =
81-
facets.stream().filter(f -> !f.equals(topicList)).collect(Collectors.toList());
82-
83-
datasets.addAll(
84-
topicList.get().topics.stream()
85-
.map(t -> datasetOf(namespace, t, facetsWithoutTopicList))
86-
.collect(Collectors.toList()));
87-
}
88-
89-
// Check if topic pattern is present
90-
// If so topic pattern will be used as a dataset name
91-
datasets.addAll(
92-
facets.stream()
93-
.filter(KafkaTopicPatternFacet.class::isInstance)
94-
.map(KafkaTopicPatternFacet.class::cast)
95-
.map(f -> datasetOf(namespace, f.pattern.toString(), facets))
96-
.collect(Collectors.toList()));
97-
return datasets;
98-
}
99-
100-
private static LineageDataset datasetOf(
101-
String namespace, String name, Collection<LineageDatasetFacet> facets) {
42+
public static LineageDataset datasetOf(String namespace, KafkaDatasetFacet kafkaDatasetFacet) {
10243
return new LineageDataset() {
10344
@Override
10445
public String name() {
105-
return name;
46+
return kafkaDatasetFacet.getTopicIdentifier().toLineageName();
10647
}
10748

10849
@Override
@@ -112,16 +53,19 @@ public String namespace() {
11253

11354
@Override
11455
public Map<String, LineageDatasetFacet> facets() {
115-
return facets.stream()
116-
.distinct()
117-
.collect(Collectors.toMap(LineageDatasetFacet::name, item -> item));
56+
return Collections.singletonMap(
57+
DefaultKafkaDatasetFacet.KAFKA_FACET_NAME, kafkaDatasetFacet);
11858
}
11959
};
12060
}
12161

122-
public static String datasetNamespaceOf(Properties properties) {
62+
public static String namespaceOf(Properties properties) {
12363
String bootstrapServers = properties.getProperty("bootstrap.servers");
12464

65+
if (bootstrapServers == null) {
66+
return KAFKA_DATASET_PREFIX;
67+
}
68+
12569
if (bootstrapServers.contains(COMMA)) {
12670
bootstrapServers = bootstrapServers.split(COMMA)[0];
12771
} else if (bootstrapServers.contains(SEMICOLON)) {

0 commit comments

Comments
 (0)