-package org.apache.flink.cdc.connectors.mongodb.source;
+package org.apache.flink.cdc.connectors.mongodb;
 
-import com.mongodb.client.MongoCollection;
-import com.mongodb.client.MongoDatabase;
-import com.mongodb.client.model.Filters;
-import com.mongodb.client.model.Updates;
 import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.cdc.connectors.mongodb.source.MongoDBSourceTestBase;
+import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
 import org.apache.flink.core.execution.JobClient;
 import org.apache.flink.metrics.Gauge;
 import org.apache.flink.metrics.Metric;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.source.SourceFunction;
 import org.apache.flink.streaming.api.operators.collect.CollectResultIterator;
 import org.apache.flink.streaming.api.operators.collect.CollectSinkOperator;
 import org.apache.flink.streaming.api.operators.collect.CollectSinkOperatorFactory;
 import org.apache.flink.streaming.api.operators.collect.CollectStreamSink;
+
+import com.mongodb.client.MongoCollection;
+import com.mongodb.client.MongoDatabase;
+import com.mongodb.client.model.Filters;
+import com.mongodb.client.model.Updates;
 import org.bson.Document;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-import org.apache.flink.streaming.api.functions.source.SourceFunction;
-import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
-import com.ververica.cdc.connectors.mongodb.MongoDBSource;
 
 import java.time.Duration;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
+import java.util.*;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-
 /** IT tests for {@link MongoDBSource}. */
 @RunWith(Parameterized.class)
 public class MongoDBMetricCase extends MongoDBSourceTestBase {
@@ -60,32 +57,38 @@ public void testSourceMetrics() throws Exception {
         String customerDatabase = mongoContainer.executeCommandFileInSeparateDatabase("customer");
         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
         env.setParallelism(1);
-        SourceFunction<String> sourceFunction = MongoDBSource.<String>builder()
-                .hosts(mongoContainer.getHostAndPort())
-                .username(FLINK_USER)
-                .password(FLINK_USER_PASSWORD)
-                .databaseList(customerDatabase) // databases to capture; regular expressions are supported
-                .collectionList(getCollectionNameRegex(customerDatabase, new String[] {"customers"})) // collections to capture; regular expressions are supported
-                .deserializer(new JsonDebeziumDeserializationSchema())
-                .build();
-        DataStreamSource<String> stream =
-                env.addSource(sourceFunction, "MongoDB CDC Source");
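+        // Enable checkpointing so the SourceFunction-based CDC source can commit its change
+        // stream offsets on checkpoints; the 200 ms interval presumably just keeps the test fast.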
+        env.enableCheckpointing(200L);
+        SourceFunction<String> sourceFunction =
+                MongoDBSource.<String>builder()
+                        .hosts(mongoContainer.getHostAndPort())
+                        .username(FLINK_USER)
+                        .password(FLINK_USER_PASSWORD)
+                        .databaseList(customerDatabase) // databases to capture; regular expressions are supported
+                        .collectionList(
+                                getCollectionNameRegex(
+                                        customerDatabase,
+                                        new String[] {"customers"})) // collections to capture; regular expressions are supported
+                        .deserializer(new JsonDebeziumDeserializationSchema())
+                        .build();
+        DataStreamSource<String> stream = env.addSource(sourceFunction, "MongoDB");
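+        // The operator name "MongoDB" must match the name passed to findOperatorMetricGroups below.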
         CollectResultIterator<String> iterator = addCollector(env, stream);
         JobClient jobClient = env.executeAsync();
         iterator.setJobClient(jobClient);
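+        // The collect iterator needs the JobClient so it can poll results from the running job.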
 
-        // ---------------------------- Snapshot phase ------------------------------
-        // Wait until we receive all 21 snapshot records
+        // // ---------------------------- Snapshot phase ------------------------------
+        // // Wait until we receive all 21 snapshot records
         int numSnapshotRecordsExpected = 21;
         int numSnapshotRecordsReceived = 0;
+
         while (numSnapshotRecordsReceived < numSnapshotRecordsExpected && iterator.hasNext()) {
             iterator.next();
             numSnapshotRecordsReceived++;
         }
 
         // Check metrics
         List<OperatorMetricGroup> metricGroups =
-                metricReporter.findOperatorMetricGroups(jobClient.getJobID(), "MongoDB CDC Source");
+                metricReporter.findOperatorMetricGroups(jobClient.getJobID(), "MongoDB");
+
         // There should be only 1 parallelism of source, so it's safe to get the only group
         OperatorMetricGroup group = metricGroups.get(0);
         Map<String, Metric> metrics = metricReporter.getMetricsByGroup(group);
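+        // The assertions in the next hunk look up individual gauges from this name -> Metric map.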
@@ -100,51 +103,48 @@ public void testSourceMetrics() throws Exception {
         Gauge<Long> currentEmitEventTimeLag =
                 (Gauge<Long>) metrics.get(MetricNames.CURRENT_EMIT_EVENT_TIME_LAG);
         assertEquals(
-                // InternalSourceReaderMetricGroup.UNDEFINED,
-                -1L,
+                InternalSourceReaderMetricGroup.UNDEFINED,
                 (long) currentEmitEventTimeLag.getValue());
-
         // currentFetchEventTimeLag should be UNDEFINED during snapshot phase
-        // assertTrue(metrics.containsKey(MetricNames.CURRENT_FETCH_EVENT_TIME_LAG));
-        // Gauge<Long> currentFetchEventTimeLag =
-        //         (Gauge<Long>) metrics.get(MetricNames.CURRENT_FETCH_EVENT_TIME_LAG);
-        // assertEquals(
-        //         -1L, (long) currentFetchEventTimeLag.getValue());
-
-        // // sourceIdleTime should be positive (we can't know the exact value)
-        // assertTrue(metrics.containsKey(MetricNames.SOURCE_IDLE_TIME));
-        // Gauge<Long> sourceIdleTime = (Gauge<Long>) metrics.get(MetricNames.SOURCE_IDLE_TIME);
-        // assertTrue(sourceIdleTime.getValue() > 0);
-        // assertTrue(sourceIdleTime.getValue() < TIMEOUT.toMillis());
-        //
-        // // --------------------------------- Binlog phase -----------------------------
-        // makeFirstPartChangeStreamEvents(
-        //         mongodbClient.getDatabase(customerDatabase), "customers");
-        // // Wait until we receive 4 changes made above
-        // int numBinlogRecordsExpected = 4;
-        // int numBinlogRecordsReceived = 0;
-        // while (numBinlogRecordsReceived < numBinlogRecordsExpected && iterator.hasNext()) {
-        //     iterator.next();
-        //     numBinlogRecordsReceived++;
-        // }
-        //
-        // // Check metrics
-        // // numRecordsOut
-        // assertEquals(
-        //         numSnapshotRecordsExpected + numBinlogRecordsExpected,
-        //         group.getIOMetricGroup().getNumRecordsOutCounter().getCount());
-        //
-        // // currentEmitEventTimeLag should be reasonably positive (we can't know the exact value)
-        // assertTrue(currentEmitEventTimeLag.getValue() > 0);
-        // assertTrue(currentEmitEventTimeLag.getValue() < TIMEOUT.toMillis());
-        //
-        // // currentEmitEventTimeLag should be reasonably positive (we can't know the exact value)
-        // assertTrue(currentFetchEventTimeLag.getValue() > 0);
-        // assertTrue(currentFetchEventTimeLag.getValue() < TIMEOUT.toMillis());
-        //
-        // // currentEmitEventTimeLag should be reasonably positive (we can't know the exact value)
-        // assertTrue(sourceIdleTime.getValue() > 0);
-        // assertTrue(sourceIdleTime.getValue() < TIMEOUT.toMillis());
+        assertTrue(metrics.containsKey(MetricNames.CURRENT_FETCH_EVENT_TIME_LAG));
+        Gauge<Long> currentFetchEventTimeLag =
+                (Gauge<Long>) metrics.get(MetricNames.CURRENT_FETCH_EVENT_TIME_LAG);
+        assertEquals(
+                InternalSourceReaderMetricGroup.UNDEFINED,
+                (long) currentFetchEventTimeLag.getValue());
+        // sourceIdleTime should be positive (we can't know the exact value)
+        assertTrue(metrics.containsKey(MetricNames.SOURCE_IDLE_TIME));
+        Gauge<Long> sourceIdleTime = (Gauge<Long>) metrics.get(MetricNames.SOURCE_IDLE_TIME);
+        assertTrue(sourceIdleTime.getValue() > 0);
+        assertTrue(sourceIdleTime.getValue() < TIMEOUT.toMillis());
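+        // End of snapshot-phase checks: both lag gauges stay UNDEFINED until the first
+        // change stream event is emitted.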
+
+        // --------------------------------- Binlog phase -----------------------------
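+        // This helper presumably applies the change stream operations (note the Filters/Updates
+        // imports above) that produce the 4 change records awaited below.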
+        makeFirstPartChangeStreamEvents(mongodbClient.getDatabase(customerDatabase), "customers");
+        // Wait until we receive 4 changes made above
+        int numBinlogRecordsExpected = 4;
+        int numBinlogRecordsReceived = 0;
+        while (numBinlogRecordsReceived < numBinlogRecordsExpected && iterator.hasNext()) {
+            iterator.next();
+            numBinlogRecordsReceived++;
+        }
+
+        // Check metrics
+        // numRecordsOut
+        assertEquals(
+                numSnapshotRecordsExpected + numBinlogRecordsExpected,
+                group.getIOMetricGroup().getNumRecordsOutCounter().getCount());
+
+        // currentEmitEventTimeLag should be reasonably positive (we can't know the exact value)
+        assertTrue(currentEmitEventTimeLag.getValue() > 0);
+        assertTrue(currentEmitEventTimeLag.getValue() < TIMEOUT.toMillis());
+
+        // currentFetchEventTimeLag should be reasonably positive (we can't know the exact value)
+        assertTrue(currentFetchEventTimeLag.getValue() > 0);
+        assertTrue(currentFetchEventTimeLag.getValue() < TIMEOUT.toMillis());
+
+        // sourceIdleTime should be reasonably positive (we can't know the exact value)
+        assertTrue(sourceIdleTime.getValue() > 0);
+        assertTrue(sourceIdleTime.getValue() < TIMEOUT.toMillis());
 
         jobClient.cancel().get();
         iterator.close();
@@ -153,7 +153,7 @@ public void testSourceMetrics() throws Exception {
     private <T> CollectResultIterator<T> addCollector(
             StreamExecutionEnvironment env, DataStream<T> stream) {
         TypeSerializer<T> serializer =
-                stream.getTransformation().getOutputType().createSerializer(env.getConfig());
+                stream.getTransformation().getOutputType().createSerializer(env.getConfig()); //
         String accumulatorName = "dataStreamCollect_" + UUID.randomUUID();
         CollectSinkOperatorFactory<T> factory =
                 new CollectSinkOperatorFactory<>(serializer, accumulatorName);