Skip to content

Commit a53d809

Browse files
authored
[Improve][Format] Support complex data type parse of debezium_json (#8330)
1 parent 287b8c8 commit a53d809

File tree

8 files changed

+551
-58
lines changed

8 files changed

+551
-58
lines changed

seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateUtils.java

+32 -1
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,15 @@
2525
import java.util.Map;
2626
import java.util.regex.Pattern;
2727

28+
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
29+
import static java.time.format.DateTimeFormatter.ISO_LOCAL_TIME;
30+
import static java.time.format.DateTimeFormatter.ISO_OFFSET_TIME;
2831
import static java.time.temporal.ChronoField.DAY_OF_MONTH;
32+
import static java.time.temporal.ChronoField.HOUR_OF_DAY;
33+
import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
2934
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
35+
import static java.time.temporal.ChronoField.NANO_OF_SECOND;
36+
import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
3037
import static java.time.temporal.ChronoField.YEAR;
3138

3239
public class DateUtils {
@@ -49,7 +56,10 @@ public class DateUtils {
4956
Pattern.compile("\\d{4}年\\d{2}月\\d{2}日"),
5057
Pattern.compile("\\d{4}/\\d{2}/\\d{2}"),
5158
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}"),
52-
Pattern.compile("\\d{8}")
59+
Pattern.compile("\\d{8}"),
60+
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d{1,9})?Z"),
61+
Pattern.compile("\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}"),
62+
Pattern.compile("\\d{2}:\\d{2}:\\d{2}(\\.\\d{1,9})?"),
5363
};
5464

5565
public static final Map<Pattern, DateTimeFormatter> DATE_FORMATTER_MAP = new HashMap();
@@ -116,6 +126,27 @@ public class DateUtils {
116126
.appendValue(DAY_OF_MONTH, 2)
117127
.toFormatter())
118128
.toFormatter());
129+
DATE_FORMATTER_MAP.put(
130+
PATTERN_ARRAY[5],
131+
new DateTimeFormatterBuilder()
132+
.parseCaseInsensitive()
133+
.append(ISO_LOCAL_DATE)
134+
.appendLiteral('T')
135+
.append(
136+
new DateTimeFormatterBuilder()
137+
.appendValue(HOUR_OF_DAY, 2)
138+
.appendLiteral(':')
139+
.appendValue(MINUTE_OF_HOUR, 2)
140+
.optionalStart()
141+
.appendLiteral(':')
142+
.appendValue(SECOND_OF_MINUTE, 2)
143+
.optionalStart()
144+
.appendFraction(NANO_OF_SECOND, 0, 9, true)
145+
.appendLiteral('Z')
146+
.toFormatter())
147+
.toFormatter());
148+
DATE_FORMATTER_MAP.put(PATTERN_ARRAY[6], ISO_OFFSET_TIME);
149+
DATE_FORMATTER_MAP.put(PATTERN_ARRAY[7], ISO_LOCAL_TIME);
119150
}
120151

121152
/**

seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/debezium/DebeziumJsonDeserializationSchema.java

+17 -29
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,14 @@
3838
public class DebeziumJsonDeserializationSchema implements DeserializationSchema<SeaTunnelRow> {
3939
private static final long serialVersionUID = 1L;
4040

41+
private static final String OP_KEY = "op";
4142
private static final String OP_READ = "r"; // snapshot read
4243
private static final String OP_CREATE = "c"; // insert
4344
private static final String OP_UPDATE = "u"; // update
4445
private static final String OP_DELETE = "d"; // delete
46+
private static final String DATA_PAYLOAD = "payload";
47+
private static final String DATA_BEFORE = "before";
48+
private static final String DATA_AFTER = "after";
4549

4650
private static final String REPLICA_IDENTITY_EXCEPTION =
4751
"The \"before\" field of %s operation is null, "
@@ -105,21 +109,21 @@ private void deserializeMessage(
105109
}
106110

107111
try {
108-
JsonNode payload = getPayload(convertBytes(message));
109-
String op = payload.get("op").asText();
112+
JsonNode payload = getPayload(jsonDeserializer.deserializeToJsonNode(message));
113+
String op = payload.get(OP_KEY).asText();
110114

111115
switch (op) {
112116
case OP_CREATE:
113117
case OP_READ:
114-
SeaTunnelRow insert = convertJsonNode(payload.get("after"));
118+
SeaTunnelRow insert = debeziumRowConverter.parse(payload.get(DATA_AFTER));
115119
insert.setRowKind(RowKind.INSERT);
116120
if (tablePath != null) {
117121
insert.setTableId(tablePath.toString());
118122
}
119123
out.collect(insert);
120124
break;
121125
case OP_UPDATE:
122-
SeaTunnelRow before = convertJsonNode(payload.get("before"));
126+
SeaTunnelRow before = debeziumRowConverter.parse(payload.get(DATA_BEFORE));
123127
if (before == null) {
124128
throw new IllegalStateException(
125129
String.format(REPLICA_IDENTITY_EXCEPTION, "UPDATE"));
@@ -130,7 +134,7 @@ private void deserializeMessage(
130134
}
131135
out.collect(before);
132136

133-
SeaTunnelRow after = convertJsonNode(payload.get("after"));
137+
SeaTunnelRow after = debeziumRowConverter.parse(payload.get(DATA_AFTER));
134138
after.setRowKind(RowKind.UPDATE_AFTER);
135139

136140
if (tablePath != null) {
@@ -139,10 +143,10 @@ private void deserializeMessage(
139143
out.collect(after);
140144
break;
141145
case OP_DELETE:
142-
SeaTunnelRow delete = convertJsonNode(payload.get("before"));
146+
SeaTunnelRow delete = debeziumRowConverter.parse(payload.get(DATA_BEFORE));
143147
if (delete == null) {
144148
throw new IllegalStateException(
145-
String.format(REPLICA_IDENTITY_EXCEPTION, "UPDATE"));
149+
String.format(REPLICA_IDENTITY_EXCEPTION, "DELETE"));
146150
}
147151
delete.setRowKind(RowKind.DELETE);
148152
if (tablePath != null) {
@@ -153,39 +157,23 @@ private void deserializeMessage(
153157
default:
154158
throw new IllegalStateException(format("Unknown operation type '%s'.", op));
155159
}
156-
} catch (RuntimeException e) {
160+
} catch (Exception e) {
157161
// a big try catch to protect the processing.
158162
if (!ignoreParseErrors) {
159163
throw CommonError.jsonOperationError(FORMAT, new String(message), e);
160164
}
161165
}
162166
}
163167

164-
private JsonNode getPayload(JsonNode jsonNode) {
165-
if (debeziumEnabledSchema) {
166-
return jsonNode.get("payload");
167-
}
168-
return jsonNode;
169-
}
170-
171-
private JsonNode convertBytes(byte[] message) {
172-
try {
173-
return jsonDeserializer.deserializeToJsonNode(message);
174-
} catch (IOException t) {
175-
throw CommonError.jsonOperationError(FORMAT, new String(message), t);
176-
}
177-
}
178-
179-
private SeaTunnelRow convertJsonNode(JsonNode root) {
180-
return debeziumRowConverter.serializeValue(root);
181-
}
182-
183168
@Override
184169
public SeaTunnelDataType<SeaTunnelRow> getProducedType() {
185170
return this.rowType;
186171
}
187172

188-
private static SeaTunnelRowType createJsonRowType(SeaTunnelRowType databaseSchema) {
189-
return databaseSchema;
173+
private JsonNode getPayload(JsonNode jsonNode) {
174+
if (debeziumEnabledSchema) {
175+
return jsonNode.get(DATA_PAYLOAD);
176+
}
177+
return jsonNode;
190178
}
191179
}

seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/debezium/DebeziumRowConverter.java

+100 -28
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@
2626
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
2727
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
2828
import org.apache.seatunnel.api.table.type.SqlType;
29+
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
30+
import org.apache.seatunnel.common.utils.DateUtils;
31+
import org.apache.seatunnel.format.json.exception.SeaTunnelJsonFormatException;
2932

3033
import java.io.IOException;
3134
import java.io.Serializable;
@@ -37,40 +40,48 @@
3740
import java.time.LocalTime;
3841
import java.time.ZoneOffset;
3942
import java.time.format.DateTimeFormatter;
43+
import java.time.temporal.TemporalAccessor;
44+
import java.time.temporal.TemporalQueries;
4045
import java.util.ArrayList;
46+
import java.util.HashMap;
4147
import java.util.Iterator;
4248
import java.util.LinkedHashMap;
4349
import java.util.List;
4450
import java.util.Map;
51+
import java.util.concurrent.TimeUnit;
4552

4653
public class DebeziumRowConverter implements Serializable {
54+
private static final String DECIMAL_SCALE_KEY = "scale";
55+
private static final String DECIMAL_VALUE_KEY = "value";
4756

57+
private final Map<String, DateTimeFormatter> fieldFormatterMap = new HashMap<>();
4858
private final SeaTunnelRowType rowType;
4959

5060
public DebeziumRowConverter(SeaTunnelRowType rowType) {
5161
this.rowType = rowType;
5262
}
5363

54-
public SeaTunnelRow serializeValue(JsonNode node) {
55-
return (SeaTunnelRow) getValue(rowType, node);
64+
public SeaTunnelRow parse(JsonNode node) throws IOException {
65+
return (SeaTunnelRow) getValue(null, rowType, node);
5666
}
5767

58-
private Object getValue(SeaTunnelDataType<?> dataType, JsonNode value) {
68+
private Object getValue(String fieldName, SeaTunnelDataType<?> dataType, JsonNode value)
69+
throws IOException {
5970
SqlType sqlType = dataType.getSqlType();
6071
if (value == null) {
6172
return null;
6273
}
6374
switch (sqlType) {
6475
case BOOLEAN:
65-
return value.booleanValue();
76+
return value.asBoolean();
6677
case TINYINT:
67-
return (byte) value.intValue();
78+
return (byte) value.asInt();
6879
case SMALLINT:
69-
return (short) value.intValue();
80+
return (short) value.asInt();
7081
case INT:
71-
return value.intValue();
82+
return value.asInt();
7283
case BIGINT:
73-
return value.longValue();
84+
return value.asLong();
7485
case FLOAT:
7586
return value.floatValue();
7687
case DOUBLE:
@@ -88,42 +99,100 @@ private Object getValue(SeaTunnelDataType<?> dataType, JsonNode value) {
8899
throw new RuntimeException("Invalid bytes for Decimal field", e);
89100
}
90101
}
102+
if (value.has(DECIMAL_SCALE_KEY)) {
103+
return new BigDecimal(
104+
new BigInteger(value.get(DECIMAL_VALUE_KEY).binaryValue()),
105+
value.get(DECIMAL_SCALE_KEY).intValue());
106+
}
107+
return new BigDecimal(value.asText());
91108
case STRING:
92-
return value.textValue();
109+
return value.asText();
93110
case BYTES:
94111
try {
95112
return value.binaryValue();
96113
} catch (IOException e) {
97114
throw new RuntimeException("Invalid bytes field", e);
98115
}
99116
case DATE:
100-
try {
101-
int d = Integer.parseInt(value.toString());
102-
return LocalDate.ofEpochDay(d);
103-
} catch (NumberFormatException e) {
104-
return LocalDate.parse(
105-
value.textValue(), DateTimeFormatter.ofPattern("yyyy-MM-dd"));
117+
String dateStr = value.asText();
118+
if (value.canConvertToLong()) {
119+
return LocalDate.ofEpochDay(Long.parseLong(dateStr));
120+
}
121+
DateTimeFormatter dateFormatter = fieldFormatterMap.get(fieldName);
122+
if (dateFormatter == null) {
123+
dateFormatter = DateUtils.matchDateFormatter(dateStr);
124+
fieldFormatterMap.put(fieldName, dateFormatter);
125+
}
126+
if (dateFormatter == null) {
127+
throw new SeaTunnelJsonFormatException(
128+
CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
129+
String.format(
130+
"SeaTunnel can not parse this date format [%s] of field [%s]",
131+
dateStr, fieldName));
106132
}
133+
return dateFormatter.parse(dateStr).query(TemporalQueries.localDate());
107134
case TIME:
108-
try {
109-
long t = Long.parseLong(value.toString());
110-
return LocalTime.ofNanoOfDay(t * 1000L);
111-
} catch (NumberFormatException e) {
112-
return LocalTime.parse(value.textValue());
135+
String timeStr = value.asText();
136+
if (value.canConvertToLong()) {
137+
long time = Long.parseLong(timeStr);
138+
if (timeStr.length() == 8) {
139+
time = TimeUnit.SECONDS.toMicros(time);
140+
} else if (timeStr.length() == 11) {
141+
time = TimeUnit.MILLISECONDS.toMicros(time);
142+
}
143+
return LocalTime.ofNanoOfDay(time);
144+
}
145+
146+
DateTimeFormatter timeFormatter = fieldFormatterMap.get(fieldName);
147+
if (timeFormatter == null) {
148+
timeFormatter = DateUtils.matchDateFormatter(timeStr);
149+
fieldFormatterMap.put(fieldName, timeFormatter);
113150
}
151+
if (timeFormatter == null) {
152+
throw new SeaTunnelJsonFormatException(
153+
CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
154+
String.format(
155+
"SeaTunnel can not parse this date format [%s] of field [%s]",
156+
timeStr, fieldName));
157+
}
158+
159+
TemporalAccessor parsedTime = timeFormatter.parse(timeStr);
160+
return parsedTime.query(TemporalQueries.localTime());
114161
case TIMESTAMP:
115-
try {
162+
String timestampStr = value.asText();
163+
if (value.canConvertToLong()) {
116164
long timestamp = Long.parseLong(value.toString());
165+
if (timestampStr.length() == 10) {
166+
timestamp = TimeUnit.SECONDS.toMillis(timestamp);
167+
} else if (timestampStr.length() == 19) {
168+
timestamp = TimeUnit.NANOSECONDS.toMillis(timestamp);
169+
} else if (timestampStr.length() == 16) {
170+
timestamp = TimeUnit.MICROSECONDS.toMillis(timestamp);
171+
}
117172
return LocalDateTime.ofInstant(Instant.ofEpochMilli(timestamp), ZoneOffset.UTC);
118-
} catch (NumberFormatException e) {
119-
return LocalDateTime.parse(
120-
value.textValue(),
121-
DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"));
122173
}
174+
175+
DateTimeFormatter timestampFormatter = fieldFormatterMap.get(fieldName);
176+
if (timestampFormatter == null) {
177+
timestampFormatter = DateUtils.matchDateFormatter(timestampStr);
178+
fieldFormatterMap.put(fieldName, timestampFormatter);
179+
}
180+
if (timestampFormatter == null) {
181+
throw new SeaTunnelJsonFormatException(
182+
CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
183+
String.format(
184+
"SeaTunnel can not parse this date format [%s] of field [%s]",
185+
timestampStr, fieldName));
186+
}
187+
188+
TemporalAccessor parsedTimestamp = timestampFormatter.parse(timestampStr);
189+
LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime());
190+
LocalDate localDate = parsedTimestamp.query(TemporalQueries.localDate());
191+
return LocalDateTime.of(localDate, localTime);
123192
case ARRAY:
124193
List<Object> arrayValue = new ArrayList<>();
125194
for (JsonNode o : value) {
126-
arrayValue.add(getValue(((ArrayType) dataType).getElementType(), o));
195+
arrayValue.add(getValue(fieldName, ((ArrayType) dataType).getElementType(), o));
127196
}
128197
return arrayValue;
129198
case MAP:
@@ -132,7 +201,7 @@ private Object getValue(SeaTunnelDataType<?> dataType, JsonNode value) {
132201
Map.Entry<String, JsonNode> entry = it.next();
133202
mapValue.put(
134203
entry.getKey(),
135-
getValue(((MapType) dataType).getValueType(), entry.getValue()));
204+
getValue(null, ((MapType) dataType).getValueType(), entry.getValue()));
136205
}
137206
return mapValue;
138207
case ROW:
@@ -141,7 +210,10 @@ private Object getValue(SeaTunnelDataType<?> dataType, JsonNode value) {
141210
for (int i = 0; i < rowType.getTotalFields(); i++) {
142211
row.setField(
143212
i,
144-
getValue(rowType.getFieldType(i), value.get(rowType.getFieldName(i))));
213+
getValue(
214+
rowType.getFieldName(i),
215+
rowType.getFieldType(i),
216+
value.get(rowType.getFieldName(i))));
145217
}
146218
return row;
147219
default:

0 commit comments

Comments
 (0)