Skip to content

Commit 9a1f887

Browse files
authored
Merge pull request #1910 from alibaba/datax_0913
Datax month 9 features
2 parents 051fe82 + 74f12ff commit 9a1f887

File tree

49 files changed

+2505
-609
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+2505
-609
lines changed

Diff for: README.md

+9
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,15 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N
110110

111111
DataX 后续计划月度迭代更新,也欢迎感兴趣的同学提交 Pull requests,月度更新内容介绍如下。
112112

113+
- [datax_v202309](https://github.com/alibaba/DataX/releases/tag/datax_v202309)
114+
- 支持Phoenix 同步数据添加 where条件
115+
- 支持华为 GaussDB读写插件
116+
- 修复ClickhouseReader 插件运行报错 Can't find bundle for base name
117+
- 增加 DataX调试模块
118+
- 修复 orc空文件报错问题
119+
- 优化obwriter性能
120+
- txtfilewriter 增加导出为insert语句功能支持
121+
113122
- [datax_v202308](https://github.com/alibaba/DataX/releases/tag/datax_v202308)
114123
- OTS 插件更新
115124
- databend 插件更新

Diff for: clickhousereader/src/main/java/com/alibaba/datax/plugin/reader/clickhousereader/ClickhouseReader.java

-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ public class ClickhouseReader extends Reader {
2727
private static final Logger LOG = LoggerFactory.getLogger(ClickhouseReader.class);
2828

2929
public static class Job extends Reader.Job {
30-
private static MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ClickhouseReader.class);
31-
3230
private Configuration jobConfig = null;
3331
private CommonRdbmsReader.Job commonRdbmsReaderMaster;
3432

Diff for: clickhousereader/src/test/java/com/alibaba/datax/plugin/reader/clickhousereader/ClickhouseReaderTest.java

-74
This file was deleted.

Diff for: common/src/main/java/com/alibaba/datax/common/element/DateColumn.java

+49-12
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,62 @@
55

66
import java.math.BigDecimal;
77
import java.math.BigInteger;
8+
import java.sql.Time;
89
import java.util.Date;
910

1011
/**
1112
* Created by jingxing on 14-8-24.
1213
*/
1314
public class DateColumn extends Column {
1415

15-
private DateType subType = DateType.DATETIME;
16-
17-
public static enum DateType {
18-
DATE, TIME, DATETIME
19-
}
20-
21-
/**
22-
* 构建值为null的DateColumn,使用Date子类型为DATETIME
23-
* */
24-
public DateColumn() {
25-
this((Long)null);
26-
}
16+
private DateType subType = DateType.DATETIME;
17+
18+
private int nanos = 0;
19+
20+
private int precision = -1;
21+
22+
public static enum DateType {
23+
DATE, TIME, DATETIME
24+
}
25+
26+
/**
27+
* 构建值为time(java.sql.Time)的DateColumn,使用Date子类型为TIME,只有时间,没有日期
28+
*/
29+
public DateColumn(Time time, int nanos, int jdbcPrecision) {
30+
this(time);
31+
if (time != null) {
32+
setNanos(nanos);
33+
}
34+
if (jdbcPrecision == 10) {
35+
setPrecision(0);
36+
}
37+
if (jdbcPrecision >= 12 && jdbcPrecision <= 17) {
38+
setPrecision(jdbcPrecision - 11);
39+
}
40+
}
41+
42+
public long getNanos() {
43+
return nanos;
44+
}
45+
46+
public void setNanos(int nanos) {
47+
this.nanos = nanos;
48+
}
49+
50+
public int getPrecision() {
51+
return precision;
52+
}
53+
54+
public void setPrecision(int precision) {
55+
this.precision = precision;
56+
}
57+
58+
/**
59+
* Builds a DateColumn whose value is null; per the original note, the Date sub-type is DATETIME.
60+
*/
61+
public DateColumn() {
62+
this((Long) null);
63+
}
2764

2865
/**
2966
* 构建值为stamp(Unix时间戳)的DateColumn,使用Date子类型为DATETIME
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package com.alibaba.datax.common.util;
2+
3+
import org.apache.commons.lang3.StringUtils;
4+
5+
import java.util.HashMap;
6+
import java.util.Map;
7+
8+
/**
9+
* @author jitongchen
10+
* @date 2023/9/7 9:47 AM
11+
*/
12+
public class LimitLogger {
13+
14+
private static Map<String, Long> lastPrintTime = new HashMap<>();
15+
16+
public static void limit(String name, long limit, LoggerFunction function) {
17+
if (StringUtils.isBlank(name)) {
18+
name = "__all__";
19+
}
20+
if (limit <= 0) {
21+
function.apply();
22+
} else {
23+
if (!lastPrintTime.containsKey(name)) {
24+
lastPrintTime.put(name, System.currentTimeMillis());
25+
function.apply();
26+
} else {
27+
if (System.currentTimeMillis() > lastPrintTime.get(name) + limit) {
28+
lastPrintTime.put(name, System.currentTimeMillis());
29+
function.apply();
30+
}
31+
}
32+
}
33+
}
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package com.alibaba.datax.common.util;
2+
3+
/**
 * A no-argument, no-result callback.
 *
 * Declared as a functional interface so the compiler rejects any change that
 * adds a second abstract method, which would break lambda call sites.
 *
 * @author molin.lxd
 * @date 2021-05-09
 */
@FunctionalInterface
public interface LoggerFunction {

    /** Runs the callback. */
    void apply();
}

Diff for: hdfsreader/pom.xml

+38-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
34
<parent>
45
<artifactId>datax-all</artifactId>
56
<groupId>com.alibaba.datax</groupId>
@@ -111,6 +112,42 @@
111112
<version>${datax-project-version}</version>
112113
</dependency>
113114

115+
<dependency>
116+
<groupId>org.apache.parquet</groupId>
117+
<artifactId>parquet-column</artifactId>
118+
<version>1.12.0</version>
119+
</dependency>
120+
<dependency>
121+
<groupId>org.apache.parquet</groupId>
122+
<artifactId>parquet-avro</artifactId>
123+
<version>1.12.0</version>
124+
</dependency>
125+
<dependency>
126+
<groupId>org.apache.parquet</groupId>
127+
<artifactId>parquet-common</artifactId>
128+
<version>1.12.0</version>
129+
</dependency>
130+
<dependency>
131+
<groupId>org.apache.parquet</groupId>
132+
<artifactId>parquet-format</artifactId>
133+
<version>2.3.0</version>
134+
</dependency>
135+
<dependency>
136+
<groupId>org.apache.parquet</groupId>
137+
<artifactId>parquet-jackson</artifactId>
138+
<version>1.12.0</version>
139+
</dependency>
140+
<dependency>
141+
<groupId>org.apache.parquet</groupId>
142+
<artifactId>parquet-encoding</artifactId>
143+
<version>1.12.0</version>
144+
</dependency>
145+
<dependency>
146+
<groupId>org.apache.parquet</groupId>
147+
<artifactId>parquet-hadoop</artifactId>
148+
<version>1.12.0</version>
149+
</dependency>
150+
114151
</dependencies>
115152

116153
<build>

Diff for: hdfsreader/src/main/assembly/package.xml

+22
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,28 @@
3737
<!--</includes>-->
3838
<!--<outputDirectory>plugin/reader/hdfsreader/libs</outputDirectory>-->
3939
<!--</fileSet>-->
40+
<!--<fileSet>-->
41+
<!--<directory>src/main/libs</directory>-->
42+
<!--<includes>-->
43+
<!--<include>*.*</include>-->
44+
<!--</includes>-->
45+
<!--<outputDirectory>plugin/reader/hdfsreader/libs</outputDirectory>-->
46+
<!--</fileSet>-->
47+
48+
<fileSet>
49+
<directory>src/main/libs</directory>
50+
<includes>
51+
<include>*.*</include>
52+
</includes>
53+
<outputDirectory>plugin/reader/ossreader/libs</outputDirectory>
54+
</fileSet>
55+
<fileSet>
56+
<directory>src/main/libs</directory>
57+
<includes>
58+
<include>*.*</include>
59+
</includes>
60+
<outputDirectory>plugin/reader/hivereader/libs</outputDirectory>
61+
</fileSet>
4062
</fileSets>
4163

4264
<dependencySets>

Diff for: hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Constant.java

+1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ public class Constant {
1010
public static final String CSV = "CSV";
1111
public static final String SEQ = "SEQ";
1212
public static final String RC = "RC";
13+
public static final String PARQUET = "PARQUET";
1314
}

0 commit comments

Comments
 (0)