diff --git a/pom.xml b/pom.xml
index 530680dcb..de9dcc058 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
<groupId>io.confluent</groupId>
<artifactId>kafka-connect-storage-common-parent</artifactId>
- <version>11.0.16</version>
+ <version>11.1.0</version>
<artifactId>kafka-connect-hdfs</artifactId>
@@ -52,12 +52,11 @@
- <confluent.maven.repo>https://packages.confluent.io/maven/</confluent.maven.repo>
2.0.0-M2
1.2.17-cp8
0.11.1
2.5.3
- <kafka.connect.storage.common.version>11.0.16</kafka.connect.storage.common.version>
+ <kafka.connect.storage.common.version>11.1.0</kafka.connect.storage.common.version>
3.2.2
0.13.0
2.17.1
@@ -70,7 +69,7 @@
<id>confluent</id>
<name>Confluent</name>
- <url>${confluent.maven.repo}</url>
+ <url>https://packages.confluent.io/maven/</url>
diff --git a/src/main/java/io/confluent/connect/hdfs/orc/OrcFileReader.java b/src/main/java/io/confluent/connect/hdfs/orc/OrcFileReader.java
index 4ccb18222..40db808e1 100644
--- a/src/main/java/io/confluent/connect/hdfs/orc/OrcFileReader.java
+++ b/src/main/java/io/confluent/connect/hdfs/orc/OrcFileReader.java
@@ -22,13 +22,15 @@
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions;
import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
import org.apache.kafka.connect.data.ConnectSchema;
import org.apache.kafka.connect.data.Date;
+import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Timestamp;
@@ -53,40 +55,8 @@ public Schema getSchema(HdfsSinkConnectorConfig conf, Path path) {
Reader reader = OrcFile.createReader(path, readerOptions);
if (reader.getObjectInspector().getCategory() == ObjectInspector.Category.STRUCT) {
- SchemaBuilder schemaBuilder = SchemaBuilder.struct().name("record").version(1);
StructObjectInspector objectInspector = (StructObjectInspector) reader.getObjectInspector();
-
- for (StructField schema : objectInspector.getAllStructFieldRefs()) {
- ObjectInspector fieldObjectInspector = schema.getFieldObjectInspector();
- String typeName = fieldObjectInspector.getTypeName();
- Schema.Type schemaType;
-
- switch (fieldObjectInspector.getCategory()) {
- case PRIMITIVE:
- PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils
- .getTypeEntryFromTypeName(typeName);
- if (java.sql.Date.class.isAssignableFrom(typeEntry.primitiveJavaClass)) {
- schemaType = Date.SCHEMA.type();
- } else if (java.sql.Timestamp.class.isAssignableFrom(typeEntry.primitiveJavaClass)) {
- schemaType = Timestamp.SCHEMA.type();
- } else {
- schemaType = ConnectSchema.schemaType(typeEntry.primitiveJavaClass);
- }
- break;
- case LIST:
- schemaType = Schema.Type.ARRAY;
- break;
- case MAP:
- schemaType = Schema.Type.MAP;
- break;
- default:
- throw new DataException("Unknown type " + fieldObjectInspector.getCategory().name());
- }
-
- schemaBuilder.field(schema.getFieldName(), SchemaBuilder.type(schemaType).build());
- }
-
- return schemaBuilder.build();
+ return deriveStruct(objectInspector);
} else {
throw new ConnectException(
"Top level type must be of type STRUCT, but was "
@@ -98,6 +68,59 @@ public Schema getSchema(HdfsSinkConnectorConfig conf, Path path) {
}
}
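+ /**
+ * Derives a Connect schema for a Hive primitive ObjectInspector, preserving the
+ * Date, Timestamp, and Decimal logical types.
+ */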
+ private Schema derivePrimitive(PrimitiveObjectInspector inspector) {
+ Class<?> klass = inspector.getTypeInfo().getPrimitiveJavaClass();
+ if (java.sql.Date.class.isAssignableFrom(klass)) {
+ return Date.SCHEMA;
+ } else if (java.sql.Timestamp.class.isAssignableFrom(klass)) {
+ return Timestamp.SCHEMA;
+ } else if (org.apache.hadoop.hive.common.type.HiveDecimal.class.isAssignableFrom(klass)) {
+ return Decimal.schema(inspector.scale());
+ }
+ return SchemaBuilder.type(ConnectSchema.schemaType(klass)).build();
+
+ }
+
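+ /**
+ * Recursively derives a Connect schema from any Hive ObjectInspector by dispatching
+ * on its category (primitive, map, list, or struct).
+ */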
+ private Schema deriveSchema(ObjectInspector inspector) {
+
+ switch (inspector.getCategory()) {
+ case PRIMITIVE:
+ return derivePrimitive((PrimitiveObjectInspector) inspector);
+ case MAP:
+ return deriveMap((MapObjectInspector) inspector);
+ case LIST:
+ return deriveList((ListObjectInspector) inspector);
+ case STRUCT:
+ return deriveStruct((StructObjectInspector) inspector);
+ default:
+ throw new DataException("Unknown type " + inspector.getCategory()
+ .name());
+ }
+ }
+
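+ /**
+ * Builds a Connect struct schema named "record" from a Hive StructObjectInspector,
+ * deriving each field schema recursively.
+ */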
+ private Schema deriveStruct(StructObjectInspector inspector) {
+
+ SchemaBuilder schemaBuilder = SchemaBuilder.struct();
+ for (StructField field: inspector.getAllStructFieldRefs()) {
+ ObjectInspector fieldInspector = field.getFieldObjectInspector();
+ schemaBuilder.field(field.getFieldName(), deriveSchema(fieldInspector));
+ }
+ schemaBuilder.name("record").version(1);
+ return schemaBuilder.build();
+ }
+
+
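+ /** Derives a Connect map schema from the key and value ObjectInspectors. */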
+ private Schema deriveMap(MapObjectInspector inspector) {
+ return SchemaBuilder.map(
+ deriveSchema(inspector.getMapKeyObjectInspector()),
+ deriveSchema(inspector.getMapValueObjectInspector())
+ ).build();
+ }
+
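+ /** Derives a Connect array schema from the list element ObjectInspector. */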
+ private Schema deriveList(ListObjectInspector inspector) {
+ return SchemaBuilder.array(deriveSchema(inspector.getListElementObjectInspector())).build();
+ }
+
@Override
public boolean hasNext() {
throw new UnsupportedOperationException();
diff --git a/src/main/java/io/confluent/connect/hdfs/orc/OrcHiveUtil.java b/src/main/java/io/confluent/connect/hdfs/orc/OrcHiveUtil.java
index 526b83891..a66b6d6b9 100644
--- a/src/main/java/io/confluent/connect/hdfs/orc/OrcHiveUtil.java
+++ b/src/main/java/io/confluent/connect/hdfs/orc/OrcHiveUtil.java
@@ -44,7 +44,7 @@ public OrcHiveUtil(HdfsSinkConnectorConfig config, HiveMetaStore hiveMetaStore)
@Override
public void alterSchema(String database, String tableName, Schema schema) {
Table table = hiveMetaStore.getTable(database, tableName);
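+ // convertSchemaMaybeLogical (from the upgraded storage-common) maps Connect logical
+ // types such as Date, Timestamp, and Decimal to the matching Hive column types.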
- List<FieldSchema> columns = HiveSchemaConverter.convertSchema(schema);
+ List<FieldSchema> columns = HiveSchemaConverter.convertSchemaMaybeLogical(schema);
table.setFields(columns);
hiveMetaStore.alterTable(table);
}
@@ -85,7 +85,7 @@ private Table constructOrcTable(
}
// convert the Connect schema to Hive columns
- List<FieldSchema> columns = HiveSchemaConverter.convertSchema(schema);
+ List<FieldSchema> columns = HiveSchemaConverter.convertSchemaMaybeLogical(schema);
table.setFields(columns);
table.setPartCols(partitioner.partitionFields());
return table;
diff --git a/src/main/java/io/confluent/connect/hdfs/orc/OrcRecordWriterProvider.java b/src/main/java/io/confluent/connect/hdfs/orc/OrcRecordWriterProvider.java
index aba47b9e5..53b612d22 100644
--- a/src/main/java/io/confluent/connect/hdfs/orc/OrcRecordWriterProvider.java
+++ b/src/main/java/io/confluent/connect/hdfs/orc/OrcRecordWriterProvider.java
@@ -70,7 +70,7 @@ public void preFooterWrite(OrcFile.WriterContext writerContext) {
}
};
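+ // convertMaybeLogical preserves Connect logical types (Date, Timestamp, Decimal)
+ // when deriving the Hive TypeInfo for the ORC writer.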
- typeInfo = HiveSchemaConverter.convert(schema);
+ typeInfo = HiveSchemaConverter.convertMaybeLogical(schema);
ObjectInspector objectInspector = OrcStruct.createObjectInspector(typeInfo);
log.info("Opening ORC record writer for: {}", filename);
@@ -90,7 +90,7 @@ public void preFooterWrite(OrcFile.WriterContext writerContext) {
);
Struct struct = (Struct) record.value();
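+ // Convert the Connect struct to an OrcStruct using both the Hive TypeInfo and the
+ // Connect schema, so logical and nested types are converted correctly.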
- OrcStruct row = OrcUtil.createOrcStruct(typeInfo, OrcUtil.convertStruct(struct));
+ OrcStruct row = (OrcStruct) OrcUtil.convert(typeInfo, struct.schema(), struct);
writer.addRow(row);
} else {
diff --git a/src/main/java/io/confluent/connect/hdfs/orc/OrcUtil.java b/src/main/java/io/confluent/connect/hdfs/orc/OrcUtil.java
index 25d783cb9..66d6fc054 100644
--- a/src/main/java/io/confluent/connect/hdfs/orc/OrcUtil.java
+++ b/src/main/java/io/confluent/connect/hdfs/orc/OrcUtil.java
@@ -15,7 +15,6 @@
package io.confluent.connect.hdfs.orc;
-import static org.apache.kafka.connect.data.Schema.Type.ARRAY;
import static org.apache.kafka.connect.data.Schema.Type.BOOLEAN;
import static org.apache.kafka.connect.data.Schema.Type.BYTES;
import static org.apache.kafka.connect.data.Schema.Type.FLOAT32;
@@ -24,32 +23,38 @@
import static org.apache.kafka.connect.data.Schema.Type.INT32;
import static org.apache.kafka.connect.data.Schema.Type.INT64;
import static org.apache.kafka.connect.data.Schema.Type.INT8;
-import static org.apache.kafka.connect.data.Schema.Type.MAP;
import static org.apache.kafka.connect.data.Schema.Type.STRING;
-import static org.apache.kafka.connect.data.Schema.Type.STRUCT;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.AbstractMap;
+import java.util.AbstractMap.SimpleEntry;
import java.util.HashMap;
import java.util.Map;
-import java.util.function.BiFunction;
+
+import java.util.stream.Collectors;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.kafka.connect.data.Date;
+import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Schema.Type;
@@ -59,24 +64,23 @@
import java.util.LinkedList;
import java.util.List;
+import java.util.function.BiFunction;
public final class OrcUtil {
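+ // Only primitive Connect types go through this lookup table; complex types (arrays,
+ // maps, structs) also need the Hive TypeInfo and are handled by convert/convertStruct below.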
- private static Map<Type, BiFunction<Struct, Field, Object>> CONVERSION_MAP = new HashMap<>();
+ private static final Map<Type, BiFunction<Struct, Field, Object>> PRIMITIVE_CONVERSION_MAP =
+ new HashMap<>();
static {
- CONVERSION_MAP.put(ARRAY, OrcUtil::convertArray);
- CONVERSION_MAP.put(BOOLEAN, OrcUtil::convertBoolean);
- CONVERSION_MAP.put(BYTES, OrcUtil::convertBytes);
- CONVERSION_MAP.put(FLOAT32, OrcUtil::convertFloat32);
- CONVERSION_MAP.put(FLOAT64, OrcUtil::convertFloat64);
- CONVERSION_MAP.put(INT8, OrcUtil::convertInt8);
- CONVERSION_MAP.put(INT16, OrcUtil::convertInt16);
- CONVERSION_MAP.put(INT32, OrcUtil::convertInt32);
- CONVERSION_MAP.put(INT64, OrcUtil::convertInt64);
- CONVERSION_MAP.put(MAP, OrcUtil::convertMap);
- CONVERSION_MAP.put(STRING, OrcUtil::convertString);
- CONVERSION_MAP.put(STRUCT, OrcUtil::convertStruct);
+ PRIMITIVE_CONVERSION_MAP.put(BOOLEAN, OrcUtil::convertBoolean);
+ PRIMITIVE_CONVERSION_MAP.put(BYTES, OrcUtil::convertBytes);
+ PRIMITIVE_CONVERSION_MAP.put(FLOAT32, OrcUtil::convertFloat32);
+ PRIMITIVE_CONVERSION_MAP.put(FLOAT64, OrcUtil::convertFloat64);
+ PRIMITIVE_CONVERSION_MAP.put(INT8, OrcUtil::convertInt8);
+ PRIMITIVE_CONVERSION_MAP.put(INT16, OrcUtil::convertInt16);
+ PRIMITIVE_CONVERSION_MAP.put(INT32, OrcUtil::convertInt32);
+ PRIMITIVE_CONVERSION_MAP.put(INT64, OrcUtil::convertInt64);
+ PRIMITIVE_CONVERSION_MAP.put(STRING, OrcUtil::convertString);
}
/**
@@ -87,8 +91,8 @@ public final class OrcUtil {
* @return the struct object
*/
@SuppressWarnings("unchecked")
- public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
- SettableStructObjectInspector oi = (SettableStructObjectInspector)
+ public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object[] objs) {
+ SettableStructObjectInspector oi = (SettableStructObjectInspector)
OrcStruct.createObjectInspector(typeInfo);
List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
@@ -107,87 +111,114 @@ public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
* @param struct the struct to convert
* @return the struct as a writable array
*/
- public static Object[] convertStruct(Struct struct) {
+ public static Object[] convertStruct(TypeInfo typeInfo, Struct struct) {
List