 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.kafka.connect.data.ConnectSchema;
 import org.apache.kafka.connect.data.Date;
+import org.apache.kafka.connect.data.Decimal;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaBuilder;
 import org.apache.kafka.connect.data.Timestamp;
@@ -53,40 +55,8 @@ public Schema getSchema(HdfsSinkConnectorConfig conf, Path path) {
     Reader reader = OrcFile.createReader(path, readerOptions);
 
     if (reader.getObjectInspector().getCategory() == ObjectInspector.Category.STRUCT) {
-      SchemaBuilder schemaBuilder = SchemaBuilder.struct().name("record").version(1);
       StructObjectInspector objectInspector = (StructObjectInspector) reader.getObjectInspector();
-
-      for (StructField schema : objectInspector.getAllStructFieldRefs()) {
-        ObjectInspector fieldObjectInspector = schema.getFieldObjectInspector();
-        String typeName = fieldObjectInspector.getTypeName();
-        Schema.Type schemaType;
-
-        switch (fieldObjectInspector.getCategory()) {
-          case PRIMITIVE:
-            PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils
-                .getTypeEntryFromTypeName(typeName);
-            if (java.sql.Date.class.isAssignableFrom(typeEntry.primitiveJavaClass)) {
-              schemaType = Date.SCHEMA.type();
-            } else if (java.sql.Timestamp.class.isAssignableFrom(typeEntry.primitiveJavaClass)) {
-              schemaType = Timestamp.SCHEMA.type();
-            } else {
-              schemaType = ConnectSchema.schemaType(typeEntry.primitiveJavaClass);
-            }
-            break;
-          case LIST:
-            schemaType = Schema.Type.ARRAY;
-            break;
-          case MAP:
-            schemaType = Schema.Type.MAP;
-            break;
-          default:
-            throw new DataException("Unknown type " + fieldObjectInspector.getCategory().name());
-        }
-
-        schemaBuilder.field(schema.getFieldName(), SchemaBuilder.type(schemaType).build());
-      }
-
-      return schemaBuilder.build();
+      return deriveStruct(objectInspector);
     } else {
       throw new ConnectException(
           "Top level type must be of type STRUCT, but was "
@@ -98,6 +68,59 @@ public Schema getSchema(HdfsSinkConnectorConfig conf, Path path) {
     }
   }
 
+  private Schema derivePrimitive(PrimitiveObjectInspector inspector) {
+    Class<?> klass = inspector.getTypeInfo().getPrimitiveJavaClass();
+    if (java.sql.Date.class.isAssignableFrom(klass)) {
+      return Date.SCHEMA;
+    } else if (java.sql.Timestamp.class.isAssignableFrom(klass)) {
+      return Timestamp.SCHEMA;
+    } else if (org.apache.hadoop.hive.common.type.HiveDecimal.class.isAssignableFrom(klass)) {
+      return Decimal.schema(inspector.scale());
+    }
+    return SchemaBuilder.type(ConnectSchema.schemaType(klass)).build();
+  }
+
+  private Schema deriveSchema(ObjectInspector inspector) {
+    switch (inspector.getCategory()) {
+      case PRIMITIVE:
+        return derivePrimitive((PrimitiveObjectInspector) inspector);
+      case MAP:
+        return deriveMap((MapObjectInspector) inspector);
+      case LIST:
+        return deriveList((ListObjectInspector) inspector);
+      case STRUCT:
+        return deriveStruct((StructObjectInspector) inspector);
+      default:
+        throw new DataException("Unknown type " + inspector.getCategory().name());
+    }
+  }
+
+  private Schema deriveStruct(StructObjectInspector inspector) {
+    SchemaBuilder schemaBuilder = SchemaBuilder.struct();
+    for (StructField field : inspector.getAllStructFieldRefs()) {
+      ObjectInspector fieldInspector = field.getFieldObjectInspector();
+      schemaBuilder.field(field.getFieldName(), deriveSchema(fieldInspector));
+    }
+    schemaBuilder.name("record").version(1);
+    return schemaBuilder.build();
+  }
+
+  private Schema deriveMap(MapObjectInspector inspector) {
+    return SchemaBuilder.map(
+        deriveSchema(inspector.getMapKeyObjectInspector()),
+        deriveSchema(inspector.getMapValueObjectInspector())
+    ).build();
+  }
+
+  private Schema deriveList(ListObjectInspector inspector) {
+    return SchemaBuilder.array(deriveSchema(inspector.getListElementObjectInspector())).build();
+  }
+
   @Override
   public boolean hasNext() {
     throw new UnsupportedOperationException();
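
For context, a minimal sketch (plain Java; the field names are illustrative, not taken from the patch) of the Connect schema the new recursive derivation would produce for a nested ORC type such as struct<name:string,tags:array<string>,attrs:map<string,bigint>>. The replaced loop mapped LIST and MAP columns to bare ARRAY/MAP types with no element, key, or value schemas; deriveSchema now recurses into them:

import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;

public class DeriveSchemaSketch {
  public static void main(String[] args) {
    // Hand-built equivalent of what deriveStruct would now return for the
    // hypothetical ORC type struct<name:string,tags:array<string>,attrs:map<string,bigint>>.
    Schema derived = SchemaBuilder.struct().name("record").version(1)
        .field("name", Schema.STRING_SCHEMA)
        // LIST recurses into its element type...
        .field("tags", SchemaBuilder.array(Schema.STRING_SCHEMA).build())
        // ...and MAP into its key and value types.
        .field("attrs", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT64_SCHEMA).build())
        .build();

    // The old loop built SchemaBuilder.type(Schema.Type.ARRAY).build() for lists,
    // which carries no element schema at all.
    System.out.println(derived.field("tags").schema().valueSchema().type());  // STRING
    System.out.println(derived.field("attrs").schema().keySchema().type());   // STRING
    System.out.println(derived.field("attrs").schema().valueSchema().type()); // INT64
  }
}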
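
The HiveDecimal branch in derivePrimitive leans on Connect's Decimal logical type, which wraps a BYTES schema and records the scale as a schema parameter. A small sketch of that behavior, using only the standard Connect data API (nothing here is patch-specific):

import java.math.BigDecimal;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Schema;

public class DecimalSchemaSketch {
  public static void main(String[] args) {
    // Decimal.schema(scale) is what derivePrimitive returns for HiveDecimal columns:
    // a BYTES schema tagged with the Decimal logical-type name and its scale.
    Schema schema = Decimal.schema(2);
    System.out.println(schema.type());        // BYTES
    System.out.println(schema.name());        // org.apache.kafka.connect.data.Decimal
    System.out.println(schema.parameters());  // {scale=2}

    // Logical values convert to the wire form as unscaled-value bytes and back.
    byte[] wire = Decimal.fromLogical(schema, new BigDecimal("12.34"));
    System.out.println(Decimal.toLogical(schema, wire));  // 12.34
  }
}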