Skip to content

Commit c3fa16a

Browse files
authored
Use implicit row constructors. Optionally skip nested rows. Nullability bug fixes. (#64)
1 parent 8edcd27 commit c3fa16a

File tree

10 files changed

+204
-35
lines changed

10 files changed

+204
-35
lines changed

hoptimator-catalog/src/main/java/com/linkedin/hoptimator/catalog/AvroConverter.java

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ public static Schema avro(String namespace, String name, RelDataType dataType) {
2727
.filter(x -> !x.getName().startsWith("__")) // don't write out hidden fields
2828
.map(x -> new Schema.Field(sanitize(x.getName()), avro(namespace, x.getName(), x.getType()), describe(x), null))
2929
.collect(Collectors.toList());
30-
return Schema.createRecord(sanitize(name), dataType.toString(), namespace, false, fields);
30+
return createAvroSchemaWithNullability(Schema.createRecord(sanitize(name), dataType.toString(), namespace, false, fields),
31+
dataType.isNullable());
3132
} else {
3233
switch (dataType.getSqlTypeName()) {
3334
case INTEGER:
@@ -42,6 +43,15 @@ public static Schema avro(String namespace, String name, RelDataType dataType) {
4243
return createAvroTypeWithNullability(Schema.Type.DOUBLE, dataType.isNullable());
4344
case CHAR:
4445
return createAvroTypeWithNullability(Schema.Type.STRING, dataType.isNullable());
46+
case BOOLEAN:
47+
return createAvroTypeWithNullability(Schema.Type.BOOLEAN, dataType.isNullable());
48+
case ARRAY:
49+
return createAvroSchemaWithNullability(Schema.createArray(avro(null, null, dataType.getComponentType())),
50+
dataType.isNullable());
51+
// TODO support map types
52+
// Appears to require a Calcite version bump
53+
// case MAP:
54+
// return createAvroSchemaWithNullability(Schema.createMap(avroPrimitive(dataType.getValueType())), dataType.isNullable());
4555
case UNKNOWN:
4656
case NULL:
4757
return Schema.createUnion(Schema.create(Schema.Type.NULL));
@@ -56,14 +66,18 @@ public static Schema avro(String namespace, String name, RelProtoDataType relPro
5666
return avro(namespace, name, relProtoDataType.apply(factory));
5767
}
5868

59-
private static Schema createAvroTypeWithNullability(Schema.Type rawType, boolean nullable) {
69+
private static Schema createAvroSchemaWithNullability(Schema schema, boolean nullable) {
6070
if (nullable) {
61-
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(rawType));
71+
return Schema.createUnion(Schema.create(Schema.Type.NULL), schema);
6272
} else {
63-
return Schema.create(rawType);
73+
return schema;
6474
}
6575
}
6676

77+
/**
 * Builds an Avro schema for a primitive type, optionally made nullable.
 *
 * <p>Creates the primitive schema via {@code Schema.create(rawType)} and delegates the
 * nullability handling to {@code createAvroSchemaWithNullability}.
 *
 * @param rawType the Avro primitive type to create
 * @param nullable whether the resulting schema should be nullable
 * @return the schema produced by {@code createAvroSchemaWithNullability}
 */
private static Schema createAvroTypeWithNullability(Schema.Type rawType, boolean nullable) {
78+
return createAvroSchemaWithNullability(Schema.create(rawType), nullable);
79+
}
80+
6781
public static RelDataType rel(Schema schema, RelDataTypeFactory typeFactory) {
6882
RelDataType unknown = typeFactory.createUnknownType();
6983
switch (schema.getType()) {
@@ -74,17 +88,25 @@ public static RelDataType rel(Schema schema, RelDataTypeFactory typeFactory) {
7488
.filter(x -> x.getValue().getSqlTypeName() != unknown.getSqlTypeName())
7589
.collect(Collectors.toList()));
7690
case INT:
77-
// schema.isNullable() should be false for basic types iiuc
78-
return createRelTypeWithNullability(typeFactory, SqlTypeName.INTEGER, schema.isNullable());
91+
return createRelType(typeFactory, SqlTypeName.INTEGER);
7992
case LONG:
80-
return createRelTypeWithNullability(typeFactory, SqlTypeName.BIGINT, schema.isNullable());
93+
return createRelType(typeFactory, SqlTypeName.BIGINT);
8194
case ENUM:
95+
case FIXED:
8296
case STRING:
83-
return createRelTypeWithNullability(typeFactory, SqlTypeName.VARCHAR, schema.isNullable());
97+
return createRelType(typeFactory, SqlTypeName.VARCHAR);
8498
case FLOAT:
85-
return createRelTypeWithNullability(typeFactory, SqlTypeName.FLOAT, schema.isNullable());
99+
return createRelType(typeFactory, SqlTypeName.FLOAT);
86100
case DOUBLE:
87-
return createRelTypeWithNullability(typeFactory, SqlTypeName.DOUBLE, schema.isNullable());
101+
return createRelType(typeFactory, SqlTypeName.DOUBLE);
102+
case BOOLEAN:
103+
return createRelType(typeFactory, SqlTypeName.BOOLEAN);
104+
case ARRAY:
105+
return typeFactory.createArrayType(rel(schema.getElementType(), typeFactory), -1);
106+
// TODO support map types
107+
// Appears to require a Calcite version bump
108+
// case MAP:
109+
// return typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), rel(schema.getValueType(), typeFactory));
88110
case UNION:
89111
if (schema.isNullable() && schema.getTypes().size() == 2) {
90112
Schema innerType = schema.getTypes().stream().filter(x -> x.getType() != Schema.Type.NULL).findFirst().get();
@@ -102,9 +124,9 @@ public static RelDataType rel(Schema schema) {
102124
return rel(schema, DataType.DEFAULT_TYPE_FACTORY);
103125
}
104126

105-
private static RelDataType createRelTypeWithNullability(RelDataTypeFactory typeFactory, SqlTypeName typeName, boolean nullable) {
127+
private static RelDataType createRelType(RelDataTypeFactory typeFactory, SqlTypeName typeName) {
106128
RelDataType rawType = typeFactory.createSqlType(typeName);
107-
return typeFactory.createTypeWithNullability(rawType, nullable);
129+
return typeFactory.createTypeWithNullability(rawType, false);
108130
}
109131

110132
public static RelProtoDataType proto(Schema schema) {

hoptimator-catalog/src/main/java/com/linkedin/hoptimator/catalog/DataType.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
/** Common data types. Not authoritative or exhaustive. */
1515
public enum DataType {
1616

17-
VARCHAR_NULL(x -> x.createTypeWithNullability(x.createSqlType(SqlTypeName.VARCHAR), true)),
17+
VARCHAR(x -> x.createTypeWithNullability(x.createSqlType(SqlTypeName.VARCHAR), true)),
1818
VARCHAR_NOT_NULL(x -> x.createTypeWithNullability(x.createSqlType(SqlTypeName.VARCHAR), false));
1919

2020
public static final RelDataTypeFactory DEFAULT_TYPE_FACTORY = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
@@ -56,16 +56,24 @@ public static Struct struct(RelDataType relDataType) {
5656
/** Convenience builder for non-scalar types */
5757
public interface Struct extends RelProtoDataType {
5858

59-
default Struct with(String name, DataType dataType) {
59+
default Struct with(String name, RelDataType dataType) {
6060
return x -> {
6161
RelDataType existing = apply(x);
6262
RelDataTypeFactory.Builder builder = new RelDataTypeFactory.Builder(x);
6363
builder.addAll(existing.getFieldList());
64-
builder.add(name, dataType.rel(x));
64+
builder.add(name, dataType);
6565
return builder.build();
6666
};
6767
}
6868

69+
default Struct with(String name, DataType dataType) {
70+
return with(name, dataType.rel());
71+
}
72+
73+
default Struct with(String name, Struct struct) {
74+
return with(name, struct.rel());
75+
}
76+
6977
default RelDataType rel() {
7078
return apply(DEFAULT_TYPE_FACTORY);
7179
}
@@ -85,6 +93,17 @@ default Struct drop(String name) {
8593
};
8694
}
8795

96+
/**
 * Returns a Struct that yields this struct's row type without its ROW-typed fields.
 *
 * <p>Only the fields of this struct itself are examined: any field whose SQL type name is
 * {@code SqlTypeName.ROW} is omitted, and all remaining fields are copied into a new row
 * type. The work happens lazily, each time the returned Struct is applied to a type factory.
 *
 * @return a Struct whose row type contains only the non-ROW fields of this struct
 */
default Struct dropNestedRows() {
97+
return x -> {
98+
RelDataType dataType = apply(x);
99+
RelDataTypeFactory.Builder builder = new RelDataTypeFactory.Builder(x);
100+
builder.addAll(dataType.getFieldList().stream()
101+
// keep everything except nested rows
.filter(y -> y.getType().getSqlTypeName() != SqlTypeName.ROW)
102+
.collect(Collectors.toList()));
103+
return builder.build();
104+
};
105+
}
106+
88107
default Struct get(String name) {
89108
return x -> {
90109
RelDataTypeField field = apply(x).getField(name, true, false);

hoptimator-catalog/src/main/java/com/linkedin/hoptimator/catalog/ScriptImplementor.java

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,33 @@
11
package com.linkedin.hoptimator.catalog;
22

3+
import org.apache.calcite.rel.RelNode;
4+
import org.apache.calcite.rel.type.RelDataType;
5+
import org.apache.calcite.rel.type.RelDataTypeFactory;
6+
import org.apache.calcite.rel.type.RelDataTypeField;
7+
import org.apache.calcite.rel.type.RelDataTypeSystem;
8+
import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
9+
import org.apache.calcite.rel.rel2sql.SqlImplementor;
310
import org.apache.calcite.sql.SqlWriter;
4-
//import org.apache.calcite.sql.SqlWriterConfig;
11+
// needed in next Calcite version
12+
// import org.apache.calcite.sql.SqlWriterConfig;
513
import org.apache.calcite.sql.SqlDataTypeSpec;
6-
import org.apache.calcite.sql.SqlRowTypeNameSpec;
714
import org.apache.calcite.sql.SqlBasicTypeNameSpec;
15+
import org.apache.calcite.sql.SqlCollectionTypeNameSpec;
16+
import org.apache.calcite.sql.SqlRowTypeNameSpec;
17+
import org.apache.calcite.sql.SqlCall;
818
import org.apache.calcite.sql.SqlDialect;
919
import org.apache.calcite.sql.SqlIdentifier;
20+
import org.apache.calcite.sql.SqlKind;
21+
import org.apache.calcite.sql.SqlNode;
22+
import org.apache.calcite.sql.SqlNodeList;
23+
import org.apache.calcite.sql.SqlRowTypeNameSpec;
24+
import org.apache.calcite.sql.SqlSelect;
1025
import org.apache.calcite.sql.dialect.AnsiSqlDialect;
26+
import org.apache.calcite.sql.fun.SqlRowOperator;
1127
import org.apache.calcite.sql.parser.SqlParserPos;
12-
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
1328
import org.apache.calcite.sql.pretty.SqlPrettyWriter;
14-
import org.apache.calcite.rel.RelNode;
15-
import org.apache.calcite.rel.type.RelDataType;
16-
import org.apache.calcite.rel.type.RelDataTypeFactory;
17-
import org.apache.calcite.rel.type.RelDataTypeField;
18-
import org.apache.calcite.rel.type.RelDataTypeSystem;
19-
import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
20-
import org.apache.calcite.rel.rel2sql.SqlImplementor;
29+
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
30+
import org.apache.calcite.sql.util.SqlShuttle;
2131

2232
import java.util.Map;
2333
import java.util.List;
@@ -94,6 +104,7 @@ default String sql() {
94104
/** Render the script as DDL/SQL in the given dialect */
95105
default String sql(SqlDialect dialect) {
96106
SqlWriter w = new SqlPrettyWriter(dialect);
107+
// TODO: fix in next Calcite version
97108
// above is deprecated; replace with:
98109
// SqlWriter w = new SqlPrettyWriter(SqlWriterConfig.of().withDialect(dialect));
99110
implement(w);
@@ -129,9 +140,31 @@ public QueryImplementor(RelNode relNode) {
129140
public void implement(SqlWriter w) {
130141
RelToSqlConverter converter = new RelToSqlConverter(w.getDialect());
131142
SqlImplementor.Result result = converter.visitRoot(relNode);
132-
w.literal(result.asSelect().toSqlString(w.getDialect()).getSql());
143+
SqlSelect select = result.asSelect();
144+
if (select.getSelectList() != null) {
145+
select.setSelectList((SqlNodeList) select.getSelectList().accept(REMOVE_ROW_CONSTRUCTOR));
146+
}
147+
w.literal(select.toSqlString(w.getDialect()).getSql());
133148
}
134-
}
149+
150+
// A `ROW(...)` operator which will unparse as just `(...)`.
151+
// static: the initializer uses no instance state, so one shared operator per class suffices
private static final SqlRowOperator IMPLIED_ROW_OPERATOR = new SqlRowOperator(""); // empty string name
152+
153+
// A shuttle that replaces `ROW(...)` with just `(...)`.
154+
// Rewrites a SqlNode tree so that row-constructor calls are re-created with
// IMPLIED_ROW_OPERATOR; every other call is re-created with its original operator.
private final SqlShuttle REMOVE_ROW_CONSTRUCTOR = new SqlShuttle() {
155+
@Override
156+
public SqlNode visit(SqlCall call) {
157+
// Rewrite the operands first so that row constructors nested inside other calls are replaced too.
// NOTE(review): x.accept(this) would throw if an operand is null; confirm that no call
// reaching this shuttle carries null operands.
List<SqlNode> operands = call.getOperandList().stream().map(x -> x.accept(this)).collect(Collectors.toList());
158+
// A call counts as a row constructor if its kind is ROW or COLUMN_LIST, or its operator is a SqlRowOperator.
if ((call.getKind() == SqlKind.ROW || call.getKind() == SqlKind.COLUMN_LIST
159+
|| call.getOperator() instanceof SqlRowOperator)
160+
// Rows with a single operand keep their original operator; presumably because a bare `(x)`
// would read as a parenthesized expression rather than a row. TODO confirm.
&& operands.size() > 1) {
161+
return IMPLIED_ROW_OPERATOR.createCall(call.getParserPosition(), operands);
162+
} else {
163+
// Not a (multi-operand) row: rebuild the call unchanged apart from its rewritten operands.
return call.getOperator().createCall(call.getParserPosition(), operands);
164+
}
165+
}
166+
};
167+
}
135168

136169
/**
137170
* Implements a CREATE TABLE...WITH... DDL statement.
@@ -291,14 +324,18 @@ private static SqlDataTypeSpec toSpec(RelDataType dataType) {
291324
.map(x -> toSpec(x))
292325
.collect(Collectors.toList());
293326
return maybeNullable(dataType, new SqlDataTypeSpec(new SqlRowTypeNameSpec(SqlParserPos.ZERO, fieldNames, fieldTypes), SqlParserPos.ZERO));
327+
} if (dataType.getComponentType() != null) {
328+
return maybeNullable(dataType, new SqlDataTypeSpec(new SqlCollectionTypeNameSpec(new SqlBasicTypeNameSpec(
329+
dataType.getComponentType().getSqlTypeName(), SqlParserPos.ZERO), dataType.getSqlTypeName(), SqlParserPos.ZERO),
330+
SqlParserPos.ZERO));
294331
} else {
295332
return maybeNullable(dataType, new SqlDataTypeSpec(new SqlBasicTypeNameSpec(dataType.getSqlTypeName(), SqlParserPos.ZERO), SqlParserPos.ZERO));
296333
}
297334
}
298335

299336
private static SqlDataTypeSpec maybeNullable(RelDataType dataType, SqlDataTypeSpec spec) {
300337
if (!dataType.isNullable()) {
301-
return spec.withNullable(true);
338+
return spec.withNullable(false);
302339
} else {
303340
// we don't want "VARCHAR NULL", only "VARCHAR NOT NULL"
304341
return spec;

hoptimator-catalog/src/main/java/com/linkedin/hoptimator/catalog/TableResolver.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,19 @@
22

33
import org.apache.calcite.rel.type.RelDataType;
44
import org.apache.calcite.rel.type.RelDataTypeFactory;
5+
import org.apache.calcite.rel.type.RelProtoDataType;
56

67
import java.util.concurrent.ExecutionException;
8+
import java.util.function.Function;
79

810
/** Resolves a table name into a concrete row type. Usually involves a network call. */
911
public interface TableResolver {
1012
RelDataType resolve(String table) throws InterruptedException, ExecutionException;
1113

14+
/**
 * Adapts a plain function into a TableResolver.
 *
 * @param f function from table name to row type
 * @return a resolver whose {@code resolve(table)} returns {@code f.apply(table)}
 */
static TableResolver from(Function<String, RelDataType> f) {
15+
return x -> f.apply(x);
16+
}
17+
1218
/** Appends an extra column to the resolved type */
1319
default TableResolver with(String name, RelDataType dataType) {
1420
return x -> {
@@ -19,4 +25,20 @@ default TableResolver with(String name, RelDataType dataType) {
1925
return builder.build();
2026
};
2127
}
28+
29+
default TableResolver with(String name, DataType dataType) {
30+
return with(name, dataType.rel());
31+
}
32+
33+
default TableResolver with(String name, DataType.Struct struct) {
34+
return with(name, struct.rel());
35+
}
36+
37+
/**
 * Returns a resolver that post-processes each resolved row type as a {@code DataType.Struct}.
 *
 * <p>The resolved type is wrapped with {@code DataType.struct(...)}, passed through {@code f},
 * and converted back to a RelDataType with {@code rel()}.
 *
 * @param f transformation to apply to the resolved struct
 * @return a resolver yielding the transformed row type
 */
default TableResolver mapStruct(Function<DataType.Struct, DataType.Struct> f) {
38+
return x -> f.apply(DataType.struct(resolve(x))).rel();
39+
}
40+
41+
/**
 * Returns a resolver that applies {@code f} to each row type resolved by this resolver.
 *
 * @param f transformation to apply to the resolved row type
 * @return a resolver yielding {@code f.apply(resolve(table))}
 */
default TableResolver map(Function<RelDataType, RelDataType> f) {
42+
return x -> f.apply(resolve(x));
43+
}
2244
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package com.linkedin.hoptimator.catalog;
2+
3+
import org.apache.calcite.plan.RelOptUtil;
4+
import org.apache.calcite.rel.type.RelDataType;
5+
import org.apache.calcite.util.Litmus;
6+
import org.apache.avro.Schema;
7+
8+
import static org.junit.Assert.assertEquals;
9+
import static org.junit.Assert.assertTrue;
10+
import org.junit.Test;
11+
12+
public class AvroConverterTest {
13+
14+
@Test
15+
public void convertsNestedSchemas() {
16+
String schemaString = "{\"type\":\"record\",\"name\":\"E\",\"namespace\":\"ns\",\"fields\":[{\"name\":\"h\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"H\",\"namespace\":\"ns\",\"fields\":[{\"name\":\"A\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"A\",\"fields\":[]}]}]}]}]}";
17+
18+
Schema avroSchema1 = (new Schema.Parser()).parse(schemaString);
19+
RelDataType rel1 = AvroConverter.rel(avroSchema1);
20+
assertEquals(rel1.toString(), rel1.getFieldCount(), avroSchema1.getFields().size());
21+
assertTrue(rel1.toString(), rel1.getField("h", false, false) != null);
22+
RelDataType rel2 = rel1.getField("h", false, false).getType();
23+
assertTrue(rel2.toString(), rel2.isNullable());
24+
Schema avroSchema2 = avroSchema1.getField("h").schema().getTypes().get(1);
25+
assertEquals(rel2.toString(), rel2.getFieldCount(), avroSchema2.getFields().size());
26+
assertTrue(rel2.toString(), rel2.getField("A", false, false) != null);
27+
RelDataType rel3 = rel2.getField("A", false, false).getType();
28+
assertTrue(rel3.toString(), rel3.isNullable());
29+
Schema avroSchema3 = avroSchema2.getField("A").schema().getTypes().get(1);
30+
assertEquals(rel3.toString(), rel3.getFieldCount(), avroSchema3.getFields().size());
31+
Schema avroSchema4 = AvroConverter.avro("NS", "R", rel1);
32+
assertTrue("!avroSchema4.isNullable()", !avroSchema4.isNullable());
33+
assertEquals(avroSchema4.toString(), avroSchema4.getFields().size(), rel1.getFieldCount());
34+
Schema avroSchema5 = AvroConverter.avro("NS", "R", rel2);
35+
assertTrue("avroSchema5.isNullable()", avroSchema5.isNullable());
36+
assertEquals(avroSchema5.toString(), avroSchema5.getTypes().get(1).getFields().size(), rel2.getFieldCount());
37+
Schema avroSchema6 = AvroConverter.avro("NS", "R", rel3);
38+
assertEquals(avroSchema6.toString(), avroSchema6.getTypes().get(1).getFields().size(), rel3.getFieldCount());
39+
RelDataType rel4 = AvroConverter.rel(avroSchema4);
40+
assertTrue("types match", RelOptUtil.eq("rel4", rel4, "rel1", rel1, Litmus.THROW));
41+
}
42+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.linkedin.hoptimator.catalog;
2+
3+
import org.apache.calcite.rel.type.RelDataType;
4+
5+
import static org.junit.Assert.assertTrue;
6+
import org.junit.Test;
7+
8+
/** Tests for the {@code DataType.Struct} builder. */
public class DataTypeTest {
9+
10+
/** Verifies that {@code dropNestedRows()} removes a nested struct field and keeps a scalar one. */
@Test
11+
public void skipsNestedRows() {
12+
// "one" is a scalar VARCHAR; "two" is a nested struct (a ROW type)
DataType.Struct struct = DataType.struct().with("one", DataType.VARCHAR)
13+
.with("two", DataType.struct().with("three", DataType.VARCHAR));
14+
RelDataType row1 = struct.rel();
15+
// before dropping: both fields are present
assertTrue(row1.toString(), row1.getFieldCount() == 2);
16+
assertTrue(row1.toString(), row1.getField("one", false, false) != null);
17+
assertTrue(row1.toString(), row1.getField("two", false, false) != null);
18+
RelDataType row2 = struct.dropNestedRows().rel();
19+
// after dropping: only the scalar field remains
assertTrue(row2.toString(), row2.getFieldCount() == 1);
20+
assertTrue(row2.toString(), row2.getField("one", false, false) != null);
21+
assertTrue(row2.toString(), row2.getField("two", false, false) == null);
22+
}
23+
}

hoptimator-catalog/src/test/java/com/linkedin/hoptimator/catalog/ScriptImplementorTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ public void implementsFlinkCreateTableDDL() {
2828
// Output isn't necessarily deterministic, but should be something like:
2929
// CREATE TABLE IF NOT EXISTS "DATABASE"."TABLE1" ("idValue1" VARCHAR) WITH
3030
// ('connector'='kafka', 'properties.bootstrap.servers'='localhost:9092', 'topic'='topic1')
31-
assertTrue(out.contains("CREATE TABLE IF NOT EXISTS \"DATABASE\".\"TABLE1\" (\"idValue1\" VARCHAR) WITH "));
32-
assertTrue(out.contains("'connector'='kafka'"));
33-
assertTrue(out.contains("'properties.bootstrap.servers'='localhost:9092'"));
34-
assertTrue(out.contains("'topic'='topic1'"));
35-
assertFalse(out.contains("Row"));
31+
assertTrue(out, out.contains("CREATE TABLE IF NOT EXISTS \"DATABASE\".\"TABLE1\" (\"idValue1\" VARCHAR) WITH "));
32+
assertTrue(out, out.contains("'connector'='kafka'"));
33+
assertTrue(out, out.contains("'properties.bootstrap.servers'='localhost:9092'"));
34+
assertTrue(out, out.contains("'topic'='topic1'"));
35+
assertFalse(out, out.contains("Row"));
3636
}
3737
}

hoptimator-kafka-adapter/src/main/java/com/linkedin/hoptimator/catalog/kafka/RawKafkaSchemaFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ public Schema create(SchemaPlus parentSchema, String name, Map<String, Object> o
2727
String principal = (String) operand.getOrDefault("principal", "User:ANONYMOUS");
2828
Map<String, Object> clientConfig = (Map<String, Object>) operand.get("clientConfig");
2929
DataType.Struct rowType = DataType.struct()
30-
.with("PAYLOAD", DataType.VARCHAR_NULL)
31-
.with("KEY", DataType.VARCHAR_NULL);
30+
.with("PAYLOAD", DataType.VARCHAR)
31+
.with("KEY", DataType.VARCHAR);
3232
ConfigProvider connectorConfigProvider = ConfigProvider.from(clientConfig)
3333
.withPrefix("properties.")
3434
.with("connector", "upsert-kafka")

hoptimator-operator/src/main/java/com/linkedin/hoptimator/operator/subscription/SubscriptionReconciler.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ public Result reconcile(Request request) {
148148
// Mark the Subscription as failed.
149149
status.setFailed(true);
150150
status.setMessage("Error: " + e.getMessage());
151+
result = new Result(true, operator.failureRetryDuration());
151152
}
152153
} else if (status.getReady() == null && status.getResources() != null) {
153154
// Phase 2

0 commit comments

Comments
 (0)