diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml
index 1ec8172bc..550797ae6 100644
--- a/checkstyle/suppressions.xml
+++ b/checkstyle/suppressions.xml
@@ -26,4 +26,9 @@
       files="StorageSinkConnectorConfig.java"
   />
+
+  <suppress
+      checks="CyclomaticComplexity"
+      files="SchemaProjector.java"
+  />
diff --git a/core/pom.xml b/core/pom.xml
index a2d9c2dc5..752773200 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -36,6 +36,10 @@
       <groupId>io.confluent</groupId>
       <artifactId>kafka-connect-avro-data</artifactId>
     </dependency>
+    <dependency>
+      <groupId>io.confluent</groupId>
+      <artifactId>kafka-connect-protobuf-converter</artifactId>
+    </dependency>
     <dependency>
       <groupId>joda-time</groupId>
       <artifactId>joda-time</artifactId>
diff --git a/core/src/main/java/io/confluent/connect/storage/schema/SchemaProjector.java b/core/src/main/java/io/confluent/connect/storage/schema/SchemaProjector.java
new file mode 100644
index 000000000..11c266e2d
--- /dev/null
+++ b/core/src/main/java/io/confluent/connect/storage/schema/SchemaProjector.java
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2018 Confluent Inc.
+ *
+ * Licensed under the Confluent Community License (the "License"); you may not use
+ * this file except in compliance with the License. You may obtain a copy of the
+ * License at
+ *
+ * http://www.confluent.io/confluent-community-license
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+package io.confluent.connect.storage.schema;
+
+import org.apache.kafka.connect.data.Field;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.Struct;
+import org.apache.kafka.connect.errors.SchemaProjectorException;
+import io.confluent.connect.avro.AvroData;
+import io.confluent.connect.protobuf.ProtobufData;
+
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+
+/**
+ * Fork of {@code org.apache.kafka.connect.data.SchemaProjector} that treats
+ * Avro/Protobuf enum schemas specially: a source enum is projectable onto a
+ * target enum as long as the target's symbol parameters are a superset of the
+ * source's, instead of requiring exactly equal parameters.
+ */
+public class SchemaProjector {
+
+  private static final Set<AbstractMap.SimpleImmutableEntry<Schema.Type, Schema.Type>> promotable =
+      new HashSet<>();
+
+  static {
+    Schema.Type[] promotableTypes = {Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32,
+        Schema.Type.INT64, Schema.Type.FLOAT32, Schema.Type.FLOAT64};
+    for (int i = 0; i < promotableTypes.length; ++i) {
+      for (int j = i; j < promotableTypes.length; ++j) {
+        promotable.add(new AbstractMap.SimpleImmutableEntry<>(
+            promotableTypes[i],
+            promotableTypes[j]));
+      }
+    }
+  }
+
+  public static Object project(Schema source, Object record, Schema target)
+      throws SchemaProjectorException {
+    checkMaybeCompatible(source, target);
+    if (source.isOptional() && !target.isOptional()) {
+      if (target.defaultValue() != null) {
+        if (record != null) {
+          return projectRequiredSchema(source, record, target);
+        } else {
+          return target.defaultValue();
+        }
+      } else {
+        throw new SchemaProjectorException("Writer schema is optional, "
+            + "however, target schema does not provide a default value.");
+      }
+    } else {
+      if (record != null) {
+        return projectRequiredSchema(source, record, target);
+      } else {
+        return null;
+      }
+    }
+  }
+
+  private static Object projectRequiredSchema(Schema source, Object record, Schema target)
+      throws SchemaProjectorException {
+    switch (target.type()) {
+      case INT8:
+      case INT16:
+      case INT32:
+      case INT64:
+      case FLOAT32:
+      case FLOAT64:
+      case BOOLEAN:
+      case BYTES:
+      case STRING:
+        return projectPrimitive(source, record, target);
+      case STRUCT:
+        return projectStruct(source, (Struct) record, target);
+      case ARRAY:
+        return projectArray(source, record, target);
+      case MAP:
+        return projectMap(source, record, target);
+      default:
+        return null;
+    }
+  }
+
+  private static Object projectStruct(Schema source, Struct sourceStruct, Schema target)
+      throws SchemaProjectorException {
+    Struct targetStruct = new Struct(target);
+    for (Field targetField : target.fields()) {
+      String fieldName = targetField.name();
+      Field sourceField = source.field(fieldName);
+      if (sourceField != null) {
+        Object sourceFieldValue = sourceStruct.get(fieldName);
+        try {
+          Object targetFieldValue = project(
+              sourceField.schema(),
+              sourceFieldValue,
+              targetField.schema());
+          targetStruct.put(fieldName, targetFieldValue);
+        } catch (SchemaProjectorException e) {
+          throw new SchemaProjectorException("Error projecting " + sourceField.name(), e);
+        }
+      } else if (targetField.schema().isOptional()) {
+        // Ignore missing field
+      } else if (targetField.schema().defaultValue() != null) {
+        targetStruct.put(fieldName, targetField.schema().defaultValue());
+      } else {
+        throw new SchemaProjectorException("Required field `" + fieldName
+            + "` is missing from source schema: " + source);
+      }
+    }
+    return targetStruct;
+  }
+
+
+  private static void checkMaybeCompatible(Schema source, Schema target) {
+    if (source.type() != target.type() && !isPromotable(source.type(), target.type())) {
+      throw new SchemaProjectorException("Schema type mismatch. source type: " + source.type()
+          + " and target type: " + target.type());
+    } else if (!Objects.equals(source.name(), target.name())) {
+      throw new SchemaProjectorException("Schema name mismatch. source name: " + source.name()
+          + " and target name: " + target.name());
+    } else if (source.parameters() != null && target.parameters() != null) {
+      if (isEnumSchema(source) && isEnumSchema(target)) {
+        // Enums stay compatible when the target retains every source symbol.
+        if (!target.parameters().entrySet().containsAll(source.parameters().entrySet())) {
+          throw new SchemaProjectorException("Schema parameters mismatch. Source parameter: "
+              + source.parameters()
+              + " is not a subset of target parameters: " + target.parameters());
+        }
+      } else if (!Objects.equals(source.parameters(), target.parameters())) {
+        throw new SchemaProjectorException("Schema parameters not equal. source parameters: "
+            + source.parameters() + " and target parameters: " + target.parameters());
+      }
+    }
+  }
+
+  static boolean isEnumSchema(Schema schema) {
+    return schema.parameters() != null
+        && (schema.parameters().containsKey(AvroData.AVRO_TYPE_ENUM)
+            || schema.parameters().containsKey(ProtobufData.PROTOBUF_TYPE_ENUM));
+  }
+
+  private static Object projectArray(Schema source, Object record, Schema target)
+      throws SchemaProjectorException {
+    List<Object> array = (List<Object>) record;
+    List<Object> retArray = new ArrayList<>();
+    for (Object entry : array) {
+      retArray.add(project(source.valueSchema(), entry, target.valueSchema()));
+    }
+    return retArray;
+  }
+
+  private static Object projectMap(Schema source, Object record, Schema target)
+      throws SchemaProjectorException {
+    Map<Object, Object> map = (Map<Object, Object>) record;
+    Map<Object, Object> retMap = new HashMap<>();
+    for (Map.Entry<Object, Object> entry : map.entrySet()) {
+      Object key = entry.getKey();
+      Object value = entry.getValue();
+      Object retKey = project(source.keySchema(), key, target.keySchema());
+      Object retValue = project(source.valueSchema(), value, target.valueSchema());
+      retMap.put(retKey, retValue);
+    }
+    return retMap;
+  }
+
+  private static Object projectPrimitive(Schema source, Object record, Schema target)
+      throws SchemaProjectorException {
+    assert source.type().isPrimitive();
+    assert target.type().isPrimitive();
+    Object result;
+    if (isPromotable(source.type(), target.type()) && record instanceof Number) {
+      Number numberRecord = (Number) record;
+      switch (target.type()) {
+        case INT8:
+          result = numberRecord.byteValue();
+          break;
+        case INT16:
+          result = numberRecord.shortValue();
+          break;
+        case INT32:
+          result = numberRecord.intValue();
+          break;
+        case INT64:
+          result = numberRecord.longValue();
+          break;
+        case FLOAT32:
+          result = numberRecord.floatValue();
+          break;
+        case FLOAT64:
+          result = numberRecord.doubleValue();
+          break;
+        default:
+          throw new SchemaProjectorException("Not promotable type.");
+      }
+    } else {
+      result = record;
+    }
+    return result;
+  }
+
+  private static boolean isPromotable(Schema.Type sourceType, Schema.Type targetType) {
+    return promotable.contains(new AbstractMap.SimpleImmutableEntry<>(sourceType, targetType));
+  }
+}
diff --git a/core/src/main/java/io/confluent/connect/storage/schema/StorageSchemaCompatibility.java b/core/src/main/java/io/confluent/connect/storage/schema/StorageSchemaCompatibility.java
index e9bdcec21..ed18a7410 100644
--- a/core/src/main/java/io/confluent/connect/storage/schema/StorageSchemaCompatibility.java
+++ b/core/src/main/java/io/confluent/connect/storage/schema/StorageSchemaCompatibility.java
@@ -17,7 +17,6 @@
 import org.apache.kafka.connect.connector.ConnectRecord;
 import org.apache.kafka.connect.data.Schema;
-import org.apache.kafka.connect.data.SchemaProjector;
 import org.apache.kafka.connect.errors.SchemaProjectorException;
 import org.apache.kafka.connect.sink.SinkRecord;
 import org.apache.kafka.connect.source.SourceRecord;
@@ -273,7 +272,14 @@ protected boolean checkSchemaParameters(
       Schema originalSchema,
       Schema currentSchema
   ) {
-    return !Objects.equals(originalSchema.parameters(), currentSchema.parameters());
+    if (SchemaProjector.isEnumSchema(originalSchema)
+        && SchemaProjector.isEnumSchema(currentSchema)) {
+      Map<String, String> originalParams = originalSchema.parameters();
+      Map<String, String> currentParams = currentSchema.parameters();
+      return !currentParams.entrySet().containsAll(originalParams.entrySet());
+    } else {
+      return !Objects.equals(originalSchema.parameters(), currentSchema.parameters());
+    }
   }
 
   protected boolean isPromotable(Schema.Type sourceType, Schema.Type targetType) {
diff --git a/core/src/test/java/io/confluent/connect/storage/schema/SchemaProjectorTest.java b/core/src/test/java/io/confluent/connect/storage/schema/SchemaProjectorTest.java
new file mode 100644
index 000000000..34f4f2334
--- /dev/null
+++ b/core/src/test/java/io/confluent/connect/storage/schema/SchemaProjectorTest.java
@@ -0,0 +1,90 @@
+package io.confluent.connect.storage.schema;
+
+import io.confluent.connect.avro.AvroData;
+import io.confluent.connect.protobuf.ProtobufData;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.SchemaBuilder;
+import org.apache.kafka.connect.errors.SchemaProjectorException;
+import org.junit.Test;
+
+import static org.junit.Assert.assertThrows;
+
+public class SchemaProjectorTest {
+
+  private static SchemaBuilder buildAvroEnumSchema(String name, int version, String... values) {
+    // Enum schema is unwrapped as strings; symbols are represented as parameters
+    SchemaBuilder enumSchema = SchemaBuilder.string()
+        .version(version)
+        .name(name);
+    enumSchema.parameter(AvroData.AVRO_TYPE_ENUM, name);
+    for (String value: values) {
+      enumSchema.parameter(AvroData.AVRO_TYPE_ENUM + "." + value, value);
+    }
+    return enumSchema;
+  }
+
+  private static SchemaBuilder buildProtobufEnumSchema(String name, int version, String... values) {
+    // Enum schema is unwrapped as strings or integers; symbols are represented as parameters
+    SchemaBuilder enumSchema = SchemaBuilder.string()
+        .version(version)
+        .name(name);
+    enumSchema.parameter(ProtobufData.PROTOBUF_TYPE_ENUM, name);
+    for (String value: values) {
+      enumSchema.parameter(ProtobufData.PROTOBUF_TYPE_ENUM + "." + value, value);
+    }
+    return enumSchema;
+  }
+
+  private static SchemaBuilder buildStringSchema(String name, int version) {
+    return SchemaBuilder.string()
+        .version(version)
+        .name(name);
+  }
+
+  private static final Schema ENUM_SCHEMA_A =
+      buildAvroEnumSchema("e1", 1, "RED", "GREEN", "BLUE").build();
+  private static final Schema ENUM_SCHEMA_A2 =
+      buildAvroEnumSchema("e1", 2, "RED", "GREEN").build();
+  private static final Schema ENUM_SCHEMA_B =
+      buildAvroEnumSchema("e1", 1, "RED", "GREEN", "BLUE", "YELLOW").build();
+  private static final Schema ENUM_SCHEMA_C =
+      buildProtobufEnumSchema("e1", 1, "RED", "GREEN", "BLUE").build();
+  private static final Schema ENUM_SCHEMA_C2 =
+      buildProtobufEnumSchema("e1", 2, "RED", "GREEN").build();
+  private static final Schema ENUM_SCHEMA_D =
+      buildProtobufEnumSchema("e1", 1, "RED", "GREEN", "BLUE", "YELLOW").build();
+  private static final Schema STRING_SCHEMA_A =
+      buildStringSchema("schema1", 1).build();
+  private static final Schema STRING_SCHEMA_B =
+      buildStringSchema("schema2", 1).build();
+
+  @Test
+  public void testCheckMaybeCompatibleWithStringSchema() {
+    String value = "test";
+
+    // Test String schema and Enum schema are treated differently; String schema name mismatch
+    assertThrows(SchemaProjectorException.class, () -> SchemaProjector.project(STRING_SCHEMA_A, value, STRING_SCHEMA_B));
+  }
+
+  @Test
+  public void testCheckMaybeCompatibleWithAvroEnumSchema() {
+    String value = "RED";
+
+    // Exception on addition of enum symbol
+    assertThrows(SchemaProjectorException.class, () -> SchemaProjector.project(ENUM_SCHEMA_B, value, ENUM_SCHEMA_A));
+
+    // No exception on removal of enum symbol
+    SchemaProjector.project(ENUM_SCHEMA_A2, value, ENUM_SCHEMA_A);
+  }
+
+  @Test
+  public void testCheckMaybeCompatibleWithProtobufEnumSchema() {
+    String value = "RED";
+
+    // Exception on addition of enum symbol
+    assertThrows(SchemaProjectorException.class, () -> SchemaProjector.project(ENUM_SCHEMA_D, value, ENUM_SCHEMA_C));
+
+    // No exception on removal of enum symbol
+    SchemaProjector.project(ENUM_SCHEMA_C2, value, ENUM_SCHEMA_C);
+  }
+}
diff --git a/core/src/test/java/io/confluent/connect/storage/schema/StorageSchemaCompatibilityTest.java b/core/src/test/java/io/confluent/connect/storage/schema/StorageSchemaCompatibilityTest.java
index c969e9410..e8e58a735 100644
--- a/core/src/test/java/io/confluent/connect/storage/schema/StorageSchemaCompatibilityTest.java
+++ b/core/src/test/java/io/confluent/connect/storage/schema/StorageSchemaCompatibilityTest.java
@@ -10,16 +10,23 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import io.confluent.connect.protobuf.ProtobufData;
 import org.apache.kafka.connect.data.Field;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaBuilder;
 import org.apache.kafka.connect.data.Struct;
 import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.errors.SchemaProjectorException;
 import org.apache.kafka.connect.sink.SinkRecord;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.junit.Test;
+import io.confluent.connect.avro.AvroData;
 
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.fail;
 
 public class StorageSchemaCompatibilityTest {
@@ -40,6 +47,30 @@ private static SchemaBuilder buildIntSchema(String name, int version) {
         .name(name);
   }
 
+  private static SchemaBuilder buildAvroEnumSchema(String name, int version, String... values) {
+    // Enum schema is unwrapped as strings; symbols are represented as parameters
+    SchemaBuilder enumSchema = SchemaBuilder.string()
+        .version(version)
+        .name(name);
+    enumSchema.parameter(AvroData.AVRO_TYPE_ENUM, name);
+    for (String value: values) {
+      enumSchema.parameter(AvroData.AVRO_TYPE_ENUM + "." + value, value);
+    }
+    return enumSchema;
+  }
+
+  private static SchemaBuilder buildProtobufEnumSchema(String name, int version, String... values) {
+    // Enum schema is unwrapped as strings or integers; symbols are represented as parameters
+    SchemaBuilder enumSchema = SchemaBuilder.string()
+        .version(version)
+        .name(name);
+    enumSchema.parameter(ProtobufData.PROTOBUF_TYPE_ENUM, name);
+    for (String value: values) {
+      enumSchema.parameter(ProtobufData.PROTOBUF_TYPE_ENUM + "." + value, value);
+    }
+    return enumSchema;
+  }
+
   private static final Schema SCHEMA_A =
       buildIntSchema("a", 2).build();
   private static final Schema SCHEMA_A_COPY =
@@ -97,7 +128,157 @@ private static SchemaBuilder buildStructSchema(String name, int version) {
       buildStructSchema("b", 2).field("extra", Schema.STRING_SCHEMA).build();
   private static final Schema SCHEMA_B_EXTRA_OPTIONAL_FIELD =
       buildStructSchema("b", 2).field("extra", Schema.OPTIONAL_STRING_SCHEMA).build();
+  private static final Schema ENUM_SCHEMA_A =
+      buildAvroEnumSchema("e1", 1, "RED", "GREEN", "BLUE").build();
+  private static final Schema ENUM_SCHEMA_B =
+      buildAvroEnumSchema("e1", 1, "RED", "GREEN").build();
+  private static final Schema ENUM_SCHEMA_C =
+      buildProtobufEnumSchema("e1", 1, "RED", "GREEN", "BLUE").build();
+  private static final Schema ENUM_SCHEMA_D =
+      buildProtobufEnumSchema("e1", 1, "RED", "GREEN").build();
+
+  @Test
+  public void testShouldChangeSchemaWithEnumAdditionAndBackwardCompatibility() {
+    String value = "BLUE";
+
+    // Avro schema test
+    SinkRecord sinkRecordAvro = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_A,
+        value,
+        0
+    );
+
+    boolean result = StorageSchemaCompatibility.BACKWARD.shouldChangeSchema(sinkRecordAvro, null, ENUM_SCHEMA_B);
+    assertTrue(result);
+
+    // Protobuf schema test
+    SinkRecord sinkRecordProtobuf = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_C,
+        value,
+        0
+    );
+
+    result = StorageSchemaCompatibility.BACKWARD.shouldChangeSchema(sinkRecordProtobuf, null, ENUM_SCHEMA_D);
+    assertTrue(result);
+  }
+
+  @Test
+  public void testShouldChangeSchemaWithEnumDeletionAndBackwardCompatibility() {
+    String value = "RED";
+
+    // Avro schema test
+    SinkRecord sinkRecordAvro = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_B,
+        value,
+        0
+    );
+
+    boolean result = StorageSchemaCompatibility.BACKWARD.shouldChangeSchema(sinkRecordAvro, null, ENUM_SCHEMA_A);
+    assertFalse(result);
+
+    // Protobuf schema test
+    SinkRecord sinkRecordProtobuf = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_D,
+        value,
+        0
+    );
+
+    result = StorageSchemaCompatibility.BACKWARD.shouldChangeSchema(sinkRecordProtobuf, null, ENUM_SCHEMA_C);
+    assertFalse(result);
+  }
+
+  @Test
+  public void testShouldChangeSchemaWithEnumAdditionAndForwardCompatibility() {
+    String value = "BLUE";
+
+    // Avro schema test
+    SinkRecord sinkRecordAvro = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_A,
+        value,
+        0
+    );
+
+    boolean result = StorageSchemaCompatibility.FORWARD.shouldChangeSchema(sinkRecordAvro, null, ENUM_SCHEMA_B);
+    assertTrue(result);
+
+    // Protobuf schema test
+    SinkRecord sinkRecordProtobuf = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_C,
+        value,
+        0
+    );
+
+    result = StorageSchemaCompatibility.FORWARD.shouldChangeSchema(sinkRecordProtobuf, null, ENUM_SCHEMA_D);
+    assertTrue(result);
+  }
+
+  @Test
+  public void testShouldChangeSchemaWithEnumDeletionAndForwardCompatibility() {
+    String value = "RED";
+
+    // Avro schema test
+    SinkRecord sinkRecordAvro = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_B,
+        value,
+        0
+    );
+
+    boolean result = StorageSchemaCompatibility.FORWARD.shouldChangeSchema(sinkRecordAvro, null, ENUM_SCHEMA_A);
+    assertFalse(result);
+
+    // Protobuf schema test
+    SinkRecord sinkRecordProtobuf = new SinkRecord(
+        "test-topic",
+        0,
+        null,
+        null,
+        ENUM_SCHEMA_D,
+        value,
+        0
+    );
+
+    result = StorageSchemaCompatibility.FORWARD.shouldChangeSchema(sinkRecordProtobuf, null, ENUM_SCHEMA_C);
+    assertFalse(result);
+  }
+
+  @Test
+  public void testProjectSchemaAfterAddingEnumSymbol() {
+    String value = "GREEN";
+
+    // Avro schema test
+    assertThrows(SchemaProjectorException.class, () -> SchemaProjector.project(ENUM_SCHEMA_A, value, ENUM_SCHEMA_B));
+
+    // Protobuf schema test
+    assertThrows(SchemaProjectorException.class, () -> SchemaProjector.project(ENUM_SCHEMA_C, value, ENUM_SCHEMA_D));
+  }
 
   @Test
   public void noneCompatibilityShouldConsiderSameVersionsAsUnchanged() {
diff --git a/pom.xml b/pom.xml
index 657f3ec1f..26a09f377 100644
--- a/pom.xml
+++ b/pom.xml
@@ -142,6 +142,11 @@
         <artifactId>kafka-connect-avro-data</artifactId>
         <version>${confluent.version.range}</version>
       </dependency>
+      <dependency>
+        <groupId>io.confluent</groupId>
+        <artifactId>kafka-connect-protobuf-converter</artifactId>
+        <version>${confluent.version.range}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.parquet</groupId>
         <artifactId>parquet-column</artifactId>