-
-
Notifications
You must be signed in to change notification settings - Fork 143
[AVRO] #589: Fix schema not including base class for records with subclasses #593
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
cowtowncoder
merged 11 commits into
FasterXML:2.19
from
MichalFoksa:feature/2.19/589-schema-does-not-include-base-class-for-records-with-subclasses
May 22, 2025
Merged
Changes from 3 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
03d344a
Record visitor includes concrete base class (base class is not abstra…
MichalFoksa 46cbe12
Add subType schema into this union, unless it is already there.
MichalFoksa 99311ae
Add subType schema into this union, unless it is already there.
MichalFoksa 8f09acc
comments
MichalFoksa c540e07
Using reference-equality Set for unionSchemas. It makes for simples c…
MichalFoksa 88fa1dc
Making classes used in test private so that they do not soak outside.
MichalFoksa 792e0d2
Prevention of StackOverflowError exception when base class is explici…
MichalFoksa a13d0d6
test cases renamed
MichalFoksa 67c6042
Add release notes
cowtowncoder 41d3060
Warnings removal, cosmetic changes
cowtowncoder 31ac1b7
Minor tweaking to reduce diff, use Jackson's RuntimeException
cowtowncoder File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,33 @@ public class RecordVisitor | |
*/ | ||
protected final boolean _overridden; | ||
|
||
/** | ||
* When Avro schema for this JavaType ({@code _type}) results in UNION of multiple Avro types, _typeSchema keeps track | ||
* which Avro type in the UNION represents this JavaType ({@code _type}) so that fields of this JavaType can be set to the right Avro type by {@code builtAvroSchema()}. | ||
* | ||
* Example: | ||
* <pre> | ||
* @JsonSubTypes({ | ||
* @JsonSubTypes.Type(value = Apple.class), | ||
* @JsonSubTypes.Type(value = Pear.class) }) | ||
* class Fruit {} | ||
* | ||
* class Apple extends Fruit {} | ||
* class Orange extends Fruit {} | ||
* </pre> | ||
* When _type = Fruit.class | ||
* Then | ||
* _avroSchema if Fruit.class is union of Fruit record, Apple record and Orange record schemas: [ | ||
* { name: Fruit, type: record, field: [..] }, <--- _typeSchema points here | ||
* { name: Apple, type: record, field: [..] }, | ||
* { name: Orange, type: record, field: [..]} | ||
* ] | ||
* _typeSchema points to Fruit.class without subtypes record schema | ||
* | ||
* FIXME: When _thisSchema is not null, then _overridden must be true, therefore (_overridden == true) can be replaced with (_thisSchema != null), | ||
* but it might be considered API change cause _overridden has protected access modifier. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can do that as a follow-up for 2.20 (2.x branch) |
||
*/ | ||
private Schema _typeSchema; | ||
protected Schema _avroSchema; | ||
|
||
protected List<Schema.Field> _fields = new ArrayList<>(); | ||
|
@@ -42,32 +69,76 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm | |
_visitorWrapper = visitorWrapper; | ||
// Check if the schema for this record is overridden | ||
BeanDescription bean = getProvider().getConfig().introspectDirectClassAnnotations(_type); | ||
List<NamedType> subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); | ||
AvroSchema ann = bean.getClassInfo().getAnnotation(AvroSchema.class); | ||
if (ann != null) { | ||
_avroSchema = AvroSchemaHelper.parseJsonSchema(ann.value()); | ||
_overridden = true; | ||
} else if (subTypes != null && !subTypes.isEmpty()) { | ||
List<Schema> unionSchemas = new ArrayList<>(); | ||
try { | ||
for (NamedType subType : subTypes) { | ||
JsonSerializer<?> ser = getProvider().findValueSerializer(subType.getType()); | ||
VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); | ||
ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); | ||
unionSchemas.add(visitor.getAvroSchema()); | ||
} | ||
_avroSchema = Schema.createUnion(unionSchemas); | ||
_overridden = true; | ||
} catch (JsonMappingException jme) { | ||
throw new RuntimeException("Failed to build schema", jme); | ||
} | ||
} else { | ||
_avroSchema = AvroSchemaHelper.initializeRecordSchema(bean); | ||
// If Avro schema for this _type results in UNION I want to know Avro type where to assign fields | ||
_typeSchema = AvroSchemaHelper.initializeRecordSchema(bean); | ||
_avroSchema = _typeSchema; | ||
_overridden = false; | ||
AvroMeta meta = bean.getClassInfo().getAnnotation(AvroMeta.class); | ||
if (meta != null) { | ||
_avroSchema.addProp(meta.key(), meta.value()); | ||
} | ||
|
||
List<NamedType> subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); | ||
if (subTypes != null && !subTypes.isEmpty()) { | ||
|
||
// At this point calculating hashCode for _typeSchema fails with NPE because RecordSchema.fields is NULL | ||
// (see org.apache.avro.Schema.RecordSchema#computeHash). | ||
// Therefore, _typeSchema must be added into union at very end, or unionSchemas must not be HashSet (or any | ||
// other type calling hashCode() for equality check). | ||
Set<Schema> unionSchemas = new HashSet<>(); | ||
// ArrayList<Schema> unionSchemas = new ArrayList<>(); | ||
|
||
// IdentityHashMap is used because it is using reference-equality. | ||
// Set<Schema> unionSchemas = Collections.newSetFromMap(new IdentityHashMap<>()); | ||
|
||
// Initialize with this schema is | ||
// if (_type.isConcrete()) { | ||
// unionSchemas.add(_typeSchema); | ||
// } | ||
|
||
try { | ||
for (NamedType subType : subTypes) { | ||
JsonSerializer<?> ser = getProvider().findValueSerializer(subType.getType()); | ||
VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); | ||
ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); | ||
Schema subTypeSchema = visitor.getAvroSchema(); | ||
// Add subType schema into this union, unless it is already there. | ||
// When subType schema is itself a union, include all its types into this union | ||
if (subTypeSchema.getType() == Type.UNION) { | ||
// subTypeSchema.getTypes().stream() | ||
// .filter(unionElement -> !unionSchemas.contains(unionElement)) | ||
// .forEach(unionSchemas::add); | ||
// or | ||
// for( Schema unionElement: subTypeSchema.getTypes()) { | ||
// if (unionSchemas.contains(unionElement)) { | ||
// continue; | ||
// } | ||
// unionSchemas.add(unionElement); | ||
// } | ||
unionSchemas.addAll(subTypeSchema.getTypes()); | ||
} else { | ||
// if (!unionSchemas.contains(subTypeSchema)) { | ||
// unionSchemas.add(subTypeSchema); | ||
// } | ||
unionSchemas.add(subTypeSchema); | ||
} | ||
} | ||
|
||
ArrayList<Schema> unionList = new ArrayList<>(unionSchemas); | ||
// add _type schema into union | ||
if (_type.isConcrete()) { | ||
unionList.add(_typeSchema); | ||
} | ||
_avroSchema = Schema.createUnion(unionList); | ||
} catch (JsonMappingException jme) { | ||
throw new RuntimeException("Failed to build schema", jme); | ||
} | ||
} | ||
} | ||
_visitorWrapper.getSchemas().addSchema(type, _avroSchema); | ||
} | ||
|
@@ -76,7 +147,7 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm | |
public Schema builtAvroSchema() { | ||
if (!_overridden) { | ||
// Assumption now is that we are done, so let's assign fields | ||
_avroSchema.setFields(_fields); | ||
_typeSchema.setFields(_fields); | ||
} | ||
return _avroSchema; | ||
} | ||
|
198 changes: 198 additions & 0 deletions
198
...est/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
package com.fasterxml.jackson.dataformat.avro.schema; | ||
|
||
import com.fasterxml.jackson.annotation.JsonSubTypes; | ||
import com.fasterxml.jackson.databind.JsonMappingException; | ||
import com.fasterxml.jackson.dataformat.avro.AvroMapper; | ||
import com.fasterxml.jackson.dataformat.avro.annotation.AvroNamespace; | ||
import org.apache.avro.Schema; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
public class PolymorphicTypeAnnotationsTest { | ||
|
||
private static final AvroMapper MAPPER = AvroMapper.builder().build(); | ||
// it is easier maintain string schema representation when namespace is constant, rather than being inferend from this class package name | ||
private static final String TEST_NAMESPACE = "test"; | ||
|
||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = Cat.class), | ||
@JsonSubTypes.Type(value = Dog.class), | ||
}) | ||
private interface AnimalInterface { | ||
} | ||
|
||
private static abstract class AbstractMammal implements AnimalInterface { | ||
public int legs; | ||
} | ||
|
||
static class Cat extends AbstractMammal { | ||
public String color; | ||
} | ||
|
||
static class Dog extends AbstractMammal { | ||
public int size; | ||
} | ||
|
||
@Test | ||
public void subclasses_of_interface_test() throws JsonMappingException { | ||
// GIVEN | ||
final Schema catSchema = MAPPER.schemaFor(Cat.class).getAvroSchema(); | ||
final Schema dogSchema = MAPPER.schemaFor(Dog.class).getAvroSchema(); | ||
|
||
// WHEN | ||
Schema actualSchema = MAPPER.schemaFor(AnimalInterface.class).getAvroSchema(); | ||
|
||
System.out.println("Animal schema:\n" + actualSchema.toString(true)); | ||
|
||
// THEN | ||
assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); | ||
// Because AnimalInterface is interface and AbstractMammal is abstract, they are not expected to be among types in union | ||
assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(catSchema, dogSchema); | ||
} | ||
|
||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = Apple.class), | ||
@JsonSubTypes.Type(value = Pear.class), | ||
}) | ||
@AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation | ||
static class Fruit { | ||
public boolean eatable; | ||
} | ||
|
||
private static final String FRUIT_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Fruit\",\"namespace\":\"test\",\"fields\":[{\"name\":\"eatable\",\"type\":\"boolean\"}]}"; | ||
|
||
static class Apple extends Fruit { | ||
public String color; | ||
} | ||
|
||
static class Pear extends Fruit { | ||
public int seeds; | ||
} | ||
|
||
@Test | ||
public void subclasses_of_concrete_class_test() throws IOException { | ||
// GIVEN | ||
final Schema fruitItselfSchema = MAPPER.schemaFrom(FRUIT_ITSELF_SCHEMA_STR).getAvroSchema(); | ||
final Schema appleSchema = MAPPER.schemaFor(Apple.class).getAvroSchema(); | ||
final Schema pearSchema = MAPPER.schemaFor(Pear.class).getAvroSchema(); | ||
|
||
// WHEN | ||
Schema actualSchema = MAPPER.schemaFor(Fruit.class).getAvroSchema(); | ||
|
||
System.out.println("Fruit schema:\n" + actualSchema.toString(true)); | ||
|
||
// THEN | ||
assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); | ||
assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(fruitItselfSchema, appleSchema, pearSchema); | ||
} | ||
|
||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = LandVehicle.class), | ||
@JsonSubTypes.Type(value = AbstractWaterVehicle.class), | ||
}) | ||
@AvroNamespace(TEST_NAMESPACE) | ||
private static class Vehicle { | ||
} | ||
|
||
private static final String VEHICLE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Vehicle\",\"namespace\":\"test\",\"fields\":[]}"; | ||
|
||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = Car.class), | ||
@JsonSubTypes.Type(value = MotorCycle.class), | ||
}) | ||
@AvroNamespace(TEST_NAMESPACE) | ||
private static class LandVehicle extends Vehicle { | ||
} | ||
|
||
private static final String LAND_VEHICLE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"LandVehicle\",\"namespace\":\"test\",\"fields\":[]}"; | ||
|
||
private static class Car extends LandVehicle { | ||
} | ||
|
||
private static class MotorCycle extends LandVehicle { | ||
} | ||
|
||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = Boat.class), | ||
@JsonSubTypes.Type(value = Submarine.class), | ||
}) | ||
private static abstract class AbstractWaterVehicle extends Vehicle { | ||
public int propellers; | ||
} | ||
|
||
private static class Boat extends AbstractWaterVehicle { | ||
} | ||
|
||
private static class Submarine extends AbstractWaterVehicle { | ||
} | ||
|
||
@Test | ||
public void subclasses_of_subclasses_test() throws IOException { | ||
// GIVEN | ||
final Schema vehicleItselfSchema = MAPPER.schemaFrom(VEHICLE_ITSELF_SCHEMA_STR).getAvroSchema(); | ||
final Schema landVehicleItselfSchema = MAPPER.schemaFrom(LAND_VEHICLE_ITSELF_SCHEMA_STR).getAvroSchema(); | ||
final Schema carSchema = MAPPER.schemaFor(Car.class).getAvroSchema(); | ||
final Schema motorCycleSchema = MAPPER.schemaFor(MotorCycle.class).getAvroSchema(); | ||
final Schema boatSchema = MAPPER.schemaFor(Boat.class).getAvroSchema(); | ||
final Schema submarineSchema = MAPPER.schemaFor(Submarine.class).getAvroSchema(); | ||
|
||
// WHEN | ||
Schema actualSchema = MAPPER.schemaFor(Vehicle.class).getAvroSchema(); | ||
|
||
System.out.println("Vehicle schema:\n" + actualSchema.toString(true)); | ||
|
||
// THEN | ||
assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); | ||
assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder( | ||
vehicleItselfSchema, | ||
landVehicleItselfSchema, carSchema, motorCycleSchema, | ||
// AbstractWaterVehicle is not here, because it is abstract | ||
boatSchema, submarineSchema); | ||
} | ||
|
||
// Helium is twice in subtypes hierarchy, once as ElementInterface subtype and second time as subtype | ||
// of AbstractGas subtype. This situation may result in | ||
// "Failed to generate `AvroSchema` for ...., problem: (AvroRuntimeException) Duplicate in union:com.fasterxml...PolymorphicTypeAnnotationsTest.Helium" | ||
// error. | ||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = AbstractGas.class), | ||
@JsonSubTypes.Type(value = Helium.class), | ||
}) | ||
private interface ElementInterface { | ||
} | ||
|
||
@JsonSubTypes({ | ||
@JsonSubTypes.Type(value = Helium.class), | ||
@JsonSubTypes.Type(value = Oxygen.class), | ||
}) | ||
static abstract class AbstractGas implements ElementInterface { | ||
public int atomicMass; | ||
} | ||
|
||
private static class Helium extends AbstractGas { | ||
} | ||
|
||
private static class Oxygen extends AbstractGas { | ||
} | ||
|
||
@Test | ||
public void class_is_referenced_twice_in_hierarchy_test() throws JsonMappingException { | ||
// GIVEN | ||
final Schema heliumSchema = MAPPER.schemaFor(Helium.class).getAvroSchema(); | ||
final Schema oxygenSchema = MAPPER.schemaFor(Oxygen.class).getAvroSchema(); | ||
|
||
// WHEN | ||
Schema actualSchema = MAPPER.schemaFor(ElementInterface.class).getAvroSchema(); | ||
|
||
System.out.println("ElementInterface schema:\n" + actualSchema.toString(true)); | ||
|
||
// THEN | ||
assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); | ||
// ElementInterface and AbstractGas are not concrete classes they are not expected to be among types in union | ||
assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(heliumSchema, oxygenSchema); | ||
} | ||
|
||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.