
Commit f1a1571

EXPEbdodla and Bhargav Dodla authored
feat: Added support for Confluent Avro Format (#90)
* feat: Added support for Confluent Avro Format
* Removed request source from SUPPORTED_KAFKA_BATCH_SOURCES
* Rename schoma to schema_str

Co-authored-by: Bhargav Dodla <[email protected]>
1 parent c540117 commit f1a1571

File tree

6 files changed: +325 −66 lines changed

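In SDK terms, the change lets a Kafka stream source declare its message format as Confluent Avro, with the schema resolved from a schema registry by record name and namespace rather than embedded inline. A minimal usage sketch follows; the source name, broker, topic, and record identifiers are hypothetical placeholders, and the KafkaSource keyword arguments are assumed to match the existing Feast SDK.

# Hypothetical usage sketch: KafkaSource arguments assumed to match the
# existing Feast SDK; all values below are placeholders.
from feast import KafkaSource
from feast.data_format import ConfluentAvroFormat

driver_stats_stream = KafkaSource(
    name="driver_stats_stream",
    kafka_bootstrap_servers="broker:9092",
    topic="driver_stats",
    timestamp_field="event_timestamp",
    message_format=ConfluentAvroFormat(
        record_name="DriverStats",              # record name in the schema registry
        record_namespace="com.example.driver",  # record namespace in the schema registry
    ),
)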

protos/feast/core/DataFormat.proto

Lines changed: 7 additions & 0 deletions
@@ -52,10 +52,17 @@ message StreamFormat {
     string schema_json = 1;
   }

+  // Confluent Avro fetches schema from a schema registry
+  message ConfluentAvroFormat {
+    string record_name = 1;
+    string record_namespace = 2;
+  }
+
   // Specifies the data format and format specific options
   oneof format {
     AvroFormat avro_format = 1;
     ProtoFormat proto_format = 2;
     JsonFormat json_format = 3;
+    ConfluentAvroFormat confluent_avro_format = 4;
   }
 }
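The new message joins the existing format oneof, so exactly one format can be set per StreamFormat. A short sketch of that from the generated Python bindings; the feast.protos.feast.core.DataFormat_pb2 module path and the record values are assumptions.

# Sketch against the generated protos; module path and values are assumptions.
from feast.protos.feast.core.DataFormat_pb2 import StreamFormat

fmt = StreamFormat(
    confluent_avro_format=StreamFormat.ConfluentAvroFormat(
        record_name="DriverStats",
        record_namespace="com.example.driver",
    )
)
# Setting one member of the oneof clears the others.
assert fmt.WhichOneof("format") == "confluent_avro_format"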

sdk/python/feast/data_format.py

Lines changed: 28 additions & 0 deletions
@@ -93,6 +93,11 @@ def from_proto(cls, proto):
             return JsonFormat(schema_json=proto.json_format.schema_json)
         if fmt == "proto_format":
             return ProtoFormat(class_path=proto.proto_format.class_path)
+        if fmt == "confluent_avro_format":
+            return ConfluentAvroFormat(
+                record_name=proto.confluent_avro_format.record_name,
+                record_namespace=proto.confluent_avro_format.record_namespace,
+            )
         raise NotImplementedError(f"StreamFormat is unsupported: {fmt}")


@@ -155,3 +160,26 @@ def to_proto(self):
         return StreamFormatProto(
             proto_format=StreamFormatProto.ProtoFormat(class_path=self.class_path)
         )
+
+
+class ConfluentAvroFormat(StreamFormat):
+    """
+    Defines the Confluent Avro streaming data format that encodes data in Confluent Avro format.
+    """
+
+    def __init__(self, record_name: str, record_namespace: str):
+        """
+        Construct a new Confluent Avro data format.
+
+        Args:
+            record_name: Record name used by the schema registry
+            record_namespace: Record namespace used by the schema registry
+        """
+        self.record_name = record_name
+        self.record_namespace = record_namespace
+
+    def to_proto(self):
+        proto = StreamFormatProto.ConfluentAvroFormat(
+            record_name=self.record_name, record_namespace=self.record_namespace
+        )
+        return StreamFormatProto(confluent_avro_format=proto)
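Based on the methods added above, ConfluentAvroFormat should round-trip through its proto representation, with StreamFormat.from_proto dispatching on the confluent_avro_format member of the oneof. A small sketch; the record values are placeholders.

# Round-trip sketch based on the to_proto/from_proto additions above;
# record values are placeholders.
from feast.data_format import ConfluentAvroFormat, StreamFormat

fmt = ConfluentAvroFormat(record_name="DriverStats", record_namespace="com.example.driver")

proto = fmt.to_proto()                     # StreamFormatProto with confluent_avro_format set
restored = StreamFormat.from_proto(proto)  # dispatches on the oneof field name

assert isinstance(restored, ConfluentAvroFormat)
assert restored.record_name == "DriverStats"
assert restored.record_namespace == "com.example.driver"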

sdk/python/feast/expediagroup/pydantic_models/data_source_model.py

Lines changed: 25 additions & 14 deletions
@@ -4,6 +4,7 @@
 Copyright 2023 Expedia Group

 """
+
 import sys
 from datetime import timedelta
 from typing import Dict, List, Literal, Optional, Union
@@ -17,6 +18,7 @@
 from feast.expediagroup.pydantic_models.stream_format_model import (
     AnyStreamFormat,
     AvroFormatModel,
+    ConfluentAvroFormatModel,
     JsonFormatModel,
     ProtoFormatModel,
 )
@@ -230,8 +232,13 @@ def from_data_source(
         )


-SUPPORTED_MESSAGE_FORMATS = [AvroFormatModel, JsonFormatModel, ProtoFormatModel]
-SUPPORTED_KAFKA_BATCH_SOURCES = [RequestSourceModel, SparkSourceModel]
+SUPPORTED_MESSAGE_FORMATS = [
+    AvroFormatModel,
+    JsonFormatModel,
+    ProtoFormatModel,
+    ConfluentAvroFormatModel,
+]
+SUPPORTED_KAFKA_BATCH_SOURCES = [SparkSourceModel]


 class KafkaSourceModel(DataSourceModel):
@@ -271,9 +278,9 @@ def to_data_source(self) -> KafkaSource:
             description=self.description,
             tags=self.tags,
             owner=self.owner,
-            batch_source=self.batch_source.to_data_source()
-            if self.batch_source
-            else None,
+            batch_source=(
+                self.batch_source.to_data_source() if self.batch_source else None
+            ),
             watermark_delay_threshold=self.watermark_delay_threshold,
         )
@@ -317,16 +324,20 @@ def from_data_source(
             name=data_source.name,
             timestamp_field=data_source.timestamp_field,
             message_format=message_format,
-            kafka_bootstrap_servers=data_source.kafka_options.kafka_bootstrap_servers
-            if data_source.kafka_options.kafka_bootstrap_servers
-            else "",
-            topic=data_source.kafka_options.topic
-            if data_source.kafka_options.topic
-            else "",
+            kafka_bootstrap_servers=(
+                data_source.kafka_options.kafka_bootstrap_servers
+                if data_source.kafka_options.kafka_bootstrap_servers
+                else ""
+            ),
+            topic=(
+                data_source.kafka_options.topic
+                if data_source.kafka_options.topic
+                else ""
+            ),
             created_timestamp_column=data_source.created_timestamp_column,
-            field_mapping=data_source.field_mapping
-            if data_source.field_mapping
-            else None,
+            field_mapping=(
+                data_source.field_mapping if data_source.field_mapping else None
+            ),
             description=data_source.description,
             tags=data_source.tags if data_source.tags else None,
             owner=data_source.owner,

sdk/python/feast/expediagroup/pydantic_models/stream_format_model.py

Lines changed: 45 additions & 8 deletions
@@ -4,7 +4,7 @@
 from pydantic import Field as PydanticField
 from typing_extensions import Annotated, Self

-from feast.data_format import AvroFormat, JsonFormat, ProtoFormat
+from feast.data_format import AvroFormat, ConfluentAvroFormat, JsonFormat, ProtoFormat


 class StreamFormatModel(BaseModel):
@@ -38,7 +38,7 @@ class AvroFormatModel(StreamFormatModel):
     """

     format: Literal["AvroFormatModel"] = "AvroFormatModel"
-    schoma: str
+    schema_str: str

     def to_stream_format(self) -> AvroFormat:
         """
@@ -47,7 +47,7 @@ def to_stream_format(self) -> AvroFormat:
         Returns:
             An AvroFormat.
         """
-        return AvroFormat(schema_json=self.schoma)
+        return AvroFormat(schema_json=self.schema_str)

     @classmethod
     def from_stream_format(
@@ -60,7 +60,7 @@ def from_stream_format(
         Returns:
             An AvroFormatModel.
         """
-        return cls(schoma=avro_format.schema_json)
+        return cls(schema_str=avro_format.schema_json)


 class JsonFormatModel(StreamFormatModel):
@@ -69,7 +69,7 @@ class JsonFormatModel(StreamFormatModel):
     """

     format: Literal["JsonFormatModel"] = "JsonFormatModel"
-    schoma: str
+    schema_str: str

     def to_stream_format(self) -> JsonFormat:
         """
@@ -78,7 +78,7 @@ def to_stream_format(self) -> JsonFormat:
         Returns:
             A JsonFormat.
         """
-        return JsonFormat(schema_json=self.schoma)
+        return JsonFormat(schema_json=self.schema_str)

     @classmethod
     def from_stream_format(
@@ -91,7 +91,7 @@ def from_stream_format(
         Returns:
             A JsonFormatModel.
         """
-        return cls(schoma=json_format.schema_json)
+        return cls(schema_str=json_format.schema_json)


 class ProtoFormatModel(StreamFormatModel):
@@ -125,9 +125,46 @@ def from_stream_format(
         return cls(class_path=proto_format.class_path)


+class ConfluentAvroFormatModel(StreamFormatModel):
+    """
+    Pydantic model of a Feast ConfluentAvroFormat.
+    """
+
+    format: Literal["ConfluentAvroFormatModel"] = "ConfluentAvroFormatModel"
+    record_name: str
+    record_namespace: str
+
+    def to_stream_format(self) -> ConfluentAvroFormat:
+        """
+        Given a Pydantic ConfluentAvroFormatModel, create and return a ConfluentAvroFormat.
+
+        Returns:
+            A ConfluentAvroFormat.
+        """
+        return ConfluentAvroFormat(
+            record_name=self.record_name, record_namespace=self.record_namespace
+        )
+
+    @classmethod
+    def from_stream_format(
+        cls,
+        confluent_avro_format,
+    ) -> Self:  # type: ignore
+        """
+        Converts a ConfluentAvroFormat object to its pydantic model representation.
+
+        Returns:
+            A ConfluentAvroFormatModel.
+        """
+        return cls(
+            record_name=confluent_avro_format.record_name,
+            record_namespace=confluent_avro_format.record_namespace,
+        )
+
+
 # https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83
 # This lets us discriminate child classes of DataSourceModel with type hints.
 AnyStreamFormat = Annotated[
-    Union[AvroFormatModel, JsonFormatModel, ProtoFormatModel],
+    Union[AvroFormatModel, JsonFormatModel, ProtoFormatModel, ConfluentAvroFormatModel],
     PydanticField(discriminator="format"),
 ]
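The new pydantic model mirrors the Feast format object and plugs into the AnyStreamFormat discriminated union via its format field. A conversion sketch based on the methods in the diff; the record values are placeholders.

# Conversion sketch between the Feast object and its pydantic model;
# record values are placeholders.
from feast.data_format import ConfluentAvroFormat
from feast.expediagroup.pydantic_models.stream_format_model import (
    ConfluentAvroFormatModel,
)

fmt = ConfluentAvroFormat(record_name="DriverStats", record_namespace="com.example.driver")

model = ConfluentAvroFormatModel.from_stream_format(fmt)
assert model.format == "ConfluentAvroFormatModel"  # discriminator used by AnyStreamFormat

restored = model.to_stream_format()
assert (restored.record_name, restored.record_namespace) == (
    fmt.record_name,
    fmt.record_namespace,
)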
