Skip to content

Commit 4470a5f

Browse files
author
Bhargav Dodla
committed
feat: Add row-level TTL on writes and clean up debug logging
1 parent beac9e7 commit 4470a5f

File tree

1 file changed

+17
-98
lines changed

1 file changed

+17
-98
lines changed

sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/cassandra_online_store.py

Lines changed: 17 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
INSERT_CQL_4_TEMPLATE = (
6666
"INSERT INTO {fqtable} (feature_name,"
6767
" value, entity_key, event_ts) VALUES"
68-
" (?, ?, ?, ?);"
68+
" (?, ?, ?, ?) USING TTL {ttl};"
6969
)
7070

7171
SELECT_CQL_TEMPLATE = "SELECT {columns} FROM {fqtable} WHERE entity_key = ?;"
@@ -78,7 +78,7 @@
7878
event_ts TIMESTAMP,
7979
created_ts TIMESTAMP,
8080
PRIMARY KEY ((entity_key), feature_name)
81-
) WITH CLUSTERING ORDER BY (feature_name ASC) AND default_time_to_live={ttl};
81+
) WITH CLUSTERING ORDER BY (feature_name ASC);
8282
"""
8383

8484
DROP_TABLE_CQL_TEMPLATE = "DROP TABLE IF EXISTS {fqtable};"
@@ -194,10 +194,6 @@ class CassandraLoadBalancingPolicy(FeastConfigBaseModel):
194194
"""
195195

196196

197-
def get_current_time_in_ms():
198-
return datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
199-
200-
201197
class CassandraOnlineStore(OnlineStore):
202198
"""
203199
Cassandra/Astra DB online store implementation for Feast.
@@ -229,15 +225,9 @@ def _get_session(self, config: RepoConfig):
229225

230226
if self._session:
231227
if not self._session.is_shutdown:
232-
print(f"{get_current_time_in_ms()} Reusing existing session..")
233228
return self._session
234229
else:
235230
self._session = None
236-
print(
237-
f"{get_current_time_in_ms()} Setting a session to None. Creating a new session.."
238-
)
239-
else:
240-
print(f"{get_current_time_in_ms()} Creating a new session..")
241231
if not self._session:
242232
# configuration consistency checks
243233
hosts = online_store_config.hosts
@@ -307,8 +297,6 @@ def _get_session(self, config: RepoConfig):
307297
hosts,
308298
port=port,
309299
auth_provider=auth_provider,
310-
idle_heartbeat_interval=0,
311-
idle_heartbeat_timeout=0,
312300
**cluster_kwargs,
313301
)
314302
else:
@@ -332,61 +320,13 @@ def __del__(self):
332320
you can't use the session object anymore.
333321
You'd get a RuntimeError "cannot schedule new futures after shutdown".
334322
"""
335-
print("Calling CassandraOnlineStore __del__() method")
336323
if self._session:
337324
if not self._session.is_shutdown:
338325
self._session.shutdown()
339-
current_time_in_ms = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[
340-
:-3
341-
]
342-
print(f"{current_time_in_ms} Session is shutdown")
343326

344327
if self._cluster:
345328
if not self._cluster.is_shutdown:
346329
self._cluster.shutdown()
347-
current_time_in_ms = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[
348-
:-3
349-
]
350-
print(f"{current_time_in_ms} Cluster is shutdown")
351-
352-
def online_write_batch_connector(
353-
self,
354-
config: RepoConfig,
355-
table: FeatureView,
356-
data: List[
357-
Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
358-
],
359-
progress: Optional[Callable[[int], Any]],
360-
) -> List[Tuple[str, bytes, str, datetime]]:
361-
"""
362-
Write a batch of features of several entities to the database.
363-
364-
Args:
365-
config: The RepoConfig for the current FeatureStore.
366-
table: Feast FeatureView.
367-
data: a list of quadruplets containing Feature data. Each
368-
quadruplet contains an Entity Key, a dict containing feature
369-
values, an event timestamp for the row, and
370-
the created timestamp for the row if it exists.
371-
progress: Optional function to be called once every mini-batch of
372-
rows is written to the online store. Can be used to
373-
display progress.
374-
"""
375-
data_list = []
376-
for entity_key, values, timestamp, created_ts in data:
377-
entity_key_bin = serialize_entity_key(
378-
entity_key,
379-
entity_key_serialization_version=config.entity_key_serialization_version,
380-
).hex()
381-
for feature_name, val in values.items():
382-
params: Tuple[str, bytes, str, datetime] = (
383-
feature_name,
384-
val.SerializeToString(),
385-
entity_key_bin,
386-
timestamp,
387-
)
388-
data_list.append(params)
389-
return data_list
390330

391331
def online_write_batch(
392332
self,
@@ -411,32 +351,26 @@ def online_write_batch(
411351
rows is written to the online store. Can be used to
412352
display progress.
413353
"""
414-
logger.info(f"Started writing data of size {len(data)} to CassandraOnlineStore")
415-
print(
416-
f"{get_current_time_in_ms()} Started writing data of size {len(data)} to CassandraOnlineStore"
417-
)
418354
write_concurrency = config.online_store.write_concurrency
419355
project = config.project
420-
421-
# def on_success(result, semaphore):
422-
# semaphore.release()
423-
424-
# def on_failure(exc, semaphore):
425-
# semaphore.release()
426-
# logger.exception(f"Error writing a batch: {exc}")
427-
# print(f"Error writing a batch: {exc}")
428-
# raise Exception("Error writing a batch") from exc
429-
356+
ttl = (
357+
table.online_store_key_ttl_seconds
358+
or config.online_store.key_ttl_seconds
359+
or 0
360+
)
430361
session: Session = self._get_session(config)
431362
keyspace: str = self._keyspace
432363
fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table)
433364

434365
futures = []
435366
insert_cql = self._get_cql_statement(
436-
config, "insert4", fqtable=fqtable, session=session
367+
config,
368+
"insert4",
369+
fqtable=fqtable,
370+
ttl=ttl,
371+
session=session,
437372
)
438373

439-
# semaphore = Semaphore(write_concurrency)
440374
for entity_key, values, timestamp, created_ts in data:
441375
batch = BatchStatement(batch_type=BatchType.UNLOGGED)
442376
entity_key_bin = serialize_entity_key(
@@ -451,14 +385,11 @@ def online_write_batch(
451385
timestamp,
452386
)
453387
batch.add(insert_cql, params)
454-
# semaphore.acquire()
455-
# future = session.execute_async(batch)
456-
# future.add_callbacks(
457-
# partial(on_success, semaphore=semaphore),
458-
# partial(on_failure, semaphore=semaphore),
459-
# )
460388
futures.append(session.execute_async(batch))
461389

390+
# TODO: Make this efficient by leveraging continuous writes rather
391+
# than blocking until all writes are done. We may need to rate limit
392+
# the writes to reduce the impact on read performance.
462393
if len(futures) >= write_concurrency:
463394
# Raises exception if at least one of the batch fails
464395
try:
@@ -484,13 +415,6 @@ def online_write_batch(
484415
print(f"Error writing a batch: {exc}")
485416
raise Exception("Error writing a batch") from exc
486417

487-
# Wait for all tasks to complete
488-
# while semaphore._value < write_concurrency:
489-
# print(f"{get_current_time_in_ms()} Waiting for active tasks to complete")
490-
# time.sleep(0.01)
491-
492-
logger.info("Done writing data to CassandraOnlineStore")
493-
print(f"{get_current_time_in_ms()} Done writing data to CassandraOnlineStore")
494418
# correction for the last missing call to `progress`:
495419
if progress:
496420
progress(1)
@@ -663,13 +587,8 @@ def _create_table(self, config: RepoConfig, project: str, table: FeatureView):
663587
session: Session = self._get_session(config)
664588
keyspace: str = self._keyspace
665589
fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table)
666-
ttl = (
667-
table.online_store_key_ttl_seconds
668-
or config.online_store.key_ttl_seconds
669-
or 0
670-
)
671-
create_cql = self._get_cql_statement(config, "create", fqtable, ttl=ttl)
672-
logger.info(f"Creating table {fqtable} with TTL {ttl}.")
590+
create_cql = self._get_cql_statement(config, "create", fqtable)
591+
logger.info(f"Creating table {fqtable} in keyspace {keyspace}.")
673592
session.execute(create_cql)
674593

675594
def _get_cql_statement(

0 commit comments

Comments (0)