65
65
INSERT_CQL_4_TEMPLATE = (
66
66
"INSERT INTO {fqtable} (feature_name,"
67
67
" value, entity_key, event_ts) VALUES"
68
- " (?, ?, ?, ?);"
68
+ " (?, ?, ?, ?) USING TTL {ttl} ;"
69
69
)
70
70
71
71
SELECT_CQL_TEMPLATE = "SELECT {columns} FROM {fqtable} WHERE entity_key = ?;"
78
78
event_ts TIMESTAMP,
79
79
created_ts TIMESTAMP,
80
80
PRIMARY KEY ((entity_key), feature_name)
81
- ) WITH CLUSTERING ORDER BY (feature_name ASC) AND default_time_to_live={ttl} ;
81
+ ) WITH CLUSTERING ORDER BY (feature_name ASC);
82
82
"""
83
83
84
84
DROP_TABLE_CQL_TEMPLATE = "DROP TABLE IF EXISTS {fqtable};"
@@ -194,10 +194,6 @@ class CassandraLoadBalancingPolicy(FeastConfigBaseModel):
194
194
"""
195
195
196
196
197
- def get_current_time_in_ms ():
198
- return datetime .now ().strftime ("%Y-%m-%d %H:%M:%S.%f" )[:- 3 ]
199
-
200
-
201
197
class CassandraOnlineStore (OnlineStore ):
202
198
"""
203
199
Cassandra/Astra DB online store implementation for Feast.
@@ -229,15 +225,9 @@ def _get_session(self, config: RepoConfig):
229
225
230
226
if self ._session :
231
227
if not self ._session .is_shutdown :
232
- print (f"{ get_current_time_in_ms ()} Reusing existing session.." )
233
228
return self ._session
234
229
else :
235
230
self ._session = None
236
- print (
237
- f"{ get_current_time_in_ms ()} Setting a session to None. Creating a new session.."
238
- )
239
- else :
240
- print (f"{ get_current_time_in_ms ()} Creating a new session.." )
241
231
if not self ._session :
242
232
# configuration consistency checks
243
233
hosts = online_store_config .hosts
@@ -307,8 +297,6 @@ def _get_session(self, config: RepoConfig):
307
297
hosts ,
308
298
port = port ,
309
299
auth_provider = auth_provider ,
310
- idle_heartbeat_interval = 0 ,
311
- idle_heartbeat_timeout = 0 ,
312
300
** cluster_kwargs ,
313
301
)
314
302
else :
@@ -332,61 +320,13 @@ def __del__(self):
332
320
you can't use the session object anymore.
333
321
You'd get a RuntimeError "cannot schedule new futures after shutdown".
334
322
"""
335
- print ("Calling CassandraOnlineStore __del__() method" )
336
323
if self ._session :
337
324
if not self ._session .is_shutdown :
338
325
self ._session .shutdown ()
339
- current_time_in_ms = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S.%f" )[
340
- :- 3
341
- ]
342
- print (f"{ current_time_in_ms } Session is shutdown" )
343
326
344
327
if self ._cluster :
345
328
if not self ._cluster .is_shutdown :
346
329
self ._cluster .shutdown ()
347
- current_time_in_ms = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S.%f" )[
348
- :- 3
349
- ]
350
- print (f"{ current_time_in_ms } Cluster is shutdown" )
351
-
352
- def online_write_batch_connector (
353
- self ,
354
- config : RepoConfig ,
355
- table : FeatureView ,
356
- data : List [
357
- Tuple [EntityKeyProto , Dict [str , ValueProto ], datetime , Optional [datetime ]]
358
- ],
359
- progress : Optional [Callable [[int ], Any ]],
360
- ) -> List [Tuple [str , bytes , str , datetime ]]:
361
- """
362
- Write a batch of features of several entities to the database.
363
-
364
- Args:
365
- config: The RepoConfig for the current FeatureStore.
366
- table: Feast FeatureView.
367
- data: a list of quadruplets containing Feature data. Each
368
- quadruplet contains an Entity Key, a dict containing feature
369
- values, an event timestamp for the row, and
370
- the created timestamp for the row if it exists.
371
- progress: Optional function to be called once every mini-batch of
372
- rows is written to the online store. Can be used to
373
- display progress.
374
- """
375
- data_list = []
376
- for entity_key , values , timestamp , created_ts in data :
377
- entity_key_bin = serialize_entity_key (
378
- entity_key ,
379
- entity_key_serialization_version = config .entity_key_serialization_version ,
380
- ).hex ()
381
- for feature_name , val in values .items ():
382
- params : Tuple [str , bytes , str , datetime ] = (
383
- feature_name ,
384
- val .SerializeToString (),
385
- entity_key_bin ,
386
- timestamp ,
387
- )
388
- data_list .append (params )
389
- return data_list
390
330
391
331
def online_write_batch (
392
332
self ,
@@ -411,32 +351,26 @@ def online_write_batch(
411
351
rows is written to the online store. Can be used to
412
352
display progress.
413
353
"""
414
- logger .info (f"Started writing data of size { len (data )} to CassandraOnlineStore" )
415
- print (
416
- f"{ get_current_time_in_ms ()} Started writing data of size { len (data )} to CassandraOnlineStore"
417
- )
418
354
write_concurrency = config .online_store .write_concurrency
419
355
project = config .project
420
-
421
- # def on_success(result, semaphore):
422
- # semaphore.release()
423
-
424
- # def on_failure(exc, semaphore):
425
- # semaphore.release()
426
- # logger.exception(f"Error writing a batch: {exc}")
427
- # print(f"Error writing a batch: {exc}")
428
- # raise Exception("Error writing a batch") from exc
429
-
356
+ ttl = (
357
+ table .online_store_key_ttl_seconds
358
+ or config .online_store .key_ttl_seconds
359
+ or 0
360
+ )
430
361
session : Session = self ._get_session (config )
431
362
keyspace : str = self ._keyspace
432
363
fqtable = CassandraOnlineStore ._fq_table_name (keyspace , project , table )
433
364
434
365
futures = []
435
366
insert_cql = self ._get_cql_statement (
436
- config , "insert4" , fqtable = fqtable , session = session
367
+ config ,
368
+ "insert4" ,
369
+ fqtable = fqtable ,
370
+ ttl = ttl ,
371
+ session = session ,
437
372
)
438
373
439
- # semaphore = Semaphore(write_concurrency)
440
374
for entity_key , values , timestamp , created_ts in data :
441
375
batch = BatchStatement (batch_type = BatchType .UNLOGGED )
442
376
entity_key_bin = serialize_entity_key (
@@ -451,14 +385,11 @@ def online_write_batch(
451
385
timestamp ,
452
386
)
453
387
batch .add (insert_cql , params )
454
- # semaphore.acquire()
455
- # future = session.execute_async(batch)
456
- # future.add_callbacks(
457
- # partial(on_success, semaphore=semaphore),
458
- # partial(on_failure, semaphore=semaphore),
459
- # )
460
388
futures .append (session .execute_async (batch ))
461
389
390
+ # TODO: Make this efficient by leveraging continuous writes rather
391
+ # than blocking until all writes are done. We may need to rate limit
392
+ # the writes to reduce the impact on read performance.
462
393
if len (futures ) >= write_concurrency :
463
394
# Raises exception if at least one of the batch fails
464
395
try :
@@ -484,13 +415,6 @@ def online_write_batch(
484
415
print (f"Error writing a batch: { exc } " )
485
416
raise Exception ("Error writing a batch" ) from exc
486
417
487
- # Wait for all tasks to complete
488
- # while semaphore._value < write_concurrency:
489
- # print(f"{get_current_time_in_ms()} Waiting for active tasks to complete")
490
- # time.sleep(0.01)
491
-
492
- logger .info ("Done writing data to CassandraOnlineStore" )
493
- print (f"{ get_current_time_in_ms ()} Done writing data to CassandraOnlineStore" )
494
418
# correction for the last missing call to `progress`:
495
419
if progress :
496
420
progress (1 )
@@ -663,13 +587,8 @@ def _create_table(self, config: RepoConfig, project: str, table: FeatureView):
663
587
session : Session = self ._get_session (config )
664
588
keyspace : str = self ._keyspace
665
589
fqtable = CassandraOnlineStore ._fq_table_name (keyspace , project , table )
666
- ttl = (
667
- table .online_store_key_ttl_seconds
668
- or config .online_store .key_ttl_seconds
669
- or 0
670
- )
671
- create_cql = self ._get_cql_statement (config , "create" , fqtable , ttl = ttl )
672
- logger .info (f"Creating table { fqtable } with TTL { ttl } ." )
590
+ create_cql = self ._get_cql_statement (config , "create" , fqtable )
591
+ logger .info (f"Creating table { fqtable } in keyspace { keyspace } ." )
673
592
session .execute (create_cql )
674
593
675
594
def _get_cql_statement (
0 commit comments