6565INSERT_CQL_4_TEMPLATE = (
6666 "INSERT INTO {fqtable} (feature_name,"
6767 " value, entity_key, event_ts) VALUES"
68- " (?, ?, ?, ?);"
68+ " (?, ?, ?, ?) USING TTL {ttl} ;"
6969)
7070
7171SELECT_CQL_TEMPLATE = "SELECT {columns} FROM {fqtable} WHERE entity_key = ?;"
7878 event_ts TIMESTAMP,
7979 created_ts TIMESTAMP,
8080 PRIMARY KEY ((entity_key), feature_name)
81- ) WITH CLUSTERING ORDER BY (feature_name ASC) AND default_time_to_live={ttl} ;
81+ ) WITH CLUSTERING ORDER BY (feature_name ASC);
8282"""
8383
8484DROP_TABLE_CQL_TEMPLATE = "DROP TABLE IF EXISTS {fqtable};"
@@ -194,10 +194,6 @@ class CassandraLoadBalancingPolicy(FeastConfigBaseModel):
194194 """
195195
196196
197- def get_current_time_in_ms ():
198- return datetime .now ().strftime ("%Y-%m-%d %H:%M:%S.%f" )[:- 3 ]
199-
200-
201197class CassandraOnlineStore (OnlineStore ):
202198 """
203199 Cassandra/Astra DB online store implementation for Feast.
@@ -229,15 +225,9 @@ def _get_session(self, config: RepoConfig):
229225
230226 if self ._session :
231227 if not self ._session .is_shutdown :
232- print (f"{ get_current_time_in_ms ()} Reusing existing session.." )
233228 return self ._session
234229 else :
235230 self ._session = None
236- print (
237- f"{ get_current_time_in_ms ()} Setting a session to None. Creating a new session.."
238- )
239- else :
240- print (f"{ get_current_time_in_ms ()} Creating a new session.." )
241231 if not self ._session :
242232 # configuration consistency checks
243233 hosts = online_store_config .hosts
@@ -307,8 +297,6 @@ def _get_session(self, config: RepoConfig):
307297 hosts ,
308298 port = port ,
309299 auth_provider = auth_provider ,
310- idle_heartbeat_interval = 0 ,
311- idle_heartbeat_timeout = 0 ,
312300 ** cluster_kwargs ,
313301 )
314302 else :
@@ -332,61 +320,13 @@ def __del__(self):
332320 you can't use the session object anymore.
333321 You'd get a RuntimeError "cannot schedule new futures after shutdown".
334322 """
335- print ("Calling CassandraOnlineStore __del__() method" )
336323 if self ._session :
337324 if not self ._session .is_shutdown :
338325 self ._session .shutdown ()
339- current_time_in_ms = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S.%f" )[
340- :- 3
341- ]
342- print (f"{ current_time_in_ms } Session is shutdown" )
343326
344327 if self ._cluster :
345328 if not self ._cluster .is_shutdown :
346329 self ._cluster .shutdown ()
347- current_time_in_ms = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S.%f" )[
348- :- 3
349- ]
350- print (f"{ current_time_in_ms } Cluster is shutdown" )
351-
352- def online_write_batch_connector (
353- self ,
354- config : RepoConfig ,
355- table : FeatureView ,
356- data : List [
357- Tuple [EntityKeyProto , Dict [str , ValueProto ], datetime , Optional [datetime ]]
358- ],
359- progress : Optional [Callable [[int ], Any ]],
360- ) -> List [Tuple [str , bytes , str , datetime ]]:
361- """
362- Write a batch of features of several entities to the database.
363-
364- Args:
365- config: The RepoConfig for the current FeatureStore.
366- table: Feast FeatureView.
367- data: a list of quadruplets containing Feature data. Each
368- quadruplet contains an Entity Key, a dict containing feature
369- values, an event timestamp for the row, and
370- the created timestamp for the row if it exists.
371- progress: Optional function to be called once every mini-batch of
372- rows is written to the online store. Can be used to
373- display progress.
374- """
375- data_list = []
376- for entity_key , values , timestamp , created_ts in data :
377- entity_key_bin = serialize_entity_key (
378- entity_key ,
379- entity_key_serialization_version = config .entity_key_serialization_version ,
380- ).hex ()
381- for feature_name , val in values .items ():
382- params : Tuple [str , bytes , str , datetime ] = (
383- feature_name ,
384- val .SerializeToString (),
385- entity_key_bin ,
386- timestamp ,
387- )
388- data_list .append (params )
389- return data_list
390330
391331 def online_write_batch (
392332 self ,
@@ -411,32 +351,26 @@ def online_write_batch(
411351 rows is written to the online store. Can be used to
412352 display progress.
413353 """
414- logger .info (f"Started writing data of size { len (data )} to CassandraOnlineStore" )
415- print (
416- f"{ get_current_time_in_ms ()} Started writing data of size { len (data )} to CassandraOnlineStore"
417- )
418354 write_concurrency = config .online_store .write_concurrency
419355 project = config .project
420-
421- # def on_success(result, semaphore):
422- # semaphore.release()
423-
424- # def on_failure(exc, semaphore):
425- # semaphore.release()
426- # logger.exception(f"Error writing a batch: {exc}")
427- # print(f"Error writing a batch: {exc}")
428- # raise Exception("Error writing a batch") from exc
429-
356+ ttl = (
357+ table .online_store_key_ttl_seconds
358+ or config .online_store .key_ttl_seconds
359+ or 0
360+ )
430361 session : Session = self ._get_session (config )
431362 keyspace : str = self ._keyspace
432363 fqtable = CassandraOnlineStore ._fq_table_name (keyspace , project , table )
433364
434365 futures = []
435366 insert_cql = self ._get_cql_statement (
436- config , "insert4" , fqtable = fqtable , session = session
367+ config ,
368+ "insert4" ,
369+ fqtable = fqtable ,
370+ ttl = ttl ,
371+ session = session ,
437372 )
438373
439- # semaphore = Semaphore(write_concurrency)
440374 for entity_key , values , timestamp , created_ts in data :
441375 batch = BatchStatement (batch_type = BatchType .UNLOGGED )
442376 entity_key_bin = serialize_entity_key (
@@ -451,14 +385,11 @@ def online_write_batch(
451385 timestamp ,
452386 )
453387 batch .add (insert_cql , params )
454- # semaphore.acquire()
455- # future = session.execute_async(batch)
456- # future.add_callbacks(
457- # partial(on_success, semaphore=semaphore),
458- # partial(on_failure, semaphore=semaphore),
459- # )
460388 futures .append (session .execute_async (batch ))
461389
390+ # TODO: Make this efficient by leveraging continuous writes rather
391+ # than blocking until all writes are done. We may need to rate limit
392+ # the writes to reduce the impact on read performance.
462393 if len (futures ) >= write_concurrency :
463394 # Raises an exception if at least one of the batches fails
464395 try :
@@ -484,13 +415,6 @@ def online_write_batch(
484415 print (f"Error writing a batch: { exc } " )
485416 raise Exception ("Error writing a batch" ) from exc
486417
487- # Wait for all tasks to complete
488- # while semaphore._value < write_concurrency:
489- # print(f"{get_current_time_in_ms()} Waiting for active tasks to complete")
490- # time.sleep(0.01)
491-
492- logger .info ("Done writing data to CassandraOnlineStore" )
493- print (f"{ get_current_time_in_ms ()} Done writing data to CassandraOnlineStore" )
494418 # correction for the last missing call to `progress`:
495419 if progress :
496420 progress (1 )
@@ -663,13 +587,8 @@ def _create_table(self, config: RepoConfig, project: str, table: FeatureView):
663587 session : Session = self ._get_session (config )
664588 keyspace : str = self ._keyspace
665589 fqtable = CassandraOnlineStore ._fq_table_name (keyspace , project , table )
666- ttl = (
667- table .online_store_key_ttl_seconds
668- or config .online_store .key_ttl_seconds
669- or 0
670- )
671- create_cql = self ._get_cql_statement (config , "create" , fqtable , ttl = ttl )
672- logger .info (f"Creating table { fqtable } with TTL { ttl } ." )
590+ create_cql = self ._get_cql_statement (config , "create" , fqtable )
591+ logger .info (f"Creating table { fqtable } in keyspace { keyspace } ." )
673592 session .execute (create_cql )
674593
675594 def _get_cql_statement (
0 commit comments