Skip to content

Commit 5710727

Browse files
author
Bhargav Dodla
committed
fix: Using queue instead of semaphore
1 parent 9a4bd61 commit 5710727

File tree

1 file changed

+10
-46
lines changed

1 file changed

+10
-46
lines changed

sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/cassandra_online_store.py

Lines changed: 10 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"""
2020

2121
import logging
22+
import queue
2223
import threading
2324
import time
2425
from datetime import datetime
@@ -399,24 +400,17 @@ def online_write_batch(
399400
display progress.
400401
"""
401402

402-
def on_success(result, semaphore, futures, future, lock):
403-
semaphore.release()
404-
with lock:
405-
futures.remove(future)
403+
def on_success(result, concurrent_queue):
404+
concurrent_queue.get_nowait()
406405

407-
def on_failure(exc, semaphore, futures, future, lock):
408-
semaphore.release()
409-
with lock:
410-
futures.remove(future)
406+
def on_failure(exc, concurrent_queue):
407+
concurrent_queue.get_nowait()
411408
logger.exception(f"Error writing a batch: {exc}")
412409
print(f"Error writing a batch: {exc}")
413410
raise Exception("Error writing a batch") from exc
414411

415412
write_concurrency = config.online_store.write_concurrency
416-
417-
# rate_limiter = RateLimiter(rate=write_concurrency, interval=1)
418-
semaphore = threading.Semaphore(write_concurrency)
419-
lock = threading.Lock()
413+
concurrent_queue: queue.Queue = queue.Queue(maxsize=write_concurrency)
420414

421415
project = config.project
422416
ttl = (
@@ -428,7 +422,6 @@ def on_failure(exc, semaphore, futures, future, lock):
428422
keyspace: str = self._keyspace
429423
fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table)
430424

431-
futures = []
432425
insert_cql = self._get_cql_statement(
433426
config,
434427
"insert4",
@@ -451,58 +444,29 @@ def on_failure(exc, semaphore, futures, future, lock):
451444
timestamp,
452445
)
453446
batch.add(insert_cql, params)
454-
# with rate_limiter:
455-
semaphore.acquire()
456447
future = session.execute_async(batch)
457-
futures.append(future)
448+
concurrent_queue.put(future)
458449
future.add_callbacks(
459450
partial(
460451
on_success,
461-
semaphore=semaphore,
462-
futures=futures,
463-
future=future,
464-
lock=lock,
452+
concurrent_queue=concurrent_queue,
465453
),
466454
partial(
467455
on_failure,
468-
semaphore=semaphore,
469-
futures=futures,
470-
future=future,
471-
lock=lock,
456+
concurrent_queue=concurrent_queue,
472457
),
473458
)
474459

475-
# TODO: Make this efficient by leveraging continuous writes rather
476-
# than blocking until all writes are done. We may need to rate limit
477-
# the writes to reduce the impact on read performance.
478-
# if len(futures) >= write_concurrency:
479-
# # Raises exception if at least one of the batch fails
480-
# self._wait_for_futures(futures)
481-
# futures.clear()
482-
483460
# this happens N-1 times, will be corrected outside:
484461
if progress:
485462
progress(1)
486-
while futures:
463+
while not concurrent_queue.empty():
487464
time.sleep(0.001)
488-
# if len(futures) > 0:
489-
# self._wait_for_futures(futures)
490-
# futures.clear()
491465

492466
# correction for the last missing call to `progress`:
493467
if progress:
494468
progress(1)
495469

496-
def _wait_for_futures(self, futures):
497-
try:
498-
for future in futures:
499-
future.result()
500-
futures = []
501-
except Exception as exc:
502-
logger.error(f"Error writing a batch: {exc}")
503-
print(f"Error writing a batch: {exc}")
504-
raise Exception("Error writing a batch") from exc
505-
506470
def online_read(
507471
self,
508472
config: RepoConfig,

0 commit comments

Comments
 (0)