Skip to content

Commit c96f74a

Browse files
Merge pull request #504 from jordiolivares/feature/add-batch-across-fetch
Add support for batching requests across unrelated fetches
2 parents 0b659b5 + 776d06d commit c96f74a

File tree

2 files changed

+167
-1
lines changed

2 files changed

+167
-1
lines changed

fetch/src/main/scala/datasource.scala

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@ package fetch
1919
import cats.data.NonEmptyList
2020
import cats.effect._
2121
import cats.effect.implicits._
22+
import cats.effect.std.Queue
2223
import cats.kernel.{Hash => H}
2324
import cats.syntax.all._
2425

26+
import scala.collection.mutable
27+
import scala.concurrent.duration.FiniteDuration
28+
2529
/**
2630
* `Data` is a trait used to identify and optimize access to a `DataSource`.
2731
*/
@@ -66,6 +70,87 @@ trait DataSource[F[_], I, A] {
6670
def batchExecution: BatchExecution = InParallel
6771
}
6872

73+
object DataSource {
74+
private def upToWithin[F[_], T](queue: Queue[F, T], maxElements: Int, interval: FiniteDuration)(
75+
implicit F: Temporal[F]
76+
): F[List[T]] = {
77+
Ref[F].of(List.empty[T]).flatMap { ref =>
78+
val takeAndBuffer = queue.take.flatMap { x =>
79+
ref.updateAndGet(list => x :: list)
80+
}
81+
val bufferUntilNumElements = takeAndBuffer.iterateUntil { buffer =>
82+
buffer.size == maxElements
83+
}
84+
F.timeoutTo(bufferUntilNumElements, interval, ref.get)
85+
}
86+
}
87+
88+
/**
89+
* Returns a new DataSource that will batch Fetch requests across executions within a given
90+
* interval.
91+
*
92+
* As an example, if we have a Fetch request A, and a fetch request B that are being executed
93+
* simultaneously without knowledge of the other within some milliseconds of the other, the
94+
* datasource will transparently batch the two requests in a single batch call execution.
95+
*
96+
* This is useful if you want to treat each fetch individually from the others, for example in an
97+
* HTTP server processing requests.
98+
*
99+
* The original DataSource limits will be respected
100+
*
101+
* @param dataSource
102+
* the original datasource to be wrapped
103+
* @param delayPerBatch
104+
* the interval for processing Fetch requests as a single Batch call
105+
* @return
106+
*/
107+
def batchAcrossFetches[F[_], I, A](
108+
dataSource: DataSource[F, I, A],
109+
delayPerBatch: FiniteDuration
110+
)(implicit
111+
F: Async[F]
112+
): Resource[F, DataSource[F, I, A]] = {
113+
type Callback = Either[Throwable, Option[A]] => Unit
114+
for {
115+
queue <- Resource.eval(Queue.unbounded[F, (I, Callback)])
116+
workerFiber = upToWithin(
117+
queue,
118+
dataSource.maxBatchSize.getOrElse(Int.MaxValue),
119+
delayPerBatch
120+
).flatMap {
121+
case Nil => F.start(F.unit)
122+
case x =>
123+
val asMap = x.groupBy(_._1).mapValues(callbacks => callbacks.map(_._2))
124+
val batchResults = dataSource.batch(NonEmptyList.fromListUnsafe(asMap.keys.toList))
125+
val resultsHaveBeenSent = batchResults.map { results =>
126+
asMap.foreach { case (identity, callbacks) =>
127+
callbacks.foreach(cb => cb(Right(results.get(identity))))
128+
}
129+
}
130+
val fiberWork = F.handleError(resultsHaveBeenSent) { ex =>
131+
asMap.foreach { case (_, callbacks) =>
132+
callbacks.foreach(cb => cb(Left(ex)))
133+
}
134+
}
135+
F.start(fiberWork)
136+
}.foreverM[Unit]
137+
_ <- F.background(workerFiber)
138+
} yield {
139+
new DataSource[F, I, A] {
140+
override def data: Data[I, A] = dataSource.data
141+
142+
override implicit def CF: Concurrent[F] = dataSource.CF
143+
144+
override def fetch(id: I): F[Option[A]] = {
145+
F.async { cb =>
146+
queue.offer((id, cb)) *> F.pure(None)
147+
}
148+
}
149+
}
150+
}
151+
}
152+
}
153+
69154
sealed trait BatchExecution extends Product with Serializable
70155
case object Sequentially extends BatchExecution
71156
case object InParallel extends BatchExecution

fetch/src/test/scala/FetchBatchingTests.scala

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ import cats.data.NonEmptyList
2121
import cats.instances.list._
2222
import cats.syntax.all._
2323
import cats.effect._
24-
2524
import fetch._
2625

26+
import java.util.concurrent.atomic.AtomicInteger
27+
import scala.concurrent.duration.{DurationInt, FiniteDuration}
28+
2729
class FetchBatchingTests extends FetchSpec {
2830
import TestHelper._
2931

@@ -94,6 +96,51 @@ class FetchBatchingTests extends FetchSpec {
9496
}
9597
}
9698

99+
case class BatchAcrossFetchData(id: Int)
100+
101+
object BatchAcrossFetches extends Data[BatchAcrossFetchData, String] {
102+
def name = "Batch across Fetches"
103+
104+
private val batchesCounter = new AtomicInteger(0)
105+
private val fetchesCounter = new AtomicInteger(0)
106+
107+
def reset(): Unit = {
108+
batchesCounter.set(0)
109+
fetchesCounter.set(0)
110+
}
111+
112+
def counters: (Int, Int) =
113+
(fetchesCounter.get(), batchesCounter.get())
114+
115+
def unBatchedSource[F[_]: Concurrent]: DataSource[F, BatchAcrossFetchData, String] =
116+
new DataSource[F, BatchAcrossFetchData, String] {
117+
override def data = BatchAcrossFetches
118+
119+
override def CF = Concurrent[F]
120+
121+
override def fetch(request: BatchAcrossFetchData): F[Option[String]] = {
122+
fetchesCounter.incrementAndGet()
123+
CF.pure(Some(request.toString))
124+
}
125+
126+
override def batch(
127+
ids: NonEmptyList[BatchAcrossFetchData]
128+
): F[Map[BatchAcrossFetchData, String]] = {
129+
batchesCounter.incrementAndGet()
130+
CF.pure(
131+
ids.map(id => id -> id.toString).toList.toMap
132+
)
133+
}
134+
135+
override val batchExecution = InParallel
136+
}
137+
138+
def batchedSource[F[_]: Async](
139+
interval: FiniteDuration
140+
): Resource[F, DataSource[F, BatchAcrossFetchData, String]] =
141+
DataSource.batchAcrossFetches(unBatchedSource, interval)
142+
}
143+
97144
def fetchBatchedDataSeq[F[_]: Concurrent](id: Int): Fetch[F, Int] =
98145
Fetch(BatchedDataSeq(id), SeqBatch.source)
99146

@@ -207,4 +254,38 @@ class FetchBatchingTests extends FetchSpec {
207254
result shouldEqual ids.map(_.toString)
208255
}.unsafeToFuture()
209256
}
257+
258+
"Fetches produced across unrelated fetches to a DataSource that is NOT batched across fetch executions should NOT be bundled together" in {
259+
BatchAcrossFetches.reset()
260+
val dataSource = BatchAcrossFetches.unBatchedSource[IO]
261+
val id1 = BatchAcrossFetchData(1)
262+
val id2 = BatchAcrossFetchData(2)
263+
val execution1 = Fetch.run[IO](Fetch(id1, dataSource))
264+
val execution2 = Fetch.run[IO](Fetch(id2, dataSource))
265+
val singleExecution = (execution1, execution2).parMapN { (_, _) =>
266+
val (fetchRequests, batchRequests) = BatchAcrossFetches.counters
267+
fetchRequests shouldEqual 2
268+
batchRequests shouldEqual 0
269+
}
270+
singleExecution.unsafeToFuture()
271+
}
272+
273+
"Fetches produced across unrelated fetches to a DataSource that is batched across fetch executions should be bundled together" in {
274+
BatchAcrossFetches.reset()
275+
val dataSource = BatchAcrossFetches.batchedSource[IO](500.millis)
276+
val id1 = BatchAcrossFetchData(1)
277+
val id2 = BatchAcrossFetchData(2)
278+
dataSource
279+
.use { dataSource =>
280+
val execution1 = Fetch.run[IO](Fetch(id1, dataSource))
281+
val execution2 = Fetch.run[IO](Fetch(id2, dataSource))
282+
val singleExecution = (execution1, execution2).parMapN { (_, _) =>
283+
val (fetchRequests, batchRequests) = BatchAcrossFetches.counters
284+
fetchRequests shouldEqual 0
285+
batchRequests shouldEqual 1
286+
}
287+
singleExecution
288+
}
289+
.unsafeToFuture()
290+
}
210291
}

0 commit comments

Comments
 (0)