Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: added size based retention policy #2098

Merged
merged 8 commits into from
Oct 10, 2023
47 changes: 47 additions & 0 deletions tests/waku_archive/test_retention_policy.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import
../../../waku/waku_archive/driver/sqlite_driver,
../../../waku/waku_archive/retention_policy,
../../../waku/waku_archive/retention_policy/retention_policy_capacity,
../../../waku/waku_archive/retention_policy/retention_policy_size,
../testlib/common,
../testlib/wakucore

Expand Down Expand Up @@ -53,6 +54,51 @@ suite "Waku Archive - Retention policy":

## Cleanup
(waitFor driver.close()).expect("driver to close")

test "size retention policy - windowed message deletion":
## Given
let
# in megabytes
sizeLimit:float = 0.05
excess = 123

let driver = newTestArchiveDriver()

let retentionPolicy: RetentionPolicy = SizeRetentionPolicy.init(size=sizeLimit)

## When

var putFutures = newSeq[Future[ArchiveDriverResult[void]]]()

for i in 1..excess:
let msg = fakeWakuMessage(payload= @[byte i], contentTopic=DefaultContentTopic, ts=Timestamp(i))
putFutures.add(driver.put(DefaultPubsubTopic, msg, computeDigest(msg), msg.timestamp))

# waitFor is used to synchronously wait for the futures to complete.
discard waitFor allFinished(putFutures)

## Then
# calculate the current database size
var pageSize = (waitFor driver.getPagesSize()).tryGet()
var pageCount = (waitFor driver.getPagesCount()).tryGet()
var sizeDB = float(pageCount * pageSize) / (1024.0 * 1024.0)
# execute policy if the current db size overflows the size limit
if sizeDB >= sizeLimit:
require (waitFor retentionPolicy.execute(driver)).isOk()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great indeed! However, would it be possible to enforce a require instead?

Suggested change
if sizeDB >= sizeLimit:
require (waitFor retentionPolicy.execute(driver)).isOk()
require sizeDB >= sizeLimit
require (waitFor retentionPolicy.execute(driver)).isOk()


# update the current db size
pageSize = (waitFor driver.getPagesSize()).tryGet()
pageCount = (waitFor driver.getPagesCount()).tryGet()
sizeDB = float(pageCount * pageSize) / (1024.0 * 1024.0)

check:
# size of the database is used to check if the storage limit has been preserved
# check the current database size with the limitSize provided by the user
# it should be lower
sizeDB <= sizeLimit

## Cleanup
(waitFor driver.close()).expect("driver to close")

test "store capacity should be limited":
## Given
Expand Down Expand Up @@ -90,3 +136,4 @@ suite "Waku Archive - Retention policy":

## Cleanup
(waitFor driver.close()).expect("driver to close")

3 changes: 2 additions & 1 deletion waku/common/databases/db_sqlite.nim
Original file line number Diff line number Diff line change
Expand Up @@ -484,4 +484,5 @@ proc performSqliteVacuum*(db: SqliteDatabase): DatabaseResult[void] =
if resVacuum.isErr():
return err("failed to execute vacuum: " & resVacuum.error)

debug "finished sqlite database vacuuming"
debug "finished sqlite database vacuuming"
ok()
10 changes: 10 additions & 0 deletions waku/waku_archive/driver.nim
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ method getMessages*(driver: ArchiveDriver,
method getMessagesCount*(driver: ArchiveDriver):
    Future[ArchiveDriverResult[int64]] {.base, async.} =
  ## Base declaration of the message-count query. The base body does nothing
  ## (`discard`); concrete drivers provide the implementation.
  discard

method getPagesCount*(driver: ArchiveDriver):
    Future[ArchiveDriverResult[int64]] {.base, async.} =
  ## Base declaration of the page-count query used to compute the database
  ## size. The base body does nothing (`discard`); concrete drivers provide
  ## the implementation.
  discard

method getPagesSize*(driver: ArchiveDriver):
    Future[ArchiveDriverResult[int64]] {.base, async.} =
  ## Base declaration of the page-size query used to compute the database
  ## size. The base body does nothing (`discard`); concrete drivers provide
  ## the implementation.
  discard

method performVacuum*(driver: ArchiveDriver):
    Future[ArchiveDriverResult[void]] {.base, async.} =
  ## Base declaration of the vacuum operation. The base body does nothing
  ## (`discard`); concrete drivers provide the implementation.
  discard

method getOldestMessageTimestamp*(driver: ArchiveDriver):
    Future[ArchiveDriverResult[Timestamp]] {.base, async.} =
  ## Base declaration of the oldest-message-timestamp query. The base body
  ## does nothing (`discard`); concrete drivers provide the implementation.
  discard

Expand All @@ -61,3 +70,4 @@ method deleteOldestMessagesNotWithinLimit*(driver: ArchiveDriver,

method close*(driver: ArchiveDriver):
Future[ArchiveDriverResult[void]] {.base, async.} = discard

1 change: 1 addition & 0 deletions waku/waku_archive/driver/builder.nim
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,4 @@ proc new*(T: type ArchiveDriver,
debug "setting up in-memory waku archive driver"
let driver = QueueDriver.new() # Defaults to a capacity of 25.000 messages
return ok(driver)

14 changes: 13 additions & 1 deletion waku/waku_archive/driver/queue_driver/queue_driver.nim
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,18 @@ method getMessagesCount*(driver: QueueDriver):
Future[ArchiveDriverResult[int64]] {.async} =
return ok(int64(driver.len()))

method getPagesCount*(driver: QueueDriver):
    Future[ArchiveDriverResult[int64]] {.async.} =
  ## Returns the current length of the queue.
  # NOTE(review): this is the number of queued entries (`driver.len()`), not a
  # page count - presumably a placeholder for the in-memory driver; confirm.
  let queueLen = int64(driver.len())
  return ok(queueLen)

method getPagesSize*(driver: QueueDriver):
    Future[ArchiveDriverResult[int64]] {.async.} =
  ## Returns the current length of the queue.
  # NOTE(review): this is the number of queued entries (`driver.len()`), not a
  # page size in bytes - presumably a placeholder for the in-memory driver;
  # confirm.
  let queueLen = int64(driver.len())
  return ok(queueLen)

method performVacuum*(driver: QueueDriver):
    Future[ArchiveDriverResult[void]] {.async.} =
  ## Vacuuming is not supported by the queue driver: always returns an error.
  ##
  ## The method must be named `performVacuum` to override the `ArchiveDriver`
  ## base method of that name; under the former spelling (`performsVacuum`)
  ## it was a separate, never-dispatched method and callers of
  ## `performVacuum` reached the empty base implementation instead.
  return err("interface method not implemented")

method getOldestMessageTimestamp*(driver: QueueDriver):
    Future[ArchiveDriverResult[Timestamp]] {.async.} =
  ## Maps the result of `driver.first()` to the receiver time stored in that
  ## entry's index; an error from `first()` is passed through by `map`.
  let firstEntry = driver.first()
  return firstEntry.map(proc(msg: IndexedWakuMessage): Timestamp = msg.index.receiverTime)
Expand All @@ -302,4 +314,4 @@ method deleteOldestMessagesNotWithinLimit*(driver: QueueDriver,

method close*(driver: QueueDriver):
    Future[ArchiveDriverResult[void]] {.async.} =
  ## Closes the queue driver. No clean-up is performed here; the call always
  ## succeeds.
  # A single return is enough: the duplicated `return ok()` was unreachable.
  return ok()
13 changes: 13 additions & 0 deletions waku/waku_archive/driver/sqlite_driver/sqlite_driver.nim
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ method getMessagesCount*(s: SqliteDriver):
Future[ArchiveDriverResult[int64]] {.async.} =
return s.db.getMessageCount()

method getPagesCount*(s: SqliteDriver):
    Future[ArchiveDriverResult[int64]] {.async.} =
  ## Forwards to `getPageCount` on the driver's database handle and returns
  ## its result unchanged.
  let pageCountRes = s.db.getPageCount()
  return pageCountRes

method getPagesSize*(s: SqliteDriver):
    Future[ArchiveDriverResult[int64]] {.async.} =
  ## Forwards to `getPageSize` on the driver's database handle and returns
  ## its result unchanged.
  let pageSizeRes = s.db.getPageSize()
  return pageSizeRes

method performVacuum*(s: SqliteDriver):
    Future[ArchiveDriverResult[void]] {.async.} =
  ## Forwards to `performSqliteVacuum` on the driver's database handle and
  ## returns its result unchanged.
  let vacuumRes = s.db.performSqliteVacuum()
  return vacuumRes

method getOldestMessageTimestamp*(s: SqliteDriver):
    Future[ArchiveDriverResult[Timestamp]] {.async.} =
  ## Forwards to `selectOldestReceiverTimestamp` on the driver's database
  ## handle and returns its result unchanged.
  let oldestRes = s.db.selectOldestReceiverTimestamp()
  return oldestRes
Expand All @@ -135,3 +147,4 @@ method close*(s: SqliteDriver):
# Close connection
s.db.close()
return ok()

37 changes: 36 additions & 1 deletion waku/waku_archive/retention_policy/builder.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import
import
../retention_policy,
./retention_policy_time,
./retention_policy_capacity
./retention_policy_capacity,
./retention_policy_size

proc new*(T: type RetentionPolicy,
retPolicy: string):
Expand Down Expand Up @@ -51,5 +52,39 @@ proc new*(T: type RetentionPolicy,
let retPolicy: RetentionPolicy = CapacityRetentionPolicy.init(retentionCapacity)
return ok(some(retPolicy))

elif policy == "size":
var retentionSize: string
retentionSize = policyArgs

# captures the size unit such as Gb or Mb
let sizeUnit = retentionSize.substr(retentionSize.len-2)
# captures the string type number data of the size provided
let sizeQuantityStr = retentionSize.substr(0,retentionSize.len-3)
# to hold the numeric value data of size
var sizeQuantity: float

if sizeUnit in ["gb", "Gb", "GB", "gB"]:
# parse the numeric part of the argument into a float
try:
sizeQuantity = parseFloat(sizeQuantityStr)
except ValueError:
return err("invalid size retention policy argument: " & getCurrentExceptionMsg())
# Gb data is converted into Mb for uniform processing
sizeQuantity = sizeQuantity * 1024
elif sizeUnit in ["mb", "Mb", "MB", "mB"]:
try:
sizeQuantity = parseFloat(sizeQuantityStr)
except ValueError:
return err("invalid size retention policy argument")
else:
return err ("""invalid size retention value unit: expected "Mb" or "Gb" but got """ & sizeUnit )

if sizeQuantity <= 0:
return err("invalid size retention policy argument: a non-zero value is required")

let retPolicy: RetentionPolicy = SizeRetentionPolicy.init(sizeQuantity)
return ok(some(retPolicy))

else:
return err("unknown retention policy")

87 changes: 87 additions & 0 deletions waku/waku_archive/retention_policy/retention_policy_size.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Module-wide exception tracking: pushes an empty `raises` list, or
# `[Defect]` when compiling with a Nim version older than 1.4.
when (NimMajor, NimMinor) < (1, 4):
{.push raises: [Defect].}
else:
{.push raises: [].}

import
std/times,
stew/results,
chronicles,
chronos
import
../driver,
../retention_policy

# log topics attached to the log statements of this module
logScope:
topics = "waku archive retention_policy"

# default size limit, expressed in megabytes: 30 * 1024 Mb = 30 Gb
const DefaultRetentionSize*: float = 30_720

# fraction of the stored messages that is kept when the size limit is reached;
# the remaining 20% of the messages are deleted
const DeleteLimit = 0.80

type
# SizeRetentionPolicy implements auto delete as follows:
# - sizeLimit is the size in megabytes (Mb) the database can grow up to
# - when the database size reaches sizeLimit, only the DeleteLimit fraction
#   of the stored messages is kept; the rest of the outdated messages are
#   deleted using deleteOldestMessagesNotWithinLimit()
# - after the deletion, the fragmented space is reclaimed by vacuuming the
#   database
SizeRetentionPolicy* = ref object of RetentionPolicy
sizeLimit: float

proc init*(T: type SizeRetentionPolicy, size=DefaultRetentionSize): T =
  ## Creates a size retention policy whose `sizeLimit` is `size`
  ## (`DefaultRetentionSize` when omitted).
  let policy = SizeRetentionPolicy(sizeLimit: size)
  return policy

method execute*(p: SizeRetentionPolicy,
                driver: ArchiveDriver):
                Future[RetentionPolicyResult[void]] {.async.} =
  ## When the database size has reached `p.sizeLimit` (in megabytes), asks the
  ## driver to keep only the `DeleteLimit` fraction of the stored messages and
  ## then vacuums the database.
  ##
  ## Returns `ok()` when the database is below the limit or when the clean-up
  ## succeeded; returns `err(...)` describing the failing step otherwise.

  # the database size is derived from the page count and the page size
  let pageCountRes = await driver.getPagesCount()
  if pageCountRes.isErr():
    return err("failed to get Pages count: " & pageCountRes.error)

  let pageCount: int64 = pageCountRes.value

  # Check the result explicitly instead of relying on a 0 sentinel: reading
  # `.error` from a successful result is a programming error.
  let pageSizeRes = await driver.getPagesSize()
  if pageSizeRes.isErr():
    return err("failed to get Page size: " & pageSizeRes.error)

  let pageSize: int64 = pageSizeRes.value
  if pageSize <= 0:
    return err("failed to get Page size: invalid page size " & $pageSize)

  # database size in megabytes (Mb); the conversion is done in floating point
  # so that page sizes below 1024 bytes are not truncated to zero
  let totalSizeOfDB: float = float(pageSize * pageCount) / (1024.0 * 1024.0)

  # nothing to do while the database is below the size limit
  if totalSizeOfDB < p.sizeLimit:
    return ok()

  # the number of messages to keep is a fraction of the total message count
  let numMessagesRes = await driver.getMessagesCount()
  if numMessagesRes.isErr():
    return err("failed to get messages count: " & numMessagesRes.error)
  let numMessages = numMessagesRes.value

  # 80% of the total messages are to be kept, delete others
  let numMessagesToKeep = int(float(numMessages) * DeleteLimit)

  let res = await driver.deleteOldestMessagesNotWithinLimit(limit=numMessagesToKeep)
  if res.isErr():
    return err("deleting oldest messages failed: " & res.error)

  # vacuum so that the space of the deleted rows is given back and the
  # database size actually shrinks
  let resVacuum = await driver.performVacuum()
  if resVacuum.isErr():
    return err("vacuumming failed: " & resVacuum.error)

  return ok()