Skip to content

Commit 2be839e

Browse files
committed
Adding optional arg to BF.INSERT to allow users to check if their bloom filter can reach the desired size
Signed-off-by: zackcam <[email protected]>
1 parent 591ab10 commit 2be839e

File tree

3 files changed

+84
-0
lines changed

3 files changed

+84
-0
lines changed

src/bloom/command_handler.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,7 @@ pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> Valkey
462462
true => (None, true),
463463
false => (Some(configs::FIXED_SEED), false),
464464
};
465+
let mut wanted_capacity = -1;
465466
let mut nocreate = false;
466467
let mut items_provided = false;
467468
while idx < argc {
@@ -553,6 +554,21 @@ pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> Valkey
553554
}
554555
};
555556
}
557+
"ATLEASTCAPACITY" => {
558+
if idx >= (argc - 1) {
559+
return Err(ValkeyError::WrongArity);
560+
}
561+
idx += 1;
562+
wanted_capacity = match input_args[idx].to_string_lossy().parse::<i64>() {
563+
Ok(num) if (BLOOM_CAPACITY_MIN..=BLOOM_CAPACITY_MAX).contains(&num) => num,
564+
Ok(0) => {
565+
return Err(ValkeyError::Str(utils::CAPACITY_LARGER_THAN_0));
566+
}
567+
_ => {
568+
return Err(ValkeyError::Str(utils::BAD_CAPACITY));
569+
}
570+
};
571+
}
556572
"ITEMS" => {
557573
idx += 1;
558574
items_provided = true;
@@ -568,6 +584,26 @@ pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> Valkey
568584
// When the `ITEMS` argument is provided, we expect additional item arg/s to be provided.
569585
return Err(ValkeyError::WrongArity);
570586
}
587+
// Check if we have a wanted capacity and calculate if we can reach that capacity
588+
if wanted_capacity > 0 {
589+
if expansion == 0 {
590+
return Err(ValkeyError::Str(
591+
utils::NON_SCALING_AND_WANTED_CAPACITY_IS_INVALID,
592+
));
593+
}
594+
match utils::BloomObject::calculate_if_wanted_capacity_is_valid(
595+
capacity,
596+
fp_rate,
597+
wanted_capacity,
598+
tightening_ratio,
599+
expansion,
600+
) {
601+
Ok(result) => result,
602+
Err(e) => {
603+
return Err(e);
604+
}
605+
}
606+
}
571607
// If the filter does not exist, create one
572608
let filter_key = ctx.open_key_writable(filter_name);
573609
let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {

src/bloom/utils.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use bloomfilter::Bloom;
1010
use bloomfilter::{deserialize, serialize};
1111
use serde::{Deserialize, Deserializer, Serialize};
1212
use std::sync::atomic::Ordering;
13+
use valkey_module::ValkeyError;
1314

1415
/// KeySpace Notification Events
1516
pub const ADD_EVENT: &str = "bloom.add";
@@ -32,10 +33,16 @@ pub const CAPACITY_LARGER_THAN_0: &str = "ERR (capacity should be larger than 0)
3233
pub const MAX_NUM_SCALING_FILTERS: &str = "ERR bloom object reached max number of filters";
3334
pub const UNKNOWN_ARGUMENT: &str = "ERR unknown argument received";
3435
pub const EXCEEDS_MAX_BLOOM_SIZE: &str = "ERR operation exceeds bloom object memory limit";
36+
pub const WANTED_CAPACITY_EXCEEDS_MAX_SIZE: &str =
37+
"ERR Wanted capacity would go beyond bloom object memory limit";
38+
pub const WANTED_CAPACITY_FALSE_POSITIVE_INVALID: &str =
39+
"ERR False positive degrades too much to reach wanted capacity";
3540
pub const KEY_EXISTS: &str = "BUSYKEY Target key name already exists.";
3641
pub const DECODE_BLOOM_OBJECT_FAILED: &str = "ERR bloom object decoding failed";
3742
pub const DECODE_UNSUPPORTED_VERSION: &str =
3843
"ERR bloom object decoding failed. Unsupported version";
44+
// Error returned when ATLEASTCAPACITY is requested for a non-scaling (expansion == 0) filter.
// NOTE(review): the message below spells the option "ATLEASTCAPCITY" (missing an "A"). The
// Python command test asserts on this exact text, so the literal and the test expectation
// have to be corrected together.
pub const NON_SCALING_AND_WANTED_CAPACITY_IS_INVALID: &str =
45+
"ERR Specifying NONSCALING and ATLEASTCAPCITY is not allowed";
3946
/// Logging Error messages
4047
pub const ENCODE_BLOOM_OBJECT_FAILED: &str = "Failed to encode bloom object.";
4148

@@ -455,6 +462,44 @@ impl BloomObject {
455462
_ => Err(BloomError::DecodeUnsupportedVersion),
456463
}
457464
}
465+
466+
pub fn calculate_if_wanted_capacity_is_valid(
467+
capacity: i64,
468+
fp_rate: f64,
469+
wanted_capacity: i64,
470+
tightening_ratio: f64,
471+
expansion: u32,
472+
) -> Result<(), ValkeyError> {
473+
let mut curr_capacity = capacity;
474+
let mut curr_num_filters: u64 = 1;
475+
let mut curr_fp_rate = fp_rate;
476+
let mut filters_memory_usage = 0;
477+
while curr_capacity < wanted_capacity {
478+
curr_fp_rate = match BloomObject::calculate_fp_rate(
479+
curr_fp_rate,
480+
curr_num_filters as i32,
481+
tightening_ratio,
482+
) {
483+
Ok(rate) => rate,
484+
Err(_) => {
485+
return Err(ValkeyError::Str(WANTED_CAPACITY_FALSE_POSITIVE_INVALID));
486+
}
487+
};
488+
let curr_filter_size = BloomFilter::compute_size(curr_capacity, curr_fp_rate);
489+
// For vectors of size < 4 the capacity of the vector is 4. However after that the capacity is always a power of two above or equal to the size
490+
let curr_object_size = BloomObject::compute_size(
491+
std::cmp::max(4, curr_num_filters).next_power_of_two() as usize,
492+
) + filters_memory_usage
493+
+ curr_filter_size;
494+
if !BloomObject::validate_size(curr_object_size) {
495+
return Err(ValkeyError::Str(WANTED_CAPACITY_EXCEEDS_MAX_SIZE));
496+
}
497+
filters_memory_usage += curr_filter_size;
498+
curr_capacity *= expansion as i64;
499+
curr_num_filters += 1;
500+
}
501+
Ok(())
502+
}
458503
}
459504

460505
/// Structure representing a single bloom filter. 200 Bytes.

tests/test_bloom_command.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ def test_bloom_command_error(self):
4444
('BF.INSERT TEST_LIMIT EXPANSION 4294967299 ITEMS EXPAN', 'bad expansion'),
4545
('BF.INSERT TEST_NOCREATE NOCREATE ITEMS A B', 'not found'),
4646
('BF.INSERT KEY HELLO', 'unknown argument received'),
47+
('BF.INSERT KEY CAPACITY 1 ERROR 0.0000000001 ATLEASTCAPACITY 10000000 EXPANSION 1', 'False positive degrades too much to reach wanted capacity'),
48+
('BF.INSERT KEY ATLEASTCAPACITY 1000000000000', 'Wanted capacity would go beyond bloom object memory limit'),
49+
('BF.INSERT KEY ATLEASTCAPACITY 1000000000000 NONSCALING', 'Specifying NONSCALING and ATLEASTCAPCITY is not allowed'),
4750
('BF.RESERVE KEY String 100', 'bad error rate'),
4851
('BF.RESERVE KEY 0.99999999999999999 3000', '(0 < error rate range < 1)'),
4952
('BF.RESERVE KEY 2 100', '(0 < error rate range < 1)'),

0 commit comments

Comments
 (0)