Skip to content

Commit 2b79825

Browse files
committed
Fixing ATLEASTCAPACITY calculation as well as adding MAXCAPACITY functionality for info
Signed-off-by: zackcam <[email protected]>
1 parent 2be839e commit 2b79825

File tree

5 files changed

+152
-41
lines changed

5 files changed

+152
-41
lines changed

src/bloom/command_handler.rs

+48-8
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> Valkey
462462
true => (None, true),
463463
false => (Some(configs::FIXED_SEED), false),
464464
};
465-
let mut wanted_capacity = -1;
465+
let mut can_scale_to = -1;
466466
let mut nocreate = false;
467467
let mut items_provided = false;
468468
while idx < argc {
@@ -554,12 +554,12 @@ pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> Valkey
554554
}
555555
};
556556
}
557-
"ATLEASTCAPACITY" => {
557+
"VALIDATESCALETO" => {
558558
if idx >= (argc - 1) {
559559
return Err(ValkeyError::WrongArity);
560560
}
561561
idx += 1;
562-
wanted_capacity = match input_args[idx].to_string_lossy().parse::<i64>() {
562+
can_scale_to = match input_args[idx].to_string_lossy().parse::<i64>() {
563563
Ok(num) if (BLOOM_CAPACITY_MIN..=BLOOM_CAPACITY_MAX).contains(&num) => num,
564564
Ok(0) => {
565565
return Err(ValkeyError::Str(utils::CAPACITY_LARGER_THAN_0));
@@ -585,24 +585,24 @@ pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> Valkey
585585
return Err(ValkeyError::WrongArity);
586586
}
587587
// Check if we have a wanted capacity and calculate if we can reach that capacity
588-
if wanted_capacity > 0 {
588+
if can_scale_to > 0 {
589589
if expansion == 0 {
590590
return Err(ValkeyError::Str(
591-
utils::NON_SCALING_AND_WANTED_CAPACITY_IS_INVALID,
591+
utils::NON_SCALING_AND_VALIDATE_SCALE_TO_IS_INVALID,
592592
));
593593
}
594-
match utils::BloomObject::calculate_if_wanted_capacity_is_valid(
594+
match utils::BloomObject::calculate_if_can_scale_to_is_valid(
595595
capacity,
596596
fp_rate,
597-
wanted_capacity,
597+
can_scale_to,
598598
tightening_ratio,
599599
expansion,
600600
) {
601601
Ok(result) => result,
602602
Err(e) => {
603603
return Err(e);
604604
}
605-
}
605+
};
606606
}
607607
// If the filter does not exist, create one
608608
let filter_key = ctx.open_key_writable(filter_name);
@@ -714,12 +714,31 @@ pub fn bloom_filter_info(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyRe
714714
"SIZE" => Ok(ValkeyValue::Integer(val.memory_usage() as i64)),
715715
"FILTERS" => Ok(ValkeyValue::Integer(val.num_filters() as i64)),
716716
"ITEMS" => Ok(ValkeyValue::Integer(val.cardinality())),
717+
"ERROR" => Ok(ValkeyValue::Float(val.fp_rate())),
717718
"EXPANSION" => {
718719
if val.expansion() == 0 {
719720
return Ok(ValkeyValue::Null);
720721
}
721722
Ok(ValkeyValue::Integer(val.expansion() as i64))
722723
}
724+
"MAXSCALEDCAPACITY" if val.expansion() > 0 => {
725+
let max_capacity = match utils::BloomObject::calculate_if_can_scale_to_is_valid(
726+
val.filters()
727+
.first()
728+
.expect("Filter will be populated")
729+
.capacity(),
730+
val.fp_rate(),
731+
-1,
732+
val.tightening_ratio(),
733+
val.expansion(),
734+
) {
735+
Ok(result) => result,
736+
Err(e) => {
737+
return Err(e);
738+
}
739+
};
740+
Ok(ValkeyValue::Integer(max_capacity))
741+
}
723742
_ => Err(ValkeyError::Str(utils::INVALID_INFO_VALUE)),
724743
}
725744
}
@@ -733,13 +752,34 @@ pub fn bloom_filter_info(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyRe
733752
ValkeyValue::Integer(val.num_filters() as i64),
734753
ValkeyValue::SimpleStringStatic("Number of items inserted"),
735754
ValkeyValue::Integer(val.cardinality()),
755+
ValkeyValue::SimpleStringStatic("Error rate"),
756+
ValkeyValue::Float(val.fp_rate()),
736757
ValkeyValue::SimpleStringStatic("Expansion rate"),
737758
];
738759
if val.expansion() == 0 {
739760
result.push(ValkeyValue::Null);
740761
} else {
741762
result.push(ValkeyValue::Integer(val.expansion() as i64));
742763
}
764+
if val.expansion() != 0 {
765+
let max_capacity = match utils::BloomObject::calculate_if_can_scale_to_is_valid(
766+
val.filters()
767+
.first()
768+
.expect("Filter will be populated")
769+
.capacity(),
770+
val.fp_rate(),
771+
-1,
772+
val.tightening_ratio(),
773+
val.expansion(),
774+
) {
775+
Ok(result) => result,
776+
Err(e) => {
777+
return Err(e);
778+
}
779+
};
780+
result.push(ValkeyValue::SimpleStringStatic("Max scaled capacity"));
781+
result.push(ValkeyValue::Integer(max_capacity));
782+
}
743783
Ok(ValkeyValue::Array(result))
744784
}
745785
_ => Err(ValkeyError::Str(utils::NOT_FOUND)),

src/bloom/utils.rs

+69-24
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,20 @@ pub const ERROR_RATE_RANGE: &str = "ERR (0 < error rate range < 1)";
3030
pub const BAD_TIGHTENING_RATIO: &str = "ERR bad tightening ratio";
3131
pub const TIGHTENING_RATIO_RANGE: &str = "ERR (0 < tightening ratio range < 1)";
3232
pub const CAPACITY_LARGER_THAN_0: &str = "ERR (capacity should be larger than 0)";
33-
pub const MAX_NUM_SCALING_FILTERS: &str = "ERR bloom object reached max number of filters";
33+
pub const FALSE_POSITIVE_DEGRADES_TO_O: &str = "ERR false positive degrades to 0 on scale out";
3434
pub const UNKNOWN_ARGUMENT: &str = "ERR unknown argument received";
3535
pub const EXCEEDS_MAX_BLOOM_SIZE: &str = "ERR operation exceeds bloom object memory limit";
36-
pub const WANTED_CAPACITY_EXCEEDS_MAX_SIZE: &str =
37-
"ERR Wanted capacity would go beyond bloom object memory limit";
38-
pub const WANTED_CAPACITY_FALSE_POSITIVE_INVALID: &str =
39-
"ERR False positive degrades too much to reach wanted capacity";
36+
pub const VALIDATE_SCALE_TO_EXCEEDS_MAX_SIZE: &str =
37+
"ERR provided VALIDATESCALETO causes bloom object to exceed memory limit";
38+
pub const MAX_NUM_SCALING_FILTERS: &str = "ERR bloom object reached max number of filters";
39+
pub const VALIDATE_SCALE_TO_FALSE_POSITIVE_INVALID: &str =
40+
"ERR provided VALIDATESCALETO causes false positive to degrades to 0";
4041
pub const KEY_EXISTS: &str = "BUSYKEY Target key name already exists.";
4142
pub const DECODE_BLOOM_OBJECT_FAILED: &str = "ERR bloom object decoding failed";
4243
pub const DECODE_UNSUPPORTED_VERSION: &str =
4344
"ERR bloom object decoding failed. Unsupported version";
44-
pub const NON_SCALING_AND_WANTED_CAPACITY_IS_INVALID: &str =
45-
"ERR Specifying NONSCALING and ATLEASTCAPCITY is not allowed";
45+
pub const NON_SCALING_AND_VALIDATE_SCALE_TO_IS_INVALID: &str =
46+
"ERR cannot use NONSCALING and VALIDATESCALETO options together";
4647
/// Logging Error messages
4748
pub const ENCODE_BLOOM_OBJECT_FAILED: &str = "Failed to encode bloom object.";
4849

@@ -56,6 +57,8 @@ pub enum BloomError {
5657
DecodeUnsupportedVersion,
5758
ErrorRateRange,
5859
BadExpansion,
60+
FalsePositiveReachesZero,
61+
BadCapacity,
5962
}
6063

6164
impl BloomError {
@@ -69,6 +72,8 @@ impl BloomError {
6972
BloomError::DecodeUnsupportedVersion => DECODE_UNSUPPORTED_VERSION,
7073
BloomError::ErrorRateRange => ERROR_RATE_RANGE,
7174
BloomError::BadExpansion => BAD_EXPANSION,
75+
BloomError::FalsePositiveReachesZero => FALSE_POSITIVE_DEGRADES_TO_O,
76+
BloomError::BadCapacity => BAD_CAPACITY,
7277
}
7378
}
7479
}
@@ -319,7 +324,7 @@ impl BloomObject {
319324
Some(new_capacity) => new_capacity,
320325
None => {
321326
// u32:max cannot be reached with 64MB memory usage limit per filter even with a high fp rate (e.g. 0.9).
322-
return Err(BloomError::MaxNumScalingFilters);
327+
return Err(BloomError::BadCapacity);
323328
}
324329
};
325330
// Reject the request, if the operation will result in creation of a filter of size greater than what is allowed.
@@ -373,7 +378,7 @@ impl BloomObject {
373378
) -> Result<f64, BloomError> {
374379
match fp_rate * tightening_ratio.powi(num_filters) {
375380
x if x > f64::MIN_POSITIVE => Ok(x),
376-
_ => Err(BloomError::MaxNumScalingFilters),
381+
_ => Err(BloomError::FalsePositiveReachesZero),
377382
}
378383
}
379384

@@ -463,42 +468,78 @@ impl BloomObject {
463468
}
464469
}
465470

466-
pub fn calculate_if_wanted_capacity_is_valid(
471+
/// This method is called from two different bloom commands BF.INFO and BF.INSERT. The functionality varies slightly on which command it
472+
/// is called from. When called from BF.INFO this method is used to find the maximum possible size that the bloom object could scale to
473+
/// without throwing an error. When called from BF.INSERT this method is used to determine if it is possible to reach the capacity that
474+
/// has been provided.
475+
///
476+
/// # Arguments
477+
///
478+
/// * `capacity` - The size of the initial filter in the bloom object.
479+
/// * `fp_rate` - the false positive rate for the bloom object
480+
/// * `can_scale_to` - the capcity we check to see if it can scale to. If this method is called from BF.INFO this is set as -1 as we
481+
/// want to check the maximum size we could scale up till
482+
/// * `tightening_ratio` - The tightening ratio of the object
483+
/// * `expansion` - The expanison rate of the object
484+
///
485+
/// # Returns
486+
/// * i64 - The maximum capacity that can be reached if called from BF.INFO. If called from BF.INSERT the size it reached after going past max capacity
487+
/// * ValkeyError - Can return two different errors:
488+
/// VALIDATE_SCALE_TO_EXCEEDS_MAX_SIZE: When scaling to the wanted capacity would go over the bloom object memory limit
489+
/// VALIDATE_SCALE_TO_FALSE_POSITIVE_INVALID: When scaling to the wanted capcity would cause the false positive rate to reach 0
490+
pub fn calculate_if_can_scale_to_is_valid(
467491
capacity: i64,
468492
fp_rate: f64,
469-
wanted_capacity: i64,
493+
can_scale_to: i64,
470494
tightening_ratio: f64,
471495
expansion: u32,
472-
) -> Result<(), ValkeyError> {
473-
let mut curr_capacity = capacity;
474-
let mut curr_num_filters: u64 = 1;
475-
let mut curr_fp_rate = fp_rate;
496+
) -> Result<i64, ValkeyError> {
497+
let mut curr_filter_capacity = capacity;
498+
let mut curr_total_capacity = capacity;
499+
let mut curr_num_filters: u64 = 0;
476500
let mut filters_memory_usage = 0;
477-
while curr_capacity < wanted_capacity {
478-
curr_fp_rate = match BloomObject::calculate_fp_rate(
479-
curr_fp_rate,
501+
while curr_total_capacity < can_scale_to || can_scale_to == -1 {
502+
curr_filter_capacity = match curr_filter_capacity.checked_mul(expansion.into()) {
503+
Some(new_capacity) => new_capacity,
504+
None => {
505+
// u32:max cannot be reached with 64MB memory usage limit per filter even with a high fp rate (e.g. 0.9).
506+
return Err(ValkeyError::Str(BAD_CAPACITY));
507+
}
508+
};
509+
curr_total_capacity += curr_filter_capacity;
510+
curr_num_filters += 1;
511+
512+
// Check to see if scaling to the next filter will cause a degradation in FP to 0
513+
let curr_fp_rate = match BloomObject::calculate_fp_rate(
514+
fp_rate,
480515
curr_num_filters as i32,
481516
tightening_ratio,
482517
) {
483518
Ok(rate) => rate,
484519
Err(_) => {
485-
return Err(ValkeyError::Str(WANTED_CAPACITY_FALSE_POSITIVE_INVALID));
520+
if can_scale_to == -1 {
521+
return Ok(curr_total_capacity - curr_filter_capacity);
522+
}
523+
return Err(ValkeyError::Str(VALIDATE_SCALE_TO_FALSE_POSITIVE_INVALID));
486524
}
487525
};
488-
let curr_filter_size = BloomFilter::compute_size(curr_capacity, curr_fp_rate);
526+
// Check that if it scales to this number of filters that the object won't exceed the memory limit
527+
let curr_filter_size = BloomFilter::compute_size(curr_filter_capacity, curr_fp_rate);
489528
// For vectors of size < 4 the capacity of the vector is 4. However after that the capacity is always a power of two above or equal to the size
490529
let curr_object_size = BloomObject::compute_size(
491530
std::cmp::max(4, curr_num_filters).next_power_of_two() as usize,
492531
) + filters_memory_usage
493532
+ curr_filter_size;
494533
if !BloomObject::validate_size(curr_object_size) {
495-
return Err(ValkeyError::Str(WANTED_CAPACITY_EXCEEDS_MAX_SIZE));
534+
if can_scale_to == -1 {
535+
return Ok(curr_total_capacity - curr_filter_capacity);
536+
}
537+
return Err(ValkeyError::Str(VALIDATE_SCALE_TO_EXCEEDS_MAX_SIZE));
496538
}
539+
// Update overall memory usage
497540
filters_memory_usage += curr_filter_size;
498-
curr_capacity *= expansion as i64;
499-
curr_num_filters += 1;
500541
}
501-
Ok(())
542+
Ok(curr_total_capacity / expansion as i64)
502543
}
503544
}
504545

@@ -1006,6 +1047,10 @@ mod tests {
10061047
let test_bloom_filter2 = BloomFilter::with_random_seed(0.5_f64, 1000_i64);
10071048
let test_seed2 = test_bloom_filter2.seed();
10081049
assert_ne!(test_seed2, configs::FIXED_SEED);
1050+
// Check that the random seed changes for each BloomFilter
1051+
let test_bloom_filter3 = BloomFilter::with_random_seed(0.5_f64, 1000_i64);
1052+
let test_seed3 = test_bloom_filter3.seed();
1053+
assert_ne!(test_seed2, test_seed3);
10091054
}
10101055

10111056
#[test]

tests/test_bloom_command.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ def test_bloom_command_error(self):
4444
('BF.INSERT TEST_LIMIT EXPANSION 4294967299 ITEMS EXPAN', 'bad expansion'),
4545
('BF.INSERT TEST_NOCREATE NOCREATE ITEMS A B', 'not found'),
4646
('BF.INSERT KEY HELLO', 'unknown argument received'),
47-
('BF.INSERT KEY CAPACITY 1 ERROR 0.0000000001 ATLEASTCAPACITY 10000000 EXPANSION 1', 'False positive degrades too much to reach wanted capacity'),
48-
('BF.INSERT KEY ATLEASTCAPACITY 1000000000000', 'Wanted capacity would go beyond bloom object memory limit'),
49-
('BF.INSERT KEY ATLEASTCAPACITY 1000000000000 NONSCALING', 'Specifying NONSCALING and ATLEASTCAPCITY is not allowed'),
47+
('BF.INSERT KEY CAPACITY 1 ERROR 0.0000000001 VALIDATESCALETO 10000000 EXPANSION 1', 'provided VALIDATESCALETO causes false positive to degrades to 0'),
48+
('BF.INSERT KEY VALIDATESCALETO 1000000000000', 'provided VALIDATESCALETO causes bloom object to exceed memory limit'),
49+
('BF.INSERT KEY VALIDATESCALETO 1000000000000 NONSCALING', 'cannot use NONSCALING and VALIDATESCALETO options together'),
5050
('BF.RESERVE KEY String 100', 'bad error rate'),
5151
('BF.RESERVE KEY 0.99999999999999999 3000', '(0 < error rate range < 1)'),
5252
('BF.RESERVE KEY 2 100', '(0 < error rate range < 1)'),
@@ -120,6 +120,7 @@ def test_bloom_command_behavior(self):
120120
('bf.info TEST expansion', 2),
121121
('BF.INFO TEST_EXPANSION EXPANSION', 9),
122122
('BF.INFO TEST_CAPACITY CAPACITY', 2000),
123+
('BF.INFO TEST MAXSCALEDCAPACITY', 26214300),
123124
('BF.CARD key', 3),
124125
('BF.CARD hello', 5),
125126
('BF.CARD TEST', 5),

tests/test_bloom_correctness.py

+31
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def test_non_scaling_filter(self):
2525
assert info_dict[b'Number of filters'] == 1
2626
assert info_dict[b'Size'] > 0
2727
assert info_dict[b'Expansion rate'] == None
28+
assert "Maximum Capacity" not in info_dict
2829
# Use a margin on the expected_fp_rate when asserting for correctness.
2930
fp_margin = 0.002
3031
# Validate that item "add" operations on bloom filters are ensuring correctness.
@@ -74,6 +75,7 @@ def test_scaling_filter(self):
7475
assert info_dict[b'Number of filters'] == 1
7576
assert info_dict[b'Size'] > 0
7677
assert info_dict[b'Expansion rate'] == expansion
78+
assert info_dict[b'Max scaled capacity'] == 20470000
7779

7880
# Scale out by adding items.
7981
total_error_count = 0
@@ -92,6 +94,7 @@ def test_scaling_filter(self):
9294
assert info_dict[b'Number of filters'] == filter_idx
9395
assert info_dict[b'Size'] > 0
9496
assert info_dict[b'Expansion rate'] == expansion
97+
assert info_dict[b'Max scaled capacity'] == 20470000
9598

9699
# Use a margin on the expected_fp_rate when asserting for correctness.
97100
fp_margin = 0.002
@@ -127,3 +130,31 @@ def test_scaling_filter(self):
127130
info_dict = dict(zip(it, it))
128131
# Validate correctness on a copy of a scaling bloom filter.
129132
self.validate_copied_bloom_correctness(client, filter_name, item_prefix, add_operation_idx, expected_fp_rate, fp_margin, info_dict)
133+
134+
def test_max_and_can_scale_to_correctness(self):
135+
can_scale_to_commands = [
136+
('BF.INSERT key ERROR 0.00000001 VALIDATESCALETO 13107101', "provided VALIDATESCALETO causes bloom object to exceed memory limit" ),
137+
('BF.INSERT key EXPANSION 1 VALIDATESCALETO 101601', "provided VALIDATESCALETO causes false positive to degrades to 0" )
138+
]
139+
for cmd in can_scale_to_commands:
140+
try:
141+
self.client.execute_command(cmd[0])
142+
assert False, "Expect BF.INSERT to fail if the wanted capacity would cause an error"
143+
except Exception as e:
144+
print(cmd[0])
145+
assert cmd[1] == str(e), f"Unexpected error message: {e}"
146+
self.client.execute_command('BF.INSERT MemLimitKey ERROR 0.00000001 VALIDATESCALETO 13107100')
147+
self.client.execute_command('BF.INSERT FPKey VALIDATESCALETO 101600 EXPANSION 1')
148+
FPKey_max_capacity = self.client.execute_command(f'BF.INFO FPKey MAXSCALEDCAPACITY')
149+
MemLimitKeyMaxCapacity = self.client.execute_command(f'BF.INFO MemLimitKey MAXSCALEDCAPACITY')
150+
self.add_items_till_capacity(self.client, "FPKey", 101600, 1, "item")
151+
self.add_items_till_capacity(self.client, "MemLimitKey", 13107100, 1, "item")
152+
key_names = [("MemLimitKey", MemLimitKeyMaxCapacity, "operation exceeds bloom object memory limit"), ("FPKey", FPKey_max_capacity, "false positive degrades to 0 on scale out")]
153+
for key in key_names:
154+
try:
155+
self.add_items_till_capacity(self.client, key[0], key[1]+1, 1, "new_item")
156+
assert False, "Expect adding to an item after reaching max capacity should fail"
157+
except Exception as e:
158+
assert key[2] in str(e)
159+
# Check that max capacity doesnt change even after adding items.
160+
assert self.client.execute_command(f'BF.INFO {key[0]} MAXSCALEDCAPACITY') == key[1]

tests/valkeytests/valkey_test_case.py

-6
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,11 @@
11
import subprocess
22
import time
3-
import random
43
import os
54
import pytest
65
import re
7-
import struct
8-
import threading
9-
import io
10-
import socket
116
from contextlib import contextmanager
127
from functools import wraps
138
from valkey import *
14-
from valkey.client import Pipeline
159
from util.waiters import *
1610

1711
from enum import Enum

0 commit comments

Comments
 (0)