Skip to content

Commit f5991d8

Browse files
add histogram utils (#361)
1 parent a98e52f commit f5991d8

File tree

3 files changed

+271
-0
lines changed

3 files changed

+271
-0
lines changed

util/src/histogram_nbuckets.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/// Counts `values` into `bins` buckets spread evenly between the smallest and
/// the largest value.
///
/// The result has one `(edge, count)` entry per bucket, where
/// `edge = min + step * i` and `step = (max - min) / (bins - 1)`.
/// A value `v` is counted in bucket `ceil((v - min) / step)` (capped at the
/// last bucket), so bucket `i` collects the values in `(edge[i-1], edge[i]]`
/// and bucket 0 only collects values equal to `min`.
///
/// Edge cases:
/// * empty `values` yields an empty vector (there is no range to split);
/// * if all values are equal, `step` is `0.0`, every value is counted in
///   bucket 0 and every edge equals that value.
///
/// # Panics
///
/// Panics if `bins < 2`.
pub fn histogram(values: &[f64], bins: usize) -> Vec<(f64, usize)> {
    assert!(bins >= 2);

    // Without any sample the min/max scan below would keep its sentinel
    // values and produce NaN / infinite bucket edges.
    if values.is_empty() {
        return Vec::new();
    }

    let (min, max) = values
        .iter()
        .fold((f64::MAX, f64::MIN), |(lo, hi), &v| (lo.min(v), hi.max(v)));
    let step = (max - min) / (bins - 1) as f64;

    let mut bucket = vec![0usize; bins];
    for &v in values {
        // A zero-width (or otherwise non-positive / NaN) step cannot be
        // divided by meaningfully; such samples all land in the first bucket.
        let i = if step > 0.0 {
            (((v - min) / step).ceil() as usize).min(bins - 1)
        } else {
            0
        };
        bucket[i] += 1;
    }

    bucket
        .into_iter()
        .enumerate()
        .map(|(i, count)| (min + step * i as f64, count))
        .collect()
}

util/src/histogram_percentiles.rs

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
use itertools::Itertools;
2+
use std::fmt::Display;
3+
use std::iter::zip;
4+
5+
/// A weighted sample: a `priority` together with the `value` (weight) that
/// is accumulated when building the cumulative distribution.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Point {
    /// Key the samples are ordered by.
    pub priority: f64,
    /// Weight contributed by this sample.
    pub value: f64,
}

impl From<(f64, f64)> for Point {
    /// Builds a point from a `(priority, value)` tuple.
    ///
    /// The second component is named `cu_consumed` here — presumably the
    /// compute units consumed at that priority; confirm against callers.
    fn from((priority, cu_consumed): (f64, f64)) -> Self {
        Point {
            priority,
            value: cu_consumed,
        }
    }
}
19+
20+
/// A single `(percentile, value)` entry of a distribution.
///
/// `Eq` and `Hash` are intentionally not derived: both fields are floats.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct HistValue {
    /// Percentile this entry describes.
    pub percentile: f32,
    /// Value at that percentile (originally documented as "value of fees in
    /// lamports"; NOTE(review): confirm the unit against callers).
    pub value: f64,
}
27+
28+
/// `quantile` function is the same as the median if q=50, the same as the minimum if q=0 and the same as the maximum if q=100.
29+
30+
pub fn calculate_percentiles(input: &[f64]) -> Percentiles {
31+
if input.is_empty() {
32+
// note: percentile for empty array is undefined
33+
return Percentiles {
34+
v: vec![],
35+
p: vec![],
36+
};
37+
}
38+
39+
let is_monotonic = input.windows(2).all(|w| w[0] <= w[1]);
40+
assert!(is_monotonic, "array of values must be sorted");
41+
42+
let p_step = 5;
43+
let i_percentiles = (0..=100).step_by(p_step).collect_vec();
44+
45+
let mut bucket_values = Vec::with_capacity(i_percentiles.len());
46+
let mut percentiles = Vec::with_capacity(i_percentiles.len());
47+
for p in i_percentiles {
48+
let value = {
49+
let index = input.len() * p / 100;
50+
let cap_index = index.min(input.len() - 1);
51+
input[cap_index]
52+
};
53+
54+
bucket_values.push(value);
55+
percentiles.push(p as f32 / 100.0);
56+
}
57+
58+
Percentiles {
59+
v: bucket_values,
60+
p: percentiles,
61+
}
62+
}
63+
64+
pub fn calculate_cummulative(values: &[Point]) -> PercentilesCummulative {
65+
if values.is_empty() {
66+
// note: percentile for empty array is undefined
67+
return PercentilesCummulative {
68+
bucket_values: vec![],
69+
percentiles: vec![],
70+
};
71+
}
72+
73+
let is_monotonic = values.windows(2).all(|w| w[0].priority <= w[1].priority);
74+
assert!(is_monotonic, "array of values must be sorted");
75+
76+
let value_sum: f64 = values.iter().map(|x| x.value).sum();
77+
let mut agg: f64 = values[0].value;
78+
let mut index = 0;
79+
let p_step = 5;
80+
81+
let percentiles = (0..=100).step_by(p_step).map(|p| p as f64).collect_vec();
82+
83+
let dist = percentiles
84+
.iter()
85+
.map(|percentile| {
86+
while agg < (value_sum * *percentile) / 100.0 {
87+
index += 1;
88+
agg += values[index].value;
89+
}
90+
let priority = values[index].priority;
91+
HistValue {
92+
percentile: *percentile as f32,
93+
value: priority,
94+
}
95+
})
96+
.collect_vec();
97+
98+
PercentilesCummulative {
99+
bucket_values: dist.iter().map(|hv| hv.value).collect_vec(),
100+
percentiles: dist.iter().map(|hv| hv.percentile / 100.0).collect_vec(),
101+
}
102+
}
103+
104+
/// Values sampled at a list of percentiles.
///
/// `v` and `p` are parallel vectors: `v[i]` is the value at percentile `p[i]`.
pub struct Percentiles {
    /// Sampled values.
    pub v: Vec<f64>,
    /// Percentiles as fractions in the range 0.0..=1.0.
    pub p: Vec<f32>,
}

impl Display for Percentiles {
    /// Writes every entry as `p<percent>=><value> ` (note the trailing space).
    ///
    /// Entries are paired up with `zip`, so if the two public vectors ever
    /// differ in length the surplus entries are skipped instead of causing an
    /// out-of-bounds panic while formatting.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (p, v) in zip(&self.p, &self.v) {
            write!(f, "p{}=>{} ", p * 100.0, v)?;
        }
        Ok(())
    }
}

// Only exercised by the unit tests in this file.
#[allow(dead_code)]
impl Percentiles {
    /// Returns the value stored for `percentile` (a fraction such as `0.25`),
    /// or `None` if no entry has exactly that percentile.
    ///
    /// The comparison is an exact float equality.
    fn get_bucket_value(&self, percentile: f32) -> Option<f64> {
        zip(&self.p, &self.v)
            .find(|(&p, _v)| p == percentile)
            .map(|(_p, &v)| v)
    }
}
128+
129+
/// Result of a cumulative (weighted) percentile computation.
///
/// `bucket_values` and `percentiles` are parallel vectors:
/// `bucket_values[i]` belongs to `percentiles[i]`.
pub struct PercentilesCummulative {
    /// Value recorded for each percentile.
    pub bucket_values: Vec<f64>,
    /// Percentiles the values belong to.
    pub percentiles: Vec<f32>,
}

#[allow(dead_code)]
impl PercentilesCummulative {
    /// Looks up the bucket value whose percentile equals `percentile`
    /// exactly; returns `None` when there is no such entry (or when the
    /// matching position has no corresponding bucket value).
    fn get_bucket_value(&self, percentile: f32) -> Option<f64> {
        let hit = self
            .percentiles
            .iter()
            .position(|&candidate| candidate == percentile)?;
        self.bucket_values.get(hit).copied()
    }
}
142+
143+
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain percentiles over a small list that is sorted before use.
    #[test]
    fn test_calculate_supp_info() {
        let mut values: Vec<f64> = vec![2.0, 4.0, 5.0, 3.0, 1.0];
        // Compare the floats directly instead of sorting by a truncated
        // integer key, which would merge values closer than 0.01.
        values.sort_by(|a, b| a.partial_cmp(b).expect("test data contains no NaN"));
        let supp_info = calculate_percentiles(&values).v;
        assert_eq!(supp_info[0], 1.0);
        assert_eq!(supp_info[10], 3.0);
        assert_eq!(supp_info[15], 4.0);
        assert_eq!(supp_info[18], 5.0);
        assert_eq!(supp_info[20], 5.0);
    }

    /// Cumulative percentiles weighted by the second tuple component.
    #[test]
    fn test_calculate_supp_info_by_cu() {
        // a total of 20000 CU were consumed
        let values = vec![Point::from((100.0, 10000.0)), Point::from((200.0, 10000.0))];
        let PercentilesCummulative {
            bucket_values: by_cu,
            percentiles: by_cu_percentiles,
            ..
        } = calculate_cummulative(&values);
        assert_eq!(by_cu_percentiles[10], 0.5);
        assert_eq!(by_cu[10], 100.0); // need more than 100 to beat 50% of the CU
        assert_eq!(by_cu[11], 200.0); // need more than 200 to beat 55% of the CU
        assert_eq!(by_cu[20], 200.0); // need more than 200 to beat 100% of the CU
    }

    /// Empty input produces an empty result.
    #[test]
    fn test_empty_array() {
        let values = vec![];
        let supp_info = calculate_percentiles(&values).v;
        // note: this is controversial
        assert!(supp_info.is_empty());
    }

    /// All-zero weights must not break the cumulative walk.
    #[test]
    fn test_zeros() {
        let values = vec![Point::from((0.0, 0.0)), Point::from((0.0, 0.0))];
        let supp_info = calculate_cummulative(&values).bucket_values;
        assert_eq!(supp_info[0], 0.0);
    }

    #[test]
    fn test_statisticshowto() {
        let values = vec![30.0, 33.0, 43.0, 53.0, 56.0, 67.0, 68.0, 72.0];
        let supp_info = calculate_percentiles(&values);
        assert_eq!(supp_info.v[5], 43.0);
        assert_eq!(supp_info.p[5], 0.25);
        assert_eq!(supp_info.get_bucket_value(0.25), Some(43.0));

        let values = vec![
            Point::from((30.0, 1.0)),
            Point::from((33.0, 2.0)),
            Point::from((43.0, 3.0)),
            Point::from((53.0, 4.0)),
            Point::from((56.0, 5.0)),
            Point::from((67.0, 6.0)),
            Point::from((68.0, 7.0)),
            Point::from((72.0, 8.0)),
        ];
        let supp_info = calculate_cummulative(&values);
        assert_eq!(supp_info.percentiles[20], 1.0);
        assert_eq!(supp_info.bucket_values[20], 72.0);
    }

    #[test]
    fn test_simple_non_integer_index() {
        // Measured values: 3 - 5 - 5 - 6 - 7 - 7 - 8 - 10 - 10
        // So in this case the result is 5.
        let values = vec![3.0, 5.0, 5.0, 6.0, 7.0, 7.0, 8.0, 10.0, 10.0];

        let supp_info = calculate_percentiles(&values);
        assert_eq!(supp_info.p[4], 0.20);
        assert_eq!(supp_info.v[5], 5.0);

        let values = vec![
            Point::from((3.0, 1.0)),
            Point::from((5.0, 2.0)),
            Point::from((5.0, 3.0)),
            Point::from((6.0, 4.0)),
            Point::from((7.0, 5.0)),
            Point::from((7.0, 6.0)),
            Point::from((8.0, 7.0)),
            Point::from((10.0, 8.0)),
            Point::from((10.0, 9.0)),
        ];
        let supp_info = calculate_cummulative(&values);
        assert_eq!(supp_info.percentiles[19], 0.95);
        assert_eq!(supp_info.percentiles[20], 1.0);
        assert_eq!(supp_info.bucket_values[19], 10.0);
        assert_eq!(supp_info.bucket_values[20], 10.0);
    }

    #[test]
    fn test_large_list() {
        let values = (0..1000).map(|i| i as f64).collect::<Vec<f64>>();
        let supp_info = calculate_percentiles(&values);
        assert_eq!(supp_info.v[19], 950.0);
        assert_eq!(supp_info.p[19], 0.95);
    }
}

util/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
pub mod encoding;
2+
pub mod histogram_nbuckets;
3+
pub mod histogram_percentiles;
24
pub mod secrets;
35
pub mod statistics;
46

0 commit comments

Comments
 (0)