|
| 1 | +use itertools::Itertools; |
| 2 | +use std::fmt::Display; |
| 3 | +use std::iter::zip; |
| 4 | + |
/// A single sample: a `priority` together with the `value` attributed to it.
///
/// The type is plain data made of two `f64`s, so the cheap standard traits
/// are derived instead of being left commented out.
#[derive(Clone, Copy, Debug, Default)]
pub struct Point {
    /// Priority of the sample.
    pub priority: f64,
    /// Value (weight) of the sample.
    pub value: f64,
}

impl From<(f64, f64)> for Point {
    /// Builds a point from a `(priority, value)` tuple.
    ///
    /// NOTE(review): the second element was originally bound as
    /// `cu_consumed`, so it is presumably the consumed compute units —
    /// confirm against callers.
    fn from((priority, value): (f64, f64)) -> Self {
        Point { priority, value }
    }
}
| 19 | + |
/// One histogram entry: a percentile and the value recorded for it.
///
/// Only float-compatible traits are derived: `Eq` and `Hash` cannot be
/// derived here because neither `f32` nor `f64` implements them.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct HistValue {
    /// percentile
    pub percentile: f32,
    /// value of fees in lamports
    pub value: f64,
}
| 27 | + |
| 28 | +/// `quantile` function is the same as the median if q=50, the same as the minimum if q=0 and the same as the maximum if q=100. |
| 29 | +
|
| 30 | +pub fn calculate_percentiles(input: &[f64]) -> Percentiles { |
| 31 | + if input.is_empty() { |
| 32 | + // note: percentile for empty array is undefined |
| 33 | + return Percentiles { |
| 34 | + v: vec![], |
| 35 | + p: vec![], |
| 36 | + }; |
| 37 | + } |
| 38 | + |
| 39 | + let is_monotonic = input.windows(2).all(|w| w[0] <= w[1]); |
| 40 | + assert!(is_monotonic, "array of values must be sorted"); |
| 41 | + |
| 42 | + let p_step = 5; |
| 43 | + let i_percentiles = (0..=100).step_by(p_step).collect_vec(); |
| 44 | + |
| 45 | + let mut bucket_values = Vec::with_capacity(i_percentiles.len()); |
| 46 | + let mut percentiles = Vec::with_capacity(i_percentiles.len()); |
| 47 | + for p in i_percentiles { |
| 48 | + let value = { |
| 49 | + let index = input.len() * p / 100; |
| 50 | + let cap_index = index.min(input.len() - 1); |
| 51 | + input[cap_index] |
| 52 | + }; |
| 53 | + |
| 54 | + bucket_values.push(value); |
| 55 | + percentiles.push(p as f32 / 100.0); |
| 56 | + } |
| 57 | + |
| 58 | + Percentiles { |
| 59 | + v: bucket_values, |
| 60 | + p: percentiles, |
| 61 | + } |
| 62 | +} |
| 63 | + |
| 64 | +pub fn calculate_cummulative(values: &[Point]) -> PercentilesCummulative { |
| 65 | + if values.is_empty() { |
| 66 | + // note: percentile for empty array is undefined |
| 67 | + return PercentilesCummulative { |
| 68 | + bucket_values: vec![], |
| 69 | + percentiles: vec![], |
| 70 | + }; |
| 71 | + } |
| 72 | + |
| 73 | + let is_monotonic = values.windows(2).all(|w| w[0].priority <= w[1].priority); |
| 74 | + assert!(is_monotonic, "array of values must be sorted"); |
| 75 | + |
| 76 | + let value_sum: f64 = values.iter().map(|x| x.value).sum(); |
| 77 | + let mut agg: f64 = values[0].value; |
| 78 | + let mut index = 0; |
| 79 | + let p_step = 5; |
| 80 | + |
| 81 | + let percentiles = (0..=100).step_by(p_step).map(|p| p as f64).collect_vec(); |
| 82 | + |
| 83 | + let dist = percentiles |
| 84 | + .iter() |
| 85 | + .map(|percentile| { |
| 86 | + while agg < (value_sum * *percentile) / 100.0 { |
| 87 | + index += 1; |
| 88 | + agg += values[index].value; |
| 89 | + } |
| 90 | + let priority = values[index].priority; |
| 91 | + HistValue { |
| 92 | + percentile: *percentile as f32, |
| 93 | + value: priority, |
| 94 | + } |
| 95 | + }) |
| 96 | + .collect_vec(); |
| 97 | + |
| 98 | + PercentilesCummulative { |
| 99 | + bucket_values: dist.iter().map(|hv| hv.value).collect_vec(), |
| 100 | + percentiles: dist.iter().map(|hv| hv.percentile / 100.0).collect_vec(), |
| 101 | + } |
| 102 | +} |
| 103 | + |
/// Percentile buckets stored as two parallel vectors: entry `i` of `v` is the
/// value reported for the percentile in entry `i` of `p`.
pub struct Percentiles {
    /// Bucket values.
    pub v: Vec<f64>,
    /// Percentiles as fractions (`0.25` is p25); `Display` scales them by 100.
    pub p: Vec<f32>,
}

impl Display for Percentiles {
    /// Writes every bucket as `p<percent>=><value> `, in order, each followed
    /// by a single space.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.v
            .iter()
            .enumerate()
            .try_for_each(|(idx, value)| write!(f, "p{}=>{} ", self.p[idx] * 100.0, value))
    }
}

// Private helper; within this file it is only called from the unit tests.
#[allow(dead_code)]
impl Percentiles {
    /// Returns the value stored for the first entry of `p` that is exactly
    /// equal to `percentile`, or `None` when there is no such entry (or no
    /// value at that position).
    fn get_bucket_value(&self, percentile: f32) -> Option<f64> {
        let idx = self
            .p
            .iter()
            .position(|&candidate| candidate == percentile)?;
        self.v.get(idx).copied()
    }
}
| 128 | + |
/// Cumulative percentile buckets stored as two parallel vectors: entry `i` of
/// `bucket_values` belongs to the percentile in entry `i` of `percentiles`.
pub struct PercentilesCummulative {
    /// Value reported for each bucket.
    pub bucket_values: Vec<f64>,
    /// Percentile of each bucket.
    pub percentiles: Vec<f32>,
}

// Private helper with no caller visible in this file, hence the allowance.
#[allow(dead_code)]
impl PercentilesCummulative {
    /// Returns the bucket value paired with the first percentile that is
    /// exactly equal to `percentile`, or `None` if no pair matches.
    fn get_bucket_value(&self, percentile: f32) -> Option<f64> {
        for (&candidate, &bucket) in zip(&self.percentiles, &self.bucket_values) {
            if candidate == percentile {
                return Some(bucket);
            }
        }
        None
    }
}
| 142 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain percentiles of a small list that is sorted before use.
    #[test]
    fn test_calculate_supp_info() {
        let mut values: Vec<f64> = vec![2.0, 4.0, 5.0, 3.0, 1.0];
        // Compare the floats directly rather than through a truncated integer key.
        values.sort_by(|a, b| a.partial_cmp(b).expect("test values contain no NaN"));
        let supp_info = calculate_percentiles(&values).v;
        assert_eq!(supp_info[0], 1.0);
        assert_eq!(supp_info[10], 3.0);
        assert_eq!(supp_info[15], 4.0);
        assert_eq!(supp_info[18], 5.0);
        assert_eq!(supp_info[20], 5.0);
    }

    /// Cumulative percentiles weighted by the second tuple element.
    #[test]
    fn test_calculate_supp_info_by_cu() {
        // a total of 20000 CU were consumed
        let values = vec![Point::from((100.0, 10000.0)), Point::from((200.0, 10000.0))];
        let PercentilesCummulative {
            bucket_values: by_cu,
            percentiles: by_cu_percentiles,
        } = calculate_cummulative(&values);
        assert_eq!(by_cu_percentiles[10], 0.5);
        assert_eq!(by_cu[10], 100.0); // need more than 100 to beat 50% of the CU
        assert_eq!(by_cu[11], 200.0); // need more than 200 to beat 55% of the CU
        assert_eq!(by_cu[20], 200.0); // need more than 200 to beat 100% of the CU
    }

    /// Empty input produces no buckets.
    #[test]
    fn test_empty_array() {
        let values = vec![];
        let supp_info = calculate_percentiles(&values).v;
        // note: this is controversial
        assert!(supp_info.is_empty());
    }

    /// All-zero weights must not break the cumulative walk.
    #[test]
    fn test_zeros() {
        let values = vec![Point::from((0.0, 0.0)), Point::from((0.0, 0.0))];
        let supp_info = calculate_cummulative(&values).bucket_values;
        assert_eq!(supp_info[0], 0.0);
    }

    #[test]
    fn test_statisticshowto() {
        let values = vec![30.0, 33.0, 43.0, 53.0, 56.0, 67.0, 68.0, 72.0];
        let supp_info = calculate_percentiles(&values);
        assert_eq!(supp_info.v[5], 43.0);
        assert_eq!(supp_info.p[5], 0.25);
        assert_eq!(supp_info.get_bucket_value(0.25), Some(43.0));

        let values = vec![
            Point::from((30.0, 1.0)),
            Point::from((33.0, 2.0)),
            Point::from((43.0, 3.0)),
            Point::from((53.0, 4.0)),
            Point::from((56.0, 5.0)),
            Point::from((67.0, 6.0)),
            Point::from((68.0, 7.0)),
            Point::from((72.0, 8.0)),
        ];
        let supp_info = calculate_cummulative(&values);
        assert_eq!(supp_info.percentiles[20], 1.0);
        assert_eq!(supp_info.bucket_values[20], 72.0);
    }

    #[test]
    fn test_simple_non_integer_index() {
        // Measured values: 3, 5, 5, 6, 7, 7, 8, 10, 10
        // In this case the result is therefore 5.
        let values = vec![3.0, 5.0, 5.0, 6.0, 7.0, 7.0, 8.0, 10.0, 10.0];

        let supp_info = calculate_percentiles(&values);
        assert_eq!(supp_info.p[4], 0.20);
        // Check the value of the same bucket whose percentile was just verified.
        assert_eq!(supp_info.v[4], 5.0);
        assert_eq!(supp_info.v[5], 5.0);

        let values = vec![
            Point::from((3.0, 1.0)),
            Point::from((5.0, 2.0)),
            Point::from((5.0, 3.0)),
            Point::from((6.0, 4.0)),
            Point::from((7.0, 5.0)),
            Point::from((7.0, 6.0)),
            Point::from((8.0, 7.0)),
            Point::from((10.0, 8.0)),
            Point::from((10.0, 9.0)),
        ];
        let supp_info = calculate_cummulative(&values);
        assert_eq!(supp_info.percentiles[19], 0.95);
        assert_eq!(supp_info.percentiles[20], 1.0);
        assert_eq!(supp_info.bucket_values[19], 10.0);
        assert_eq!(supp_info.bucket_values[20], 10.0);
    }

    #[test]
    fn test_large_list() {
        let values = (0..1000).map(|i| i as f64).collect_vec();
        let supp_info = calculate_percentiles(&values);
        assert_eq!(supp_info.v[19], 950.0);
        assert_eq!(supp_info.p[19], 0.95);
    }
}
0 commit comments