Skip to content

Commit 64fb326

Browse files
author
Marshall Pierce
committed
Make index_for use usize, simplify initialization logic, add comments
1 parent b9dc4e7 commit 64fb326

File tree

1 file changed

+34
-37
lines changed

1 file changed

+34
-37
lines changed

src/lib.rs

Lines changed: 34 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ impl<T: Counter> Histogram<T> {
297297
/// Find the bucket the given value should be placed in.
298298
///
299299
/// May panic if the given value falls outside the current range of the histogram.
300-
fn index_for(&self, value: u64) -> isize {
300+
fn index_for(&self, value: u64) -> usize {
301301
let bucket_index = self.bucket_for(value);
302302
let sub_bucket_index = self.sub_bucket_for(value, bucket_index);
303303

@@ -307,27 +307,26 @@ impl<T: Counter> Histogram<T> {
307307
// Calculate the index for the first entry that will be used in the bucket (halfway through
308308
// sub_bucket_count). For bucket_index 0, all sub_bucket_count entries may be used, but
309309
// bucket_base_index is still set in the middle.
310-
let bucket_base_index = (bucket_index as isize + 1) << self.sub_bucket_half_count_magnitude;
310+
let bucket_base_index = (bucket_index as usize + 1) << self.sub_bucket_half_count_magnitude;
311311

312312
// Calculate the offset in the bucket. This subtraction will result in a positive value in
313313
// all buckets except the 0th bucket (since a value in that bucket may be less than half
314314
// the bucket's 0 to sub_bucket_count range). However, this works out since we give bucket 0
315315
// twice as much space.
316316
let offset_in_bucket = sub_bucket_index as isize - self.sub_bucket_half_count as isize;
317317

318-
// The following is the equivalent of
319-
// ((sub_bucket_index - sub_bucket_half_count) + bucket_base_index;
320-
(bucket_base_index + offset_in_bucket)
318+
let index = bucket_base_index as isize + offset_in_bucket;
319+
// This is always non-negative because offset_in_bucket is only negative (and only then by
320+
// sub_bucket_half_count at most) for bucket 0, and bucket_base_index will be halfway into
321+
// bucket 0's sub buckets in that case.
322+
debug_assert!(index >= 0);
323+
return index as usize;
321324
}
322325

323326
/// Get a mutable reference to the count bucket for the given value, if it is in range.
324327
fn mut_at(&mut self, value: u64) -> Option<&mut T> {
325328
let i = self.index_for(value);
326-
if i < 0 {
327-
return None;
328-
}
329-
330-
self.counts.get_mut(i as usize)
329+
self.counts.get_mut(i)
331330
}
332331

333332
// ********************************************************************************************
@@ -426,7 +425,7 @@ impl<T: Counter> Histogram<T> {
426425
// and add each non-zero value found at it's proper value:
427426

428427
// Do max value first, to avoid max value updates on each iteration:
429-
let other_max_index = source.index_for(source.max()) as usize;
428+
let other_max_index = source.index_for(source.max());
430429
let other_count = source[other_max_index];
431430
self.record_n(source.value_for(other_max_index), other_count).unwrap();
432431

@@ -618,7 +617,7 @@ impl<T: Counter> Histogram<T> {
618617
// largest value with single unit resolution, in [2, 200_000].
619618
let largest = 2 * 10_u32.pow(sigfig as u32);
620619

621-
let unit_magnitude = ((low as f64).log2() / 2_f64.log2()).floor() as u8;
620+
let unit_magnitude = (low as f64).log2().floor() as u8;
622621
let unit_magnitude_mask = (1 << unit_magnitude) - 1;
623622

624623
// We need to maintain power-of-two sub_bucket_count (for clean direct indexing) that is
@@ -628,15 +627,11 @@ impl<T: Counter> Histogram<T> {
628627
// that.
629628
// In [1, 18]. 2^18 > 2 * 10^5 (the largest possible
630629
// largest_value_with_single_unit_resolution)
631-
let sub_bucket_count_magnitude = ((largest as f64).log2() / 2_f64.log2()).ceil() as u8;
632-
let sub_bucket_half_count_magnitude = if sub_bucket_count_magnitude > 1 {
633-
sub_bucket_count_magnitude
634-
} else {
635-
1
636-
} - 1;
637-
let sub_bucket_count = 2_u32.pow(sub_bucket_half_count_magnitude as u32 + 1);
630+
let sub_bucket_count_magnitude = (largest as f64).log2().ceil() as u8;
631+
let sub_bucket_half_count_magnitude = sub_bucket_count_magnitude - 1;
632+
let sub_bucket_count = 1_u32 << (sub_bucket_count_magnitude as u32);
638633

639-
if unit_magnitude + sub_bucket_half_count_magnitude + 1 > 63 {
634+
if unit_magnitude + sub_bucket_count_magnitude > 63 {
640635
// Cannot represent shifted sub bucket count in 64 bits.
641636
// This will cause an infinite loop when calculating number of buckets
642637
return Err("Cannot represent significant figures' worth of measurements beyond lowest value");
@@ -660,7 +655,7 @@ impl<T: Counter> Histogram<T> {
660655

661656
// Establish leading_zero_count_base, used in bucket_index_of() fast path:
662657
// subtract the bits that would be used by the largest value in bucket 0.
663-
leading_zero_count_base: (64 - unit_magnitude - sub_bucket_half_count_magnitude - 1),
658+
leading_zero_count_base: 64 - unit_magnitude - sub_bucket_count_magnitude,
664659
sub_bucket_half_count_magnitude: sub_bucket_half_count_magnitude,
665660

666661
unit_magnitude: unit_magnitude,
@@ -1089,7 +1084,7 @@ impl<T: Counter> Histogram<T> {
10891084
return 100.0;
10901085
}
10911086

1092-
let target_index = cmp::min(self.index_for(value), self.last() as isize) as usize;
1087+
let target_index = cmp::min(self.index_for(value), self.last());
10931088
let total_to_current_index =
10941089
(0..(target_index + 1)).map(|i| self[i]).fold(T::zero(), |t, v| t + v);
10951090
100.0 * total_to_current_index.to_f64().unwrap() / self.total_count as f64
@@ -1107,8 +1102,8 @@ impl<T: Counter> Histogram<T> {
11071102
/// May fail if the given values are out of bounds.
11081103
pub fn count_between(&self, low: u64, high: u64) -> Result<T, ()> {
11091104
use std::cmp;
1110-
let low_index = cmp::max(0, self.index_for(low)) as usize;
1111-
let high_index = cmp::min(self.index_for(high), self.last() as isize) as usize;
1105+
let low_index = self.index_for(low);
1106+
let high_index = cmp::min(self.index_for(high), self.last());
11121107
Ok((low_index..(high_index + 1)).map(|i| self[i]).fold(T::zero(), |t, v| t + v))
11131108
}
11141109

@@ -1121,7 +1116,7 @@ impl<T: Counter> Histogram<T> {
11211116
/// May fail if the given value is out of bounds.
11221117
pub fn count_at(&self, value: u64) -> Result<T, ()> {
11231118
use std::cmp;
1124-
Ok(self[cmp::min(cmp::max(0, self.index_for(value)), self.last() as isize) as usize])
1119+
Ok(self[cmp::min(self.index_for(value), self.last())])
11251120
}
11261121

11271122
// ********************************************************************************************
@@ -1206,6 +1201,7 @@ impl<T: Counter> Histogram<T> {
12061201
let bucket_index = self.bucket_for(value);
12071202
let sub_bucket_index = self.sub_bucket_for(value, bucket_index);
12081203
// calculate distance to next value
1204+
// TODO when is sub_bucket_index >= sub_bucket_count?
12091205
1_u64 <<
12101206
(self.unit_magnitude +
12111207
if sub_bucket_index >= self.sub_bucket_count {
@@ -1219,27 +1215,27 @@ impl<T: Counter> Histogram<T> {
12191215
// Internal helpers
12201216
// ********************************************************************************************
12211217

1222-
/// Compute the lowest (and therefore highest precision) bucket index that can represent the
1223-
/// value.
1218+
/// Compute the lowest (and therefore highest precision) bucket index whose sub-buckets can
1219+
/// represent the value.
12241220
#[inline]
12251221
fn bucket_for(&self, value: u64) -> u8 {
12261222
// Calculates the number of powers of two by which the value is greater than the biggest
12271223
// value that fits in bucket 0. This is the bucket index since each successive bucket can
12281224
// hold a value 2x greater. The mask maps small values to bucket 0.
1229-
(self.leading_zero_count_base - (value | self.sub_bucket_mask).leading_zeros() as u8)
1225+
self.leading_zero_count_base - (value | self.sub_bucket_mask).leading_zeros() as u8
12301226
}
12311227

12321228
#[inline]
1233-
/// Compute the position inside a bucket at which the given value should be recorded.
1229+
/// Compute the position inside a bucket at which the given value should be recorded, indexed
1230+
/// from position 0 in the bucket (in the first half, which is not used past the first bucket).
1231+
/// For bucket_index > 0, the result will be in the top half of the bucket.
12341232
fn sub_bucket_for(&self, value: u64, bucket_index: u8) -> u32 {
1235-
// For bucket_index 0, this is just value, so it may be anywhere in 0 to sub_bucket_count. For
1236-
// other bucket_index, this will always end up in the top half of sub_bucket_count: assume
1237-
// that for some bucket k > 0, this calculation will yield a value in the bottom half of 0
1238-
// to sub_bucket_count. Then, because of how buckets overlap, it would have also been in the
1239-
// top half of bucket k-1, and therefore would have returned k-1 in bucket_index_of(). Since
1240-
// we would then shift it one fewer bits here, it would be twice as big, and therefore in
1241-
// the top half of sub_bucket_count.
1242-
// TODO: >>> ?
1233+
// Since bucket_index is simply how many powers of 2 greater value is than what will fit in
1234+
// bucket 0 (that is, what will fit in [0, sub_bucket_count)), we shift off that many
1235+
// powers of two, and end up with a number in [0, sub_bucket_count).
1236+
// For bucket_index 0, this is just value. For bucket index k > 0, we know value won't fit
1237+
// in bucket (k - 1) by definition, so this calculation won't end up in the lower half of
1238+
// [0, sub_bucket_count) because that would mean it would also fit in bucket (k - 1).
12431239
(value >> (bucket_index + self.unit_magnitude)) as u32
12441240
}
12451241

@@ -1286,6 +1282,7 @@ impl<T: Counter> Histogram<T> {
12861282
///
12871283
/// May fail if `high` is not at least twice the lowest discernible value.
12881284
fn cover(&mut self, high: u64) -> u32 {
1285+
// TODO validate before we get to here so we don't need an assert
12891286
assert!(high >= 2 * self.lowest_discernible_value,
12901287
"highest trackable value must be >= (2 * lowest discernible value)");
12911288

0 commit comments

Comments
 (0)