diff --git a/Cargo.toml b/Cargo.toml index d2b255d..4e17db9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ bench_private = [] # for enabling nightly-only feature(test) on the main crate t [dependencies] num = "0.1" byteorder = "1.0.0" +flate2 = "0.2.17" #criterion = { git = "https://github.com/japaric/criterion.rs.git", optional = true } [dev-dependencies] diff --git a/benches/serialization.rs b/benches/serialization.rs index 32c8945..dc81154 100644 --- a/benches/serialization.rs +++ b/benches/serialization.rs @@ -9,107 +9,131 @@ use hdrsample::serialization::*; use self::rand::distributions::range::Range; use self::rand::distributions::IndependentSample; use self::test::Bencher; -use std::io::Cursor; +use std::io::{Cursor, Write}; +use std::fmt::Debug; #[bench] -fn serialize_tiny_dense(b: &mut Bencher) { +fn serialize_tiny_dense_v2(b: &mut Bencher) { // 256 + 3 * 128 = 640 counts - do_serialize_bench(b, 1, 2047, 2, 1.5) + do_serialize_bench(b, &mut V2Serializer::new(), 1, 2047, 2, 1.5) } #[bench] -fn serialize_tiny_sparse(b: &mut Bencher) { +fn serialize_tiny_sparse_v2(b: &mut Bencher) { // 256 + 3 * 128 = 640 counts - do_serialize_bench(b, 1, 2047, 2, 0.1) + do_serialize_bench(b, &mut V2Serializer::new(), 1, 2047, 2, 0.1) } #[bench] -fn serialize_small_dense(b: &mut Bencher) { +fn serialize_small_dense_v2(b: &mut Bencher) { // 2048 counts - do_serialize_bench(b, 1, 2047, 3, 1.5) + do_serialize_bench(b, &mut V2Serializer::new(), 1, 2047, 3, 1.5) } #[bench] -fn serialize_small_sparse(b: &mut Bencher) { +fn serialize_small_sparse_v2(b: &mut Bencher) { // 2048 counts - do_serialize_bench(b, 1, 2047, 3, 0.1) + do_serialize_bench(b, &mut V2Serializer::new(), 1, 2047, 3, 0.1) } #[bench] -fn serialize_medium_dense(b: &mut Bencher) { +fn serialize_medium_dense_v2(b: &mut Bencher) { // 56320 counts - do_serialize_bench(b, 1, u64::max_value(), 3, 1.5) + do_serialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 3, 1.5) } #[bench] -fn serialize_medium_sparse(b: &mut Bencher) { +fn serialize_medium_sparse_v2(b: &mut Bencher) { // 56320 counts - do_serialize_bench(b, 1, u64::max_value(), 3, 0.1) + do_serialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 3, 0.1) } #[bench] -fn serialize_large_dense(b: &mut Bencher) { +fn serialize_large_dense_v2(b: &mut Bencher) { // 6291456 buckets - do_serialize_bench(b, 1, u64::max_value(), 5, 1.5) + do_serialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 5, 1.5) } #[bench] -fn serialize_large_sparse(b: &mut Bencher) { +fn serialize_large_sparse_v2(b: &mut Bencher) { // 6291456 buckets - do_serialize_bench(b, 1, u64::max_value(), 5, 0.1) + do_serialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 5, 0.1) } #[bench] -fn deserialize_tiny_dense(b: &mut Bencher) { +fn serialize_large_dense_v2_deflate(b: &mut Bencher) { + // 6291456 buckets + do_serialize_bench(b, &mut V2DeflateSerializer::new(), 1, u64::max_value(), 5, 1.5) +} + +#[bench] +fn serialize_large_sparse_v2_deflate(b: &mut Bencher) { + // 6291456 buckets + do_serialize_bench(b, &mut V2DeflateSerializer::new(), 1, u64::max_value(), 5, 0.1) +} + +#[bench] +fn deserialize_tiny_dense_v2(b: &mut Bencher) { // 256 + 3 * 128 = 640 counts - do_deserialize_bench(b, 1, 2047, 2, 1.5) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, 2047, 2, 1.5) } #[bench] -fn deserialize_tiny_sparse(b: &mut Bencher) { +fn deserialize_tiny_sparse_v2(b: &mut Bencher) { // 256 + 3 * 128 = 640 counts - do_deserialize_bench(b, 1, 2047, 2, 0.1) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, 2047, 2, 0.1) } #[bench] -fn deserialize_small_dense(b: &mut Bencher) { +fn deserialize_small_dense_v2(b: &mut Bencher) { // 2048 counts - do_deserialize_bench(b, 1, 2047, 3, 1.5) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, 2047, 3, 1.5) } #[bench] -fn deserialize_small_sparse(b: &mut Bencher) { +fn deserialize_small_sparse_v2(b: &mut Bencher) { // 2048 counts - do_deserialize_bench(b, 1, 2047, 3, 0.1) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, 2047, 3, 0.1) } #[bench] -fn deserialize_medium_dense(b: &mut Bencher) { +fn deserialize_medium_dense_v2(b: &mut Bencher) { // 56320 counts - do_deserialize_bench(b, 1, u64::max_value(), 3, 1.5) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 3, 1.5) } #[bench] -fn deserialize_medium_sparse(b: &mut Bencher) { +fn deserialize_medium_sparse_v2(b: &mut Bencher) { // 56320 counts - do_deserialize_bench(b, 1, u64::max_value(), 3, 0.1) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 3, 0.1) +} + +#[bench] +fn deserialize_large_dense_v2(b: &mut Bencher) { + // 6291456 buckets + do_deserialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 5, 1.5) } #[bench] -fn deserialize_large_dense(b: &mut Bencher) { +fn deserialize_large_sparse_v2(b: &mut Bencher) { // 6291456 buckets - do_deserialize_bench(b, 1, u64::max_value(), 5, 1.5) + do_deserialize_bench(b, &mut V2Serializer::new(), 1, u64::max_value(), 5, 0.1) } #[bench] -fn deserialize_large_sparse(b: &mut Bencher) { +fn deserialize_large_dense_v2_deflate(b: &mut Bencher) { // 6291456 buckets - do_deserialize_bench(b, 1, u64::max_value(), 5, 0.1) + do_deserialize_bench(b, &mut V2DeflateSerializer::new(), 1, u64::max_value(), 5, 1.5) } +#[bench] +fn deserialize_large_sparse_v2_deflate(b: &mut Bencher) { + // 6291456 buckets + do_deserialize_bench(b, &mut V2DeflateSerializer::new(), 1, u64::max_value(), 5, 0.1) +} -fn do_serialize_bench(b: &mut Bencher, low: u64, high: u64, digits: u8, fraction_of_counts_len: f64) { - let mut s = V2Serializer::new(); +fn do_serialize_bench(b: &mut Bencher, s: &mut S, low: u64, high: u64, digits: u8, fraction_of_counts_len: f64) + where S: TestOnlyHypotheticalSerializerInterface { let mut h = Histogram::::new_with_bounds(low, high, digits).unwrap(); let random_counts = (fraction_of_counts_len * h.len() as f64) as usize; let mut vec = Vec::with_capacity(random_counts); @@ -128,8 +152,8 @@ fn do_serialize_bench(b: &mut Bencher, low: u64, high: u64, digits: u8, fraction }); } -fn do_deserialize_bench(b: &mut Bencher, low: u64, high: u64, digits: u8, fraction_of_counts_len: f64) { - let mut s = V2Serializer::new(); +fn do_deserialize_bench(b: &mut Bencher, s: &mut S, low: u64, high: u64, digits: u8, fraction_of_counts_len: f64) + where S: TestOnlyHypotheticalSerializerInterface { let mut h = Histogram::::new_with_bounds(low, high, digits).unwrap(); let random_counts = (fraction_of_counts_len * h.len() as f64) as usize; let mut vec = Vec::with_capacity(random_counts); @@ -149,3 +173,30 @@ fn do_deserialize_bench(b: &mut Bencher, low: u64, high: u64, digits: u8, fracti let _: Histogram = d.deserialize(&mut cursor).unwrap(); }); } + +// Maybe someday there will be an obvious right answer for what serialization should look like, at +// least to the user, but for now we'll only take an easily reversible step towards that. There are +// still several ways the serializer interfaces could change to achieve better performance, so +// committing to anything right now would be premature. +trait TestOnlyHypotheticalSerializerInterface { + type SerializeError: Debug; + + fn serialize(&mut self, h: &Histogram, writer: &mut W) + -> Result; +} + +impl TestOnlyHypotheticalSerializerInterface for V2Serializer { + type SerializeError = V2SerializeError; + + fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { + self.serialize(h, writer) + } +} + +impl TestOnlyHypotheticalSerializerInterface for V2DeflateSerializer { + type SerializeError = V2DeflateSerializeError; + + fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { + self.serialize(h, writer) + } +} diff --git a/src/serialization/deserializer.rs b/src/serialization/deserializer.rs index 8620225..27d8d0b 100644 --- a/src/serialization/deserializer.rs +++ b/src/serialization/deserializer.rs @@ -1,10 +1,11 @@ -use super::V2_COOKIE; +use super::{V2_COOKIE, V2_COMPRESSED_COOKIE}; use super::super::{Counter, Histogram, RestatState}; use super::super::num::ToPrimitive; use std::io::{self, Cursor, ErrorKind, Read}; use std::marker::PhantomData; use std; use super::byteorder::{BigEndian, ReadBytesExt}; +use super::flate2::read::DeflateDecoder; /// Errors that can happen during deserialization. #[derive(Debug, PartialEq, Eq, Clone, Copy)] @@ -57,10 +58,29 @@ impl Deserializer { -> Result, DeserializeError> { let cookie = reader.read_u32::()?; - if cookie != V2_COOKIE { + return match cookie { + V2_COOKIE => self.deser_v2(reader), + V2_COMPRESSED_COOKIE => self.deser_v2_compressed(reader), + _ => Err(DeserializeError::InvalidCookie) + } + } + + fn deser_v2_compressed(&mut self, reader: &mut R) -> Result, DeserializeError> { + let payload_len = reader.read_u32::()?.to_usize() + .ok_or(DeserializeError::UsizeTypeTooSmall)?; + + // TODO reuse deflate buf, or switch to lower-level flate2::Decompress + let mut deflate_reader = DeflateDecoder::new(reader.take(payload_len as u64)); + let inner_cookie = deflate_reader.read_u32::()?; + if inner_cookie != V2_COOKIE { return Err(DeserializeError::InvalidCookie); } + self.deser_v2(&mut deflate_reader) + } + + + fn deser_v2(&mut self, reader: &mut R) -> Result, DeserializeError> { let payload_len = reader.read_u32::()?.to_usize() .ok_or(DeserializeError::UsizeTypeTooSmall)?; let normalizing_offset = reader.read_u32::()?; diff --git a/src/serialization/serialization.rs b/src/serialization/serialization.rs index 0aa623b..f0e9e4a 100644 --- a/src/serialization/serialization.rs +++ b/src/serialization/serialization.rs @@ -1,11 +1,12 @@ //! # Serialization/deserialization //! //! The upstream Java project has established several different types of serialization. We have -//! currently implemented one (the "V2" format, following the names used by the Java -//! implementation), and will add others as time goes on. These formats are compact binary -//! representations of the state of the histogram. They are intended to be used -//! for archival or transmission to other systems for further analysis. A typical use case would be -//! to periodically serialize a histogram, save it somewhere, and reset the histogram. +//! currently implemented V2 and V2 + DEFLATE (following the names used by the Java implementation). +//! +//! These formats are compact binary representations of the state of the histogram. They are +//! intended to be used for archival or transmission to other systems for further analysis. A +//! typical use case would be to periodically serialize a histogram, save it somewhere, and reset +//! the histogram. //! //! Histograms are designed to be added, subtracted, and otherwise manipulated, and an efficient //! storage format facilitates this. As an example, you might be capturing histograms once a minute @@ -18,22 +19,27 @@ //! //! # Performance concerns //! -//! Serialization is quite fast; serializing a histogram that represents 1 to `u64::max_value()` -//! with 3 digits of precision with tens of thousands of recorded counts takes about 40 -//! microseconds on an E5-1650v3 Xeon. Deserialization is about 3x slower, but that will improve as -//! there are still some optimizations to perform. +//! Serialization is quite fast; serializing a histogram in V2 format that represents 1 to +//! `u64::max_value()` with 3 digits of precision with tens of thousands of recorded counts takes +//! about 40 microseconds on an E5-1650v3 Xeon. Deserialization is about 3x slower, but that will +//! improve as there are still some optimizations to perform. //! //! For the V2 format, the space used for a histogram will depend mainly on precision since higher //! precision will reduce the extent to which different values are grouped into the same bucket. //! Having a large value range (e.g. 1 to `u64::max_value()`) will not directly impact the size if //! there are many zero counts as zeros are compressed away. //! +//! V2 + DEFLATE is significantly slower to serialize (around 10x) but only a little bit slower to +//! deserialize (less than 2x). YMMV depending on the compressibility of your histogram data, the +//! speed of the underlying storage medium, etc. Naturally, you can always compress at a later time: +//! there's no reason why you couldn't serialize as V2 and then later re-serialize it as V2 + +//! DEFLATE on another system (perhaps as a batch job) for better archival storage density. +//! //! # API //! //! Each serialization format has its own serializer struct, but since each format is reliably //! distinguishable from each other, there is only one `Deserializer` struct that will work for -//! any of the formats this library implements. For now there is only one serializer -//! (`V2Serializer`) but more will be added. +//! any of the formats this library implements. //! //! Serializers and deserializers are intended to be re-used for many histograms. You can use them //! for one histogram and throw them away; it will just be less efficient as the cost of their @@ -84,9 +90,11 @@ //! //! impl Serialize for V2HistogramWrapper { //! fn serialize(&self, serializer: S) -> Result<(), ()> { -//! // not optimal to not re-use the vec and serializer, but it'll work +//! // Not optimal to not re-use the vec and serializer, but it'll work //! let mut vec = Vec::new(); -//! // map errors as appropriate for your use case +//! // Pick the serialization format you want to use. Here, we use plain V2, but V2 + +//! // DEFLATE is also available. +//! // Map errors as appropriate for your use case. //! V2Serializer::new().serialize(&self.histogram, &mut vec) //! .map_err(|_| ())?; //! serializer.serialize_bytes(&vec)?; @@ -163,6 +171,7 @@ //! extern crate byteorder; +extern crate flate2; #[path = "tests.rs"] #[cfg(test)] @@ -176,13 +185,19 @@ mod benchmarks; mod v2_serializer; pub use self::v2_serializer::{V2Serializer, V2SerializeError}; +#[path = "v2_deflate_serializer.rs"] +mod v2_deflate_serializer; +pub use self::v2_deflate_serializer::{V2DeflateSerializer, V2DeflateSerializeError}; + #[path = "deserializer.rs"] mod deserializer; pub use self::deserializer::{Deserializer, DeserializeError}; const V2_COOKIE_BASE: u32 = 0x1c849303; +const V2_COMPRESSED_COOKIE_BASE: u32 = 0x1c849304; const V2_COOKIE: u32 = V2_COOKIE_BASE | 0x10; +const V2_COMPRESSED_COOKIE: u32 = V2_COMPRESSED_COOKIE_BASE | 0x10; const V2_HEADER_SIZE: usize = 40; diff --git a/src/serialization/tests.rs b/src/serialization/tests.rs index 1c33146..454d65d 100644 --- a/src/serialization/tests.rs +++ b/src/serialization/tests.rs @@ -1,13 +1,13 @@ extern crate rand; -use super::{V2_COOKIE, V2_HEADER_SIZE}; -use super::v2_serializer::{V2Serializer, V2SerializeError, counts_array_max_encoded_size, encode_counts, varint_write, zig_zag_encode}; +use super::{V2_COOKIE, V2_HEADER_SIZE, V2Serializer, V2SerializeError, V2DeflateSerializer, V2DeflateSerializeError}; +use super::v2_serializer::{counts_array_max_encoded_size, encode_counts, varint_write, zig_zag_encode}; use super::deserializer::{Deserializer, varint_read, varint_read_slice, zig_zag_decode}; use super::byteorder::{BigEndian, ReadBytesExt}; use super::super::{Counter, Histogram}; use super::super::num::traits::{Saturating, ToPrimitive}; use super::super::tests::helpers::histo64; -use std::io::Cursor; +use std::io::{Cursor, Write}; use std::fmt::{Debug, Display}; use std::iter::once; use self::rand::{Rand, Rng}; @@ -124,23 +124,43 @@ fn serialize_roundtrip_1_count_for_every_value_2_buckets() { } #[test] -fn serialize_roundtrip_random_u64() { - do_serialize_roundtrip_random::(i64::max_value() as u64); +fn serialize_roundtrip_random_v2_u64() { + do_serialize_roundtrip_random(V2Serializer::new(), i64::max_value() as u64); } #[test] -fn serialize_roundtrip_random_u32() { - do_serialize_roundtrip_random::(u32::max_value()); +fn serialize_roundtrip_random_v2_u32() { + do_serialize_roundtrip_random(V2Serializer::new(), u32::max_value()); } #[test] -fn serialize_roundtrip_random_u16() { - do_serialize_roundtrip_random::(u16::max_value()); +fn serialize_roundtrip_random_v2_u16() { + do_serialize_roundtrip_random(V2Serializer::new(), u16::max_value()); } #[test] -fn serialize_roundtrip_random_u8() { - do_serialize_roundtrip_random::(u8::max_value()); +fn serialize_roundtrip_random_v2_u8() { + do_serialize_roundtrip_random(V2Serializer::new(), u8::max_value()); +} + +#[test] +fn serialize_roundtrip_random_v2_deflate_u64() { + do_serialize_roundtrip_random(V2DeflateSerializer::new(), i64::max_value() as u64); +} + +#[test] +fn serialize_roundtrip_random_v2_deflate_u32() { + do_serialize_roundtrip_random(V2DeflateSerializer::new(), u32::max_value()); +} + +#[test] +fn serialize_roundtrip_random_v2_deflate_u16() { + do_serialize_roundtrip_random(V2DeflateSerializer::new(), u16::max_value()); +} + +#[test] +fn serialize_roundtrip_random_v2_deflate_u8() { + do_serialize_roundtrip_random(V2DeflateSerializer::new(), u8::max_value()); } #[test] @@ -510,9 +530,9 @@ fn do_varint_write_read_slice_roundtrip_rand(byte_length: usize) { } } -fn do_serialize_roundtrip_random(max_count: T) - where T: Counter + Debug + Display + Rand + Saturating + ToPrimitive + SampleRange { - let mut s = V2Serializer::new(); +fn do_serialize_roundtrip_random(mut serializer: S, max_count: T) + where S: TestOnlyHypotheticalSerializerInterface, + T: Counter + Debug + Display + Rand + Saturating + ToPrimitive + SampleRange { let mut d = Deserializer::new(); let mut vec = Vec::new(); let mut count_rng = rand::weak_rng(); @@ -536,7 +556,7 @@ fn do_serialize_roundtrip_random(max_count: T) } } - let bytes_written = s.serialize(&h, &mut vec).unwrap(); + let bytes_written = serializer.serialize(&h, &mut vec).unwrap(); assert_eq!(bytes_written, vec.len()); let mut cursor = Cursor::new(&vec); @@ -674,3 +694,30 @@ impl Iterator for RandomVarintEncodedLengthIter { Some(value_range.ind_sample(&mut self.rng)) } } + +// Maybe someday there will be an obvious right answer for what serialization should look like, at +// least to the user, but for now we'll only take an easily reversible step towards that. There are +// still several ways the serializer interfaces could change to achieve better performance, so +// committing to anything right now would be premature. +trait TestOnlyHypotheticalSerializerInterface { + type SerializeError: Debug; + + fn serialize(&mut self, h: &Histogram, writer: &mut W) + -> Result; +} + +impl TestOnlyHypotheticalSerializerInterface for V2Serializer { + type SerializeError = V2SerializeError; + + fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { + self.serialize(h, writer) + } +} + +impl TestOnlyHypotheticalSerializerInterface for V2DeflateSerializer { + type SerializeError = V2DeflateSerializeError; + + fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { + self.serialize(h, writer) + } +} diff --git a/src/serialization/v2_deflate_serializer.rs b/src/serialization/v2_deflate_serializer.rs new file mode 100644 index 0000000..80b48e7 --- /dev/null +++ b/src/serialization/v2_deflate_serializer.rs @@ -0,0 +1,91 @@ +use super::super::{Counter, Histogram}; +use super::V2_COMPRESSED_COOKIE; +use super::v2_serializer::{V2Serializer, V2SerializeError}; +use super::byteorder::{BigEndian, WriteBytesExt}; +use super::flate2::Compression; +use std; +use std::io::{ErrorKind, Write}; +use super::flate2::write::DeflateEncoder; + +/// Errors that occur during serialization. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum V2DeflateSerializeError { + /// The underlying serialization failed + InternalSerializationError(V2SerializeError), + /// An i/o operation failed. + IoError(ErrorKind) +} + +impl std::convert::From for V2DeflateSerializeError { + fn from(e: std::io::Error) -> Self { + V2DeflateSerializeError::IoError(e.kind()) + } +} + +/// Serializer for the V2 + DEFLATE binary format. +pub struct V2DeflateSerializer { + uncompressed_buf: Vec, + compressed_buf: Vec, + v2_serializer: V2Serializer +} + +impl V2DeflateSerializer { + /// Create a new serializer. + pub fn new() -> V2DeflateSerializer { + V2DeflateSerializer { + uncompressed_buf: Vec::new(), + compressed_buf: Vec::new(), + v2_serializer: V2Serializer::new() + } + } + + /// Serialize the histogram into the provided writer. + /// Returns the number of bytes written, or an error. + /// + /// Note that `Vec` is a reasonable `Write` implementation for simple usage. + pub fn serialize(&mut self, h: &Histogram, writer: &mut W) + -> Result { + // TODO benchmark serializing in chunks rather than all at once: each uncompressed v2 chunk + // could be compressed and written to the compressed buf, possibly using an approach like + // that of https://github.com/jonhoo/hdrsample/issues/32#issuecomment-287583055. + // This would reduce the overall buffer size needed for plain v2 serialization, and be + // more cache friendly. + + self.uncompressed_buf.clear(); + self.compressed_buf.clear(); + // TODO serialize directly into uncompressed_buf without the buffering inside v2_serializer + let uncompressed_len = self.v2_serializer.serialize(h, &mut self.uncompressed_buf) + .map_err(|e| V2DeflateSerializeError::InternalSerializationError(e))?; + + debug_assert_eq!(self.uncompressed_buf.len(), uncompressed_len); + // On randomized test histograms we get about 10% compression, but of course random data + // doesn't compress well. Real-world data may compress better, so let's assume a more + // optimistic 50% compression as a baseline to reserve. If we're overly optimistic that's + // still only one more allocation the first time it's needed. + self.compressed_buf.reserve(self.uncompressed_buf.len() / 2); + + self.compressed_buf.write_u32::(V2_COMPRESSED_COOKIE)?; + // placeholder for length + self.compressed_buf.write_u32::(0)?; + + // TODO pluggable compressors? configurable compression levels? + // TODO benchmark https://github.com/sile/libflate + + { + // TODO reuse deflate buf, or switch to lower-level flate2::Compress + let mut compressor = DeflateEncoder::new(&mut self.compressed_buf, Compression::Default); + compressor.write_all(&self.uncompressed_buf[0..uncompressed_len])?; + let _ = compressor.finish()?; + } + + // fill in length placeholder. Won't underflow since length is always at least 8, and won't + // overflow u32 as the largest array is about 6 million entries, so about 54MiB encoded (if + // counter is u64) + let total_compressed_len = self.compressed_buf.len(); + (&mut self.compressed_buf[4..8]).write_u32::((total_compressed_len as u32) - 8)?; + + writer.write_all(&self.compressed_buf)?; + + Ok(total_compressed_len) + } +} diff --git a/src/serialization/v2_serializer.rs b/src/serialization/v2_serializer.rs index 0d20396..fdc2d0b 100644 --- a/src/serialization/v2_serializer.rs +++ b/src/serialization/v2_serializer.rs @@ -42,6 +42,8 @@ impl V2Serializer { /// Note that `Vec` is a reasonable `Write` implementation for simple usage. pub fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { + // TODO benchmark encoding directly into target Vec + self.buf.clear(); let max_size = max_encoded_size(h).ok_or(V2SerializeError::UsizeTypeTooSmall)?; self.buf.reserve(max_size);