
Add full blob encoding and consistency check #1379

Merged (10 commits, Jul 25, 2024)
Changes from all commits
2 changes: 1 addition & 1 deletion aggregator/data/test_batches/batch274.hex

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions aggregator/data/test_blobs/blob005.hex

Large diffs are not rendered by default.

65 changes: 64 additions & 1 deletion aggregator/src/blob.rs
@@ -36,6 +36,9 @@ pub const N_DATA_BYTES_PER_COEFFICIENT: usize = 31;
/// Data config. Since num_valid_chunks is u16, we use 2 bytes/rows.
pub const N_ROWS_NUM_CHUNKS: usize = 2;

/// The number of rows to encode chunk size (u32).
pub const N_ROWS_CHUNK_SIZE: usize = 4;

/// The number of bytes that we can fit in a blob. Note that each coefficient is represented in 32
/// bytes, however, since those 32 bytes must represent a BLS12-381 scalar in its canonical form,
/// we explicitly set the most-significant byte to 0, effectively utilising only 31 bytes.
@@ -74,6 +77,66 @@ pub struct BatchData<const N_SNARKS: usize> {
pub chunk_data: [Vec<u8>; N_SNARKS],
}

impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
/// Given raw batch bytes prefixed with metadata, segment the byte stream into per-chunk
/// byte segments. The metadata prefix is not included in the result.
pub fn segment_with_metadata(batch_bytes_with_metadata: Vec<u8>) -> Vec<Vec<u8>> {
let n_bytes_metadata = Self::n_rows_metadata();
let metadata_bytes = batch_bytes_with_metadata
.clone()
.into_iter()
.take(n_bytes_metadata)
.collect::<Vec<u8>>();
let batch_bytes = batch_bytes_with_metadata
.clone()
.into_iter()
.skip(n_bytes_metadata)
.collect::<Vec<u8>>();

// Decoded batch bytes require segmentation based on chunk length
let batch_data_len = batch_bytes.len();
let chunk_lens = metadata_bytes[N_ROWS_NUM_CHUNKS..]
.chunks(N_ROWS_CHUNK_SIZE)
.map(|chunk| {
chunk
.iter()
.fold(0usize, |acc, &d| acc * 256usize + d as usize)
})
.collect::<Vec<usize>>();

// sanity check: the valid chunk lengths must sum to the batch data length
let valid_chunks = metadata_bytes
.iter()
.take(N_ROWS_NUM_CHUNKS)
.fold(0usize, |acc, &d| acc * 256usize + d as usize);
let calculated_len = chunk_lens.iter().take(valid_chunks).sum::<usize>();
assert_eq!(
batch_data_len, calculated_len,
"chunk segmentation len must add up to the correct value"
);

// reconstruct segments
let mut segmented_batch_data: Vec<Vec<u8>> = Vec::new();
let mut offset: usize = 0;
let mut segment: usize = 0;
while offset < batch_data_len {
segmented_batch_data.push(
batch_bytes
.clone()
.into_iter()
.skip(offset)
.take(chunk_lens[segment])
.collect::<Vec<u8>>(),
);

offset += chunk_lens[segment];
segment += 1;
}

segmented_batch_data
}
}

impl<const N_SNARKS: usize> From<&BatchHash<N_SNARKS>> for BatchData<N_SNARKS> {
fn from(batch_hash: &BatchHash<N_SNARKS>) -> Self {
Self::new(
@@ -150,7 +213,7 @@ impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
/// The number of rows to encode the size of each chunk in a batch, in the Blob Data config.
/// chunk_size is u32, we use 4 bytes/rows.
const fn n_rows_chunk_sizes() -> usize {
- N_SNARKS * 4
+ N_SNARKS * N_ROWS_CHUNK_SIZE
}

/// The total number of rows in "digest rlc" and "digest bytes" sections.
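For context, `segment_with_metadata` assumes the byte stream starts with a metadata prefix: `N_ROWS_NUM_CHUNKS` (2) bytes holding `num_valid_chunks`, followed by `N_SNARKS` size fields of `N_ROWS_CHUNK_SIZE` (4) bytes each, with the chunk payloads after that. The sketch below round-trips that layout. It is a minimal standalone illustration, not code from this PR: `build_batch_bytes` is a hypothetical helper, `N_SNARKS = 4` is chosen arbitrarily, and big-endian size encoding is an assumption inferred from the `fold(acc * 256 + d)` parsing above.

const N_ROWS_NUM_CHUNKS: usize = 2;
const N_ROWS_CHUNK_SIZE: usize = 4;
const N_SNARKS: usize = 4;

// Hypothetical helper: build the metadata-prefixed stream the function expects.
fn build_batch_bytes(chunks: &[Vec<u8>]) -> Vec<u8> {
    let mut bytes = Vec::new();
    bytes.extend((chunks.len() as u16).to_be_bytes()); // num_valid_chunks
    for c in chunks {
        bytes.extend((c.len() as u32).to_be_bytes()); // per-chunk size
    }
    // Size slots for unused chunk positions are still present, zero-filled.
    bytes.extend(vec![0u8; (N_SNARKS - chunks.len()) * N_ROWS_CHUNK_SIZE]);
    for c in chunks {
        bytes.extend(c); // chunk payloads follow the metadata
    }
    bytes
}

fn main() {
    let chunks = vec![vec![1u8, 2, 3], vec![4, 5]];
    let bytes = build_batch_bytes(&chunks);

    // Parse back, mirroring the big-endian fold in segment_with_metadata.
    let n_meta = N_ROWS_NUM_CHUNKS + N_SNARKS * N_ROWS_CHUNK_SIZE;
    let (meta, data) = bytes.split_at(n_meta);
    let valid = meta[..N_ROWS_NUM_CHUNKS]
        .iter()
        .fold(0usize, |acc, &d| acc * 256 + d as usize);
    let lens: Vec<usize> = meta[N_ROWS_NUM_CHUNKS..]
        .chunks(N_ROWS_CHUNK_SIZE)
        .map(|c| c.iter().fold(0usize, |acc, &d| acc * 256 + d as usize))
        .collect();

    // The same sanity check the PR asserts: valid chunk lengths sum to the data length.
    assert_eq!(lens[..valid].iter().sum::<usize>(), data.len());

    // Segmentation recovers the original chunks.
    let mut offset = 0;
    for (chunk, &len) in chunks.iter().zip(&lens[..valid]) {
        assert_eq!(&data[offset..offset + len], &chunk[..]);
        offset += len;
    }
}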
133 changes: 118 additions & 15 deletions aggregator/src/tests/blob.rs
@@ -1,8 +1,9 @@
+ use crate::aggregation::witgen::{process, MultiBlockProcessResult};
use crate::{
aggregation::{
AssignedBarycentricEvaluationConfig, BarycentricEvaluationConfig, BlobDataConfig, RlcConfig,
},
- blob::{BatchData, PointEvaluationAssignments, N_BYTES_U256},
+ blob::{BatchData, PointEvaluationAssignments, N_BLOB_BYTES, N_BYTES_U256},
param::ConfigParams,
BatchDataConfig, MAX_AGG_SNARKS,
};
@@ -257,16 +258,16 @@ fn check_circuit(circuit: &BlobCircuit) -> Result<(), Vec<VerifyFailure>> {

#[test]
fn blob_circuit_completeness() {
- // single chunk in batch, but the chunk has a size of N_ROWS_DATA
- let full_blob = vec![
- // batch274 contains batch bytes that will produce a full blob
- hex::decode(
- fs::read_to_string("./data/test_batches/batch274.hex")
- .expect("file path exists")
- .trim(),
- )
- .expect("should load full blob batch bytes"),
- ];
+ // Full blob test case
+ // batch274 contains batch bytes that will produce a full blob
+ let full_blob = hex::decode(
+ fs::read_to_string("./data/test_batches/batch274.hex")
+ .expect("file path exists")
+ .trim(),
+ )
+ .expect("should load full blob batch bytes");
+ // batch274 contains metadata
+ let segmented_full_blob_src = BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(full_blob);

let all_empty_chunks: Vec<Vec<u8>> = vec![vec![]; MAX_AGG_SNARKS];
let one_chunk = vec![vec![2, 3, 4, 100, 1]];
@@ -288,8 +289,8 @@ fn blob_circuit_completeness()
.chain(std::iter::once(vec![3, 100, 24, 30]))
.collect::<Vec<_>>();

- for blob in [
- full_blob,
+ for (idx, blob) in [
+ segmented_full_blob_src,
one_chunk,
two_chunks,
max_chunks,
@@ -298,11 +299,113 @@
nonempty_chunk_followed_by_empty_chunk,
empty_and_nonempty_chunks,
all_empty_except_last,
- ] {
- assert_eq!(check_data(BatchData::from(&blob)), Ok(()), "{:?}", blob);
+ ]
+ .into_iter()
+ .enumerate()
+ {
+ let batch_data = BatchData::from(&blob);
+
+ // First blob is purposely constructed to take full blob space
+ if idx == 0 {
+ let encoded_len = batch_data.get_encoded_batch_data_bytes().len();
+ assert_eq!(
+ encoded_len, N_BLOB_BYTES,
+ "should be full blob: expected={N_BLOB_BYTES}, got={encoded_len}",
+ );
+ }
+
+ assert_eq!(check_data(batch_data), Ok(()), "{:?}", blob);
}
}

#[test]
fn zstd_encoding_consistency() {
// Load test blob bytes
let blob_bytes = hex::decode(
fs::read_to_string("./data/test_blobs/blob005.hex")
.expect("file path exists")
.trim(),
)
.expect("should load blob bytes");

// Drop the most significant byte of each 32-byte coefficient to recover the compressed data
let mut compressed: Vec<u8> = vec![];
for i in 0..blob_bytes.len() / 32 {
for j in 1..32usize {
compressed.push(blob_bytes[i * 32 + j]);
}
}

// Decode into original batch bytes
let MultiBlockProcessResult {
witness_rows: _w,
literal_bytes: _l,
fse_aux_tables: _f,
block_info_arr: _b,
sequence_info_arr: _s,
address_table_rows: _a,
sequence_exec_results,
} = process::<Fr>(&compressed, Value::known(Fr::from(123456789)));

// The decoded batch data consists of:
// - [0..182] bytes of metadata
// - [182..] remaining bytes of chunk data
let recovered_bytes = sequence_exec_results
.into_iter()
.flat_map(|r| r.recovered_bytes)
.collect::<Vec<u8>>();
let segmented_batch_data = BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(recovered_bytes);

// Re-encode into blob bytes
let re_encoded_batch_data: BatchData<MAX_AGG_SNARKS> = BatchData::from(&segmented_batch_data);
let re_encoded_blob_bytes = re_encoded_batch_data.get_encoded_batch_data_bytes();

assert_eq!(compressed, re_encoded_blob_bytes, "Blob bytes must match");
}

#[test]
fn zstd_encoding_consistency_from_batch() {
// Load test batch bytes
// batch274 contains batch bytes that will produce a full blob
let batch_bytes = hex::decode(
fs::read_to_string("./data/test_batches/batch274.hex")
.expect("file path exists")
.trim(),
)
.expect("should load batch bytes");
let segmented_batch_bytes =
BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(batch_bytes.clone());

// Re-encode into blob bytes
let encoded_batch_data: BatchData<MAX_AGG_SNARKS> = BatchData::from(&segmented_batch_bytes);
let encoded_blob_bytes = encoded_batch_data.get_encoded_batch_data_bytes();

// full blob len sanity check
assert_eq!(
encoded_blob_bytes.len(),
N_BLOB_BYTES,
"full blob is the correct len"
);

// Decode into original batch bytes
let MultiBlockProcessResult {
witness_rows: _w,
literal_bytes: _l,
fse_aux_tables: _f,
block_info_arr: _b,
sequence_info_arr: _s,
address_table_rows: _a,
sequence_exec_results,
} = process::<Fr>(&encoded_blob_bytes, Value::known(Fr::from(123456789)));

let decoded_batch_bytes = sequence_exec_results
.into_iter()
.flat_map(|r| r.recovered_bytes)
.collect::<Vec<u8>>();

assert_eq!(batch_bytes, decoded_batch_bytes, "batch bytes must match");
}

fn generic_batch_data() -> BatchData<MAX_AGG_SNARKS> {
BatchData::from(&vec![
vec![3, 100, 24, 30],
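For reference, the 31-of-32 byte packing that `zstd_encoding_consistency` strips (the `j in 1..32` loop) and that `get_encoded_batch_data_bytes` re-applies can be sketched as below. This is a minimal illustration under the constraint documented in `blob.rs` (byte 0 of every 32-byte coefficient stays zero so the value is a canonical BLS12-381 scalar); `pack` and `unpack` are hypothetical names, not this repository's API.

const N_DATA_BYTES_PER_COEFFICIENT: usize = 31;

/// Pack data bytes into 32-byte coefficients, leaving byte 0 of each
/// coefficient zero so it remains a canonical BLS12-381 scalar.
fn pack(data: &[u8]) -> Vec<[u8; 32]> {
    data.chunks(N_DATA_BYTES_PER_COEFFICIENT)
        .map(|chunk| {
            let mut coeff = [0u8; 32];
            coeff[1..1 + chunk.len()].copy_from_slice(chunk);
            coeff
        })
        .collect()
}

/// Drop the most significant byte of each coefficient, mirroring the
/// nested loop at the start of zstd_encoding_consistency.
fn unpack(coeffs: &[[u8; 32]]) -> Vec<u8> {
    coeffs.iter().flat_map(|c| c[1..].to_vec()).collect()
}

fn main() {
    let data: Vec<u8> = (0u8..100).collect();
    let blob = pack(&data);
    let mut recovered = unpack(&blob);
    recovered.truncate(data.len()); // drop the last coefficient's zero padding
    assert_eq!(recovered, data);
}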