This repository was archived by the owner on Apr 18, 2025. It is now read-only.

Add full blob encoding and consistency check #1379

Merged
merged 10 commits on Jul 25, 2024

2 changes: 1 addition & 1 deletion aggregator/data/test_batches/batch274.hex

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions aggregator/data/test_blobs/blob005.hex

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions aggregator/src/blob.rs
@@ -74,6 +74,62 @@ pub struct BatchData<const N_SNARKS: usize> {
    pub chunk_data: [Vec<u8>; N_SNARKS],
}

impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
    /// Splits raw batch bytes, prefixed by metadata, into per-chunk segments.
    /// The metadata prefix is stripped and does not appear in the result.
    pub fn segment_with_metadata(batch_bytes_with_metadata: Vec<u8>) -> Vec<Vec<u8>> {
        // The metadata prefix occupies the first 182 bytes: a 2-byte valid-chunk
        // count followed by 4-byte chunk lengths (2 + 45 * 4 = 182 for the
        // 45-chunk configuration used by the tests).
        let metadata_bytes = batch_bytes_with_metadata
            .clone()
            .into_iter()
            .take(182)
            .collect::<Vec<u8>>();
        let batch_bytes = batch_bytes_with_metadata
            .clone()
            .into_iter()
            .skip(182)
            .collect::<Vec<u8>>();

        // Segment the decoded batch bytes at the chunk lengths recorded in the
        // metadata; each length is a 4-byte big-endian integer.
        let batch_data_len = batch_bytes.len();
        let chunk_lens = metadata_bytes[2..]
            .chunks(4)
            .map(|chunk| {
                chunk
                    .iter()
                    .fold(0usize, |acc, d| acc * 256usize + *d as usize)
            })
            .collect::<Vec<usize>>();

        // Sanity check: the valid chunks' lengths must sum to the payload length.
        let valid_chunks = metadata_bytes[1] as usize;
        let calculated_len = chunk_lens.iter().take(valid_chunks).sum::<usize>();
        assert_eq!(
            batch_data_len, calculated_len,
            "chunk segment lengths must sum to the batch payload length"
        );

        // Reconstruct the segments by walking the payload at the recorded lengths.
        let mut segmented_batch_data: Vec<Vec<u8>> = Vec::new();
        let mut offset: usize = 0;
        let mut segment: usize = 0;
        while offset < batch_data_len {
            segmented_batch_data.push(
                batch_bytes
                    .clone()
                    .into_iter()
                    .skip(offset)
                    .take(chunk_lens[segment])
                    .collect::<Vec<u8>>(),
            );

            offset += chunk_lens[segment];
            segment += 1;
        }

        segmented_batch_data
    }
}

impl<const N_SNARKS: usize> From<&BatchHash<N_SNARKS>> for BatchData<N_SNARKS> {
    fn from(batch_hash: &BatchHash<N_SNARKS>) -> Self {
        Self::new(
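For readers new to this layout, a small self-contained sketch of the input `segment_with_metadata` expects may help. It assumes the `N_SNARKS = 45` configuration used by the tests below, so the metadata prefix is 2 + 45 × 4 = 182 bytes: a 2-byte valid-chunk count followed by 45 big-endian u32 chunk lengths (the fold `acc * 256 + d` above is exactly big-endian decoding, e.g. `[0x00, 0x00, 0x01, 0x2C]` → 300). All values here are invented for illustration:

```rust
fn main() {
    // Metadata prefix: 2-byte chunk count, then 45 * 4 bytes of chunk lengths.
    let mut bytes = Vec::new();
    bytes.extend_from_slice(&[0x00, 0x02]); // two valid chunks
    bytes.extend_from_slice(&3u32.to_be_bytes()); // chunk 0 is 3 bytes long
    bytes.extend_from_slice(&2u32.to_be_bytes()); // chunk 1 is 2 bytes long
    bytes.extend(std::iter::repeat(0u8).take(43 * 4)); // 43 unused length slots
    assert_eq!(bytes.len(), 182);

    // Payload: chunk 0 (3 bytes) followed by chunk 1 (2 bytes).
    bytes.extend_from_slice(&[10, 11, 12, 20, 21]);

    // With the aggregator crate in scope, the call below would return the
    // payload split at the recorded lengths:
    // let segments = BatchData::<45>::segment_with_metadata(bytes);
    // assert_eq!(segments, vec![vec![10, 11, 12], vec![20, 21]]);
}
```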
132 changes: 117 additions & 15 deletions aggregator/src/tests/blob.rs
@@ -1,8 +1,9 @@
use crate::aggregation::witgen::{process, MultiBlockProcessResult};
use crate::{
    aggregation::{
        AssignedBarycentricEvaluationConfig, BarycentricEvaluationConfig, BlobDataConfig, RlcConfig,
    },
-    blob::{BatchData, PointEvaluationAssignments, N_BYTES_U256},
    blob::{BatchData, PointEvaluationAssignments, N_BLOB_BYTES, N_BYTES_U256},
    param::ConfigParams,
    BatchDataConfig, MAX_AGG_SNARKS,
};
@@ -257,16 +258,16 @@ fn check_circuit(circuit: &BlobCircuit) -> Result<(), Vec<VerifyFailure>> {

#[test]
fn blob_circuit_completeness() {
-    // single chunk in batch, but the chunk has a size of N_ROWS_DATA
-    let full_blob = vec![
-        // batch274 contains batch bytes that will produce a full blob
-        hex::decode(
-            fs::read_to_string("./data/test_batches/batch274.hex")
-                .expect("file path exists")
-                .trim(),
-        )
-        .expect("should load full blob batch bytes"),
-    ];
    // Full blob test case:
    // batch274 contains batch bytes that will produce a full blob.
    let full_blob = hex::decode(
        fs::read_to_string("./data/test_batches/batch274.hex")
            .expect("file path exists")
            .trim(),
    )
    .expect("should load full blob batch bytes");
    // batch274 includes the 182-byte metadata prefix, so segment it into chunks.
    let segmented_full_blob_src = BatchData::<45>::segment_with_metadata(full_blob);

    let all_empty_chunks: Vec<Vec<u8>> = vec![vec![]; MAX_AGG_SNARKS];
    let one_chunk = vec![vec![2, 3, 4, 100, 1]];
@@ -288,8 +289,8 @@ fn blob_circuit_completeness() {
        .chain(std::iter::once(vec![3, 100, 24, 30]))
        .collect::<Vec<_>>();

for blob in [
full_blob,
for (idx, blob) in [
segmented_full_blob_src,
one_chunk,
two_chunks,
max_chunks,
@@ -298,11 +299,112 @@
        nonempty_chunk_followed_by_empty_chunk,
        empty_and_nonempty_chunks,
        all_empty_except_last,
-    ] {
-        assert_eq!(check_data(BatchData::from(&blob)), Ok(()), "{:?}", blob);
    ]
    .into_iter()
    .enumerate()
    {
        let batch_data = BatchData::from(&blob);

        // The first blob is purposely constructed to take up the full blob space.
        if idx == 0 {
            let encoded_len = batch_data.get_encoded_batch_data_bytes().len();
            assert_eq!(
                encoded_len, N_BLOB_BYTES,
                "should be full blob: expected={N_BLOB_BYTES}, got={encoded_len}",
            );
        }

        assert_eq!(check_data(batch_data), Ok(()), "{:?}", blob);
    }
}
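The `idx == 0` assertion above relies on `N_BLOB_BYTES`. As a point of reference, and assuming the constants this codebase conventionally uses (an EIP-4844 blob of 4096 coefficients with 31 usable data bytes per 32-byte coefficient), the expected full-blob size works out as follows; the constant names in this sketch are assumptions, not a quote of the crate:

```rust
// Assumed layout: 4096 blob coefficients, each contributing 31 data bytes
// (the 32nd, most significant byte is reserved).
const BLOB_WIDTH: usize = 4096;
const N_DATA_BYTES_PER_COEFFICIENT: usize = 31;
const N_BLOB_BYTES: usize = BLOB_WIDTH * N_DATA_BYTES_PER_COEFFICIENT;

fn main() {
    assert_eq!(N_BLOB_BYTES, 126_976); // 4096 * 31
}
```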

#[test]
fn zstd_encoding_consistency() {
    // Load test blob bytes.
    let blob_bytes = hex::decode(
        fs::read_to_string("./data/test_blobs/blob005.hex")
            .expect("file path exists")
            .trim(),
    )
    .expect("should load blob bytes");

    // Leave out the most significant byte of each 32-byte coefficient: only
    // 31 of every 32 bytes carry compressed data (the leading byte is
    // reserved so the coefficient stays a valid scalar-field element).
    let mut compressed: Vec<u8> = vec![];
    for i in 0..blob_bytes.len() / 32 {
        for j in 1..32usize {
            compressed.push(blob_bytes[i * 32 + j]);
        }
    }

    // Decode into the original batch bytes.
    let MultiBlockProcessResult {
        witness_rows: _w,
        literal_bytes: _l,
        fse_aux_tables: _f,
        block_info_arr: _b,
        sequence_info_arr: _s,
        address_table_rows: _a,
        sequence_exec_results,
    } = process::<Fr>(&compressed, Value::known(Fr::from(123456789)));

    // The decoded batch data consists of:
    // - bytes [0..182): metadata
    // - bytes [182..): chunk data
    let recovered_bytes = sequence_exec_results
        .into_iter()
        .flat_map(|r| r.recovered_bytes)
        .collect::<Vec<u8>>();
    let segmented_batch_data = BatchData::<45>::segment_with_metadata(recovered_bytes);

    // Re-encode into blob bytes.
    let re_encoded_batch_data: BatchData<45> = BatchData::from(&segmented_batch_data);
    let re_encoded_blob_bytes = re_encoded_batch_data.get_encoded_batch_data_bytes();

    assert_eq!(compressed, re_encoded_blob_bytes, "blob bytes must match");
}
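The nested loop that strips the leading byte of each coefficient could equivalently be written with `chunks_exact`, which states the intent (drop byte 0 of every complete 32-byte coefficient) more directly; a behavior-preserving sketch:

```rust
// Equivalent to the nested loop in the test above: for every complete
// 32-byte coefficient, skip the reserved leading byte and keep the
// remaining 31 data bytes. Any trailing partial chunk is ignored, just
// as the index arithmetic in the loop ignores it.
fn strip_coefficient_msbs(blob_bytes: &[u8]) -> Vec<u8> {
    blob_bytes
        .chunks_exact(32)
        .flat_map(|coeff| coeff[1..].iter().copied())
        .collect()
}
```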

#[test]
fn zstd_encoding_consistency_from_batch() {
    // Load test batch bytes.
    // batch274 contains batch bytes that will produce a full blob.
    let batch_bytes = hex::decode(
        fs::read_to_string("./data/test_batches/batch274.hex")
            .expect("file path exists")
            .trim(),
    )
    .expect("should load batch bytes");
    let segmented_batch_bytes = BatchData::<45>::segment_with_metadata(batch_bytes.clone());

    // Encode into blob bytes.
    let encoded_batch_data: BatchData<45> = BatchData::from(&segmented_batch_bytes);
    let encoded_blob_bytes = encoded_batch_data.get_encoded_batch_data_bytes();

    // Full blob length sanity check.
    assert_eq!(
        encoded_blob_bytes.len(),
        N_BLOB_BYTES,
        "full blob is the correct len"
    );

    // Decode back into the original batch bytes.
    let MultiBlockProcessResult {
        witness_rows: _w,
        literal_bytes: _l,
        fse_aux_tables: _f,
        block_info_arr: _b,
        sequence_info_arr: _s,
        address_table_rows: _a,
        sequence_exec_results,
    } = process::<Fr>(&encoded_blob_bytes, Value::known(Fr::from(123456789)));

    let decoded_batch_bytes = sequence_exec_results
        .into_iter()
        .flat_map(|r| r.recovered_bytes)
        .collect::<Vec<u8>>();

    assert_eq!(batch_bytes, decoded_batch_bytes, "batch bytes must match");
}
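Taken together, the two tests pin down both directions of the pipeline: decoding an encoded batch reproduces the batch bytes, and re-encoding a decoded blob reproduces the blob bytes. A hypothetical helper condensing the second test's round trip (built only from the calls that appear above) might look like:

```rust
// Hypothetical helper, not part of the diff: encode a batch to blob bytes,
// decode the blob, and check that the recovered bytes match the input.
fn roundtrip_holds(batch_bytes: Vec<u8>) -> bool {
    let segments = BatchData::<45>::segment_with_metadata(batch_bytes.clone());
    let blob_bytes = BatchData::<45>::from(&segments).get_encoded_batch_data_bytes();
    let MultiBlockProcessResult { sequence_exec_results, .. } =
        process::<Fr>(&blob_bytes, Value::known(Fr::from(123456789)));
    let decoded = sequence_exec_results
        .into_iter()
        .flat_map(|r| r.recovered_bytes)
        .collect::<Vec<u8>>();
    decoded == batch_bytes
}
```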

fn generic_batch_data() -> BatchData<MAX_AGG_SNARKS> {
    BatchData::from(&vec![
        vec![3, 100, 24, 30],