Skip to content

Commit 54125eb

Browse files
Add full blob encoding and consistency check (#1379)
* Add full blob assertion
* Update aggregator/src/tests/blob.rs (Co-authored-by: Rohit Narurkar <[email protected]>)
* fmt
* Debug zstd encoding
* Add consistency test
* TODOs
* Add batch data metadata processing
* fmt
* chore: cleanup

---------

Co-authored-by: Rohit Narurkar <[email protected]>
Co-authored-by: Rohit Narurkar <[email protected]>
1 parent d32705f commit 54125eb

File tree

4 files changed

+184
-17
lines changed

4 files changed

+184
-17
lines changed

aggregator/data/test_batches/batch274.hex

+1-1
Large diffs are not rendered by default.

aggregator/data/test_blobs/blob005.hex

+1
Large diffs are not rendered by default.

aggregator/src/blob.rs

+64-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ pub const N_DATA_BYTES_PER_COEFFICIENT: usize = 31;
3636
/// Data config. Since num_valid_chunks is u16, we use 2 bytes/rows.
3737
pub const N_ROWS_NUM_CHUNKS: usize = 2;
3838

39+
/// The number of rows to encode chunk size (u32).
40+
pub const N_ROWS_CHUNK_SIZE: usize = 4;
41+
3942
/// The number of bytes that we can fit in a blob. Note that each coefficient is represented in 32
4043
/// bytes, however, since those 32 bytes must represent a BLS12-381 scalar in its canonical form,
4144
/// we explicitly set the most-significant byte to 0, effectively utilising only 31 bytes.
@@ -74,6 +77,66 @@ pub struct BatchData<const N_SNARKS: usize> {
7477
pub chunk_data: [Vec<u8>; N_SNARKS],
7578
}
7679

80+
impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
81+
/// For raw batch bytes with metadata, this function segments the byte stream into chunk segments.
82+
/// Metadata will be removed from the result.
83+
pub fn segment_with_metadata(batch_bytes_with_metadata: Vec<u8>) -> Vec<Vec<u8>> {
84+
let n_bytes_metadata = Self::n_rows_metadata();
85+
let metadata_bytes = batch_bytes_with_metadata
86+
.clone()
87+
.into_iter()
88+
.take(n_bytes_metadata)
89+
.collect::<Vec<u8>>();
90+
let batch_bytes = batch_bytes_with_metadata
91+
.clone()
92+
.into_iter()
93+
.skip(n_bytes_metadata)
94+
.collect::<Vec<u8>>();
95+
96+
// Decoded batch bytes require segmentation based on chunk length
97+
let batch_data_len = batch_bytes.len();
98+
let chunk_lens = metadata_bytes[N_ROWS_NUM_CHUNKS..]
99+
.chunks(N_ROWS_CHUNK_SIZE)
100+
.map(|chunk| {
101+
chunk
102+
.iter()
103+
.fold(0usize, |acc, &d| acc * 256usize + d as usize)
104+
})
105+
.collect::<Vec<usize>>();
106+
107+
// length segments sanity check
108+
let valid_chunks = metadata_bytes
109+
.iter()
110+
.take(N_ROWS_NUM_CHUNKS)
111+
.fold(0usize, |acc, &d| acc * 256usize + d as usize);
112+
let calculated_len = chunk_lens.iter().take(valid_chunks).sum::<usize>();
113+
assert_eq!(
114+
batch_data_len, calculated_len,
115+
"chunk segmentation len must add up to the correct value"
116+
);
117+
118+
// reconstruct segments
119+
let mut segmented_batch_data: Vec<Vec<u8>> = Vec::new();
120+
let mut offset: usize = 0;
121+
let mut segment: usize = 0;
122+
while offset < batch_data_len {
123+
segmented_batch_data.push(
124+
batch_bytes
125+
.clone()
126+
.into_iter()
127+
.skip(offset)
128+
.take(chunk_lens[segment])
129+
.collect::<Vec<u8>>(),
130+
);
131+
132+
offset += chunk_lens[segment];
133+
segment += 1;
134+
}
135+
136+
segmented_batch_data
137+
}
138+
}
139+
77140
impl<const N_SNARKS: usize> From<&BatchHash<N_SNARKS>> for BatchData<N_SNARKS> {
78141
fn from(batch_hash: &BatchHash<N_SNARKS>) -> Self {
79142
Self::new(
@@ -150,7 +213,7 @@ impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
150213
/// The number of rows to encode the size of each chunk in a batch, in the Blob Data config.
/// chunk_size is u32, we use 4 bytes/rows (see `N_ROWS_CHUNK_SIZE`), one size per snark.
const fn n_rows_chunk_sizes() -> usize {
    N_SNARKS * N_ROWS_CHUNK_SIZE
}
155218

156219
/// The total number of rows in "digest rlc" and "digest bytes" sections.

aggregator/src/tests/blob.rs

+118-15
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
use crate::aggregation::witgen::{process, MultiBlockProcessResult};
12
use crate::{
23
aggregation::{
34
AssignedBarycentricEvaluationConfig, BarycentricEvaluationConfig, BlobDataConfig, RlcConfig,
45
},
5-
blob::{BatchData, PointEvaluationAssignments, N_BYTES_U256},
6+
blob::{BatchData, PointEvaluationAssignments, N_BLOB_BYTES, N_BYTES_U256},
67
param::ConfigParams,
78
BatchDataConfig, MAX_AGG_SNARKS,
89
};
@@ -257,16 +258,16 @@ fn check_circuit(circuit: &BlobCircuit) -> Result<(), Vec<VerifyFailure>> {
257258

258259
#[test]
259260
fn blob_circuit_completeness() {
260-
// single chunk in batch, but the chunk has a size of N_ROWS_DATA
261-
let full_blob = vec![
262-
// batch274 contains batch bytes that will produce a full blob
263-
hex::decode(
264-
fs::read_to_string("./data/test_batches/batch274.hex")
265-
.expect("file path exists")
266-
.trim(),
267-
)
268-
.expect("should load full blob batch bytes"),
269-
];
261+
// Full blob test case
262+
// batch274 contains batch bytes that will produce a full blob
263+
let full_blob = hex::decode(
264+
fs::read_to_string("./data/test_batches/batch274.hex")
265+
.expect("file path exists")
266+
.trim(),
267+
)
268+
.expect("should load full blob batch bytes");
269+
// batch274 contains metadata
270+
let segmented_full_blob_src = BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(full_blob);
270271

271272
let all_empty_chunks: Vec<Vec<u8>> = vec![vec![]; MAX_AGG_SNARKS];
272273
let one_chunk = vec![vec![2, 3, 4, 100, 1]];
@@ -288,8 +289,8 @@ fn blob_circuit_completeness() {
288289
.chain(std::iter::once(vec![3, 100, 24, 30]))
289290
.collect::<Vec<_>>();
290291

291-
for blob in [
292-
full_blob,
292+
for (idx, blob) in [
293+
segmented_full_blob_src,
293294
one_chunk,
294295
two_chunks,
295296
max_chunks,
@@ -298,11 +299,113 @@ fn blob_circuit_completeness() {
298299
nonempty_chunk_followed_by_empty_chunk,
299300
empty_and_nonempty_chunks,
300301
all_empty_except_last,
301-
] {
302-
assert_eq!(check_data(BatchData::from(&blob)), Ok(()), "{:?}", blob);
302+
]
303+
.into_iter()
304+
.enumerate()
305+
{
306+
let batch_data = BatchData::from(&blob);
307+
308+
// First blob is purposely constructed to take full blob space
309+
if idx == 0 {
310+
let encoded_len = batch_data.get_encoded_batch_data_bytes().len();
311+
assert_eq!(
312+
encoded_len, N_BLOB_BYTES,
313+
"should be full blob: expected={N_BLOB_BYTES}, got={encoded_len}",
314+
);
315+
}
316+
317+
assert_eq!(check_data(batch_data), Ok(()), "{:?}", blob);
303318
}
304319
}
305320

321+
#[test]
fn zstd_encoding_consistency() {
    // Load test blob bytes
    let blob_bytes = hex::decode(
        fs::read_to_string("./data/test_blobs/blob005.hex")
            .expect("file path exists")
            .trim(),
    )
    .expect("should load blob bytes");

    // Leave out most significant byte for compressed data: each 32-byte
    // coefficient carries its payload in the low 31 bytes (the MSB is forced
    // to 0 so the word is a canonical BLS12-381 scalar). `chunks_exact(32)`
    // drops any trailing partial word, matching the original `len() / 32`
    // index loop, while avoiding per-element bounds checks.
    let compressed = blob_bytes
        .chunks_exact(32)
        .flat_map(|coefficient| coefficient[1..].iter().copied())
        .collect::<Vec<u8>>();

    // Decode into original batch bytes
    let MultiBlockProcessResult {
        witness_rows: _w,
        literal_bytes: _l,
        fse_aux_tables: _f,
        block_info_arr: _b,
        sequence_info_arr: _s,
        address_table_rows: _a,
        sequence_exec_results,
    } = process::<Fr>(&compressed, Value::known(Fr::from(123456789)));

    // The decoded batch data consists of:
    // - [0..182] bytes of metadata
    // - [182..] remaining bytes of chunk data
    let recovered_bytes = sequence_exec_results
        .into_iter()
        .flat_map(|r| r.recovered_bytes)
        .collect::<Vec<u8>>();
    let segmented_batch_data = BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(recovered_bytes);

    // Re-encode into blob bytes and confirm the round trip is lossless.
    let re_encoded_batch_data: BatchData<MAX_AGG_SNARKS> = BatchData::from(&segmented_batch_data);
    let re_encoded_blob_bytes = re_encoded_batch_data.get_encoded_batch_data_bytes();

    assert_eq!(compressed, re_encoded_blob_bytes, "Blob bytes must match");
}
365+
366+
#[test]
fn zstd_encoding_consistency_from_batch() {
    // Load test batch bytes
    // batch274 contains batch bytes that will produce a full blob
    let batch_bytes = hex::decode(
        fs::read_to_string("./data/test_batches/batch274.hex")
            .expect("file path exists")
            .trim(),
    )
    .expect("should load batch bytes");

    // Strip metadata and split the stream into per-chunk segments.
    let segmented_batch_bytes =
        BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(batch_bytes.clone());

    // Re-encode into blob bytes
    let encoded_batch_data: BatchData<MAX_AGG_SNARKS> = BatchData::from(&segmented_batch_bytes);
    let encoded_blob_bytes = encoded_batch_data.get_encoded_batch_data_bytes();

    // full blob len sanity check
    assert_eq!(
        encoded_blob_bytes.len(),
        N_BLOB_BYTES,
        "full blob is the correct len"
    );

    // Decode into original batch bytes; only the sequence execution results
    // are needed here, so skip the exhaustive destructuring of the result.
    let process_result = process::<Fr>(&encoded_blob_bytes, Value::known(Fr::from(123456789)));

    let mut decoded_batch_bytes: Vec<u8> = Vec::new();
    for exec_result in process_result.sequence_exec_results {
        decoded_batch_bytes.extend(exec_result.recovered_bytes);
    }

    assert_eq!(batch_bytes, decoded_batch_bytes, "batch bytes must match");
}
408+
306409
fn generic_batch_data() -> BatchData<MAX_AGG_SNARKS> {
307410
BatchData::from(&vec![
308411
vec![3, 100, 24, 30],

0 commit comments

Comments
 (0)