
Add full blob encoding and consistency check #1379

Merged (10 commits, Jul 25, 2024)
Changes from all commits
2 changes: 1 addition & 1 deletion aggregator/data/test_batches/batch274.hex

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions aggregator/data/test_blobs/blob005.hex

Large diffs are not rendered by default.

65 changes: 64 additions & 1 deletion aggregator/src/blob.rs
@@ -36,6 +36,9 @@ pub const N_DATA_BYTES_PER_COEFFICIENT: usize = 31;
/// Data config. Since num_valid_chunks is u16, we use 2 bytes/rows.
pub const N_ROWS_NUM_CHUNKS: usize = 2;

/// The number of rows to encode chunk size (u32).
pub const N_ROWS_CHUNK_SIZE: usize = 4;

/// The number of bytes that we can fit in a blob. Note that each coefficient is represented in 32
/// bytes, however, since those 32 bytes must represent a BLS12-381 scalar in its canonical form,
/// we explicitly set the most-significant byte to 0, effectively utilising only 31 bytes.
@@ -74,6 +77,66 @@ pub struct BatchData<const N_SNARKS: usize> {
pub chunk_data: [Vec<u8>; N_SNARKS],
}

impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
/// Given raw batch bytes prefixed with metadata, segment the byte stream into per-chunk
/// byte segments. The metadata prefix is not included in the result.
pub fn segment_with_metadata(batch_bytes_with_metadata: Vec<u8>) -> Vec<Vec<u8>> {
let n_bytes_metadata = Self::n_rows_metadata();
let metadata_bytes = batch_bytes_with_metadata
.clone()
.into_iter()
.take(n_bytes_metadata)
.collect::<Vec<u8>>();
let batch_bytes = batch_bytes_with_metadata
.clone()
.into_iter()
.skip(n_bytes_metadata)
.collect::<Vec<u8>>();

// Decoded batch bytes require segmentation based on chunk length
let batch_data_len = batch_bytes.len();
let chunk_lens = metadata_bytes[N_ROWS_NUM_CHUNKS..]
.chunks(N_ROWS_CHUNK_SIZE)
.map(|chunk| {
chunk
.iter()
.fold(0usize, |acc, &d| acc * 256usize + d as usize)
})
.collect::<Vec<usize>>();

// sanity check: the valid chunk lengths must sum to the batch data length
let valid_chunks = metadata_bytes
.iter()
.take(N_ROWS_NUM_CHUNKS)
.fold(0usize, |acc, &d| acc * 256usize + d as usize);
let calculated_len = chunk_lens.iter().take(valid_chunks).sum::<usize>();
assert_eq!(
batch_data_len, calculated_len,
"chunk segmentation len must add up to the correct value"
);

// reconstruct segments
let mut segmented_batch_data: Vec<Vec<u8>> = Vec::new();
let mut offset: usize = 0;
let mut segment: usize = 0;
while offset < batch_data_len {
segmented_batch_data.push(
batch_bytes
.clone()
.into_iter()
.skip(offset)
.take(chunk_lens[segment])
.collect::<Vec<u8>>(),
);

offset += chunk_lens[segment];
segment += 1;
}

segmented_batch_data
}
}

impl<const N_SNARKS: usize> From<&BatchHash<N_SNARKS>> for BatchData<N_SNARKS> {
fn from(batch_hash: &BatchHash<N_SNARKS>) -> Self {
Self::new(
@@ -150,7 +213,7 @@ impl<const N_SNARKS: usize> BatchData<N_SNARKS> {
/// The number of rows to encode the size of each chunk in a batch, in the Blob Data config.
/// chunk_size is u32, we use 4 bytes/rows.
const fn n_rows_chunk_sizes() -> usize {
- N_SNARKS * 4
+ N_SNARKS * N_ROWS_CHUNK_SIZE
}

/// The total number of rows in "digest rlc" and "digest bytes" sections.
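For context, `segment_with_metadata` assumes the byte stream starts with a metadata prefix: `N_ROWS_NUM_CHUNKS` (2) bytes holding `num_valid_chunks`, followed by `N_SNARKS` size fields of `N_ROWS_CHUNK_SIZE` (4) bytes each, with the chunk payloads after that. The sketch below round-trips that layout. It is a minimal standalone illustration, not code from this PR: `build_batch_bytes` is a hypothetical helper, `N_SNARKS = 4` is chosen arbitrarily, and big-endian size encoding is an assumption inferred from the `fold(acc * 256 + d)` parsing above.

const N_ROWS_NUM_CHUNKS: usize = 2;
const N_ROWS_CHUNK_SIZE: usize = 4;
const N_SNARKS: usize = 4;

// Hypothetical helper: build the metadata-prefixed stream the function expects.
fn build_batch_bytes(chunks: &[Vec<u8>]) -> Vec<u8> {
    let mut bytes = Vec::new();
    bytes.extend((chunks.len() as u16).to_be_bytes()); // num_valid_chunks
    for c in chunks {
        bytes.extend((c.len() as u32).to_be_bytes()); // per-chunk size
    }
    // Size slots for unused chunk positions are still present, zero-filled.
    bytes.extend(vec![0u8; (N_SNARKS - chunks.len()) * N_ROWS_CHUNK_SIZE]);
    for c in chunks {
        bytes.extend(c); // chunk payloads follow the metadata
    }
    bytes
}

fn main() {
    let chunks = vec![vec![1u8, 2, 3], vec![4, 5]];
    let bytes = build_batch_bytes(&chunks);

    // Parse back, mirroring the big-endian fold in segment_with_metadata.
    let n_meta = N_ROWS_NUM_CHUNKS + N_SNARKS * N_ROWS_CHUNK_SIZE;
    let (meta, data) = bytes.split_at(n_meta);
    let valid = meta[..N_ROWS_NUM_CHUNKS]
        .iter()
        .fold(0usize, |acc, &d| acc * 256 + d as usize);
    let lens: Vec<usize> = meta[N_ROWS_NUM_CHUNKS..]
        .chunks(N_ROWS_CHUNK_SIZE)
        .map(|c| c.iter().fold(0usize, |acc, &d| acc * 256 + d as usize))
        .collect();

    // The same sanity check the PR asserts: valid chunk lengths sum to the data length.
    assert_eq!(lens[..valid].iter().sum::<usize>(), data.len());

    // Segmentation recovers the original chunks.
    let mut offset = 0;
    for (chunk, &len) in chunks.iter().zip(&lens[..valid]) {
        assert_eq!(&data[offset..offset + len], &chunk[..]);
        offset += len;
    }
}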
133 changes: 118 additions & 15 deletions aggregator/src/tests/blob.rs
@@ -1,8 +1,9 @@
+ use crate::aggregation::witgen::{process, MultiBlockProcessResult};
use crate::{
aggregation::{
AssignedBarycentricEvaluationConfig, BarycentricEvaluationConfig, BlobDataConfig, RlcConfig,
},
- blob::{BatchData, PointEvaluationAssignments, N_BYTES_U256},
+ blob::{BatchData, PointEvaluationAssignments, N_BLOB_BYTES, N_BYTES_U256},
param::ConfigParams,
BatchDataConfig, MAX_AGG_SNARKS,
};
@@ -257,16 +258,16 @@ fn check_circuit(circuit: &BlobCircuit) -> Result<(), Vec<VerifyFailure>> {

#[test]
fn blob_circuit_completeness() {
- // single chunk in batch, but the chunk has a size of N_ROWS_DATA
- let full_blob = vec![
- // batch274 contains batch bytes that will produce a full blob
- hex::decode(
- fs::read_to_string("./data/test_batches/batch274.hex")
- .expect("file path exists")
- .trim(),
- )
- .expect("should load full blob batch bytes"),
- ];
+ // Full blob test case
+ // batch274 contains batch bytes that will produce a full blob
+ let full_blob = hex::decode(
+ fs::read_to_string("./data/test_batches/batch274.hex")
+ .expect("file path exists")
+ .trim(),
+ )
+ .expect("should load full blob batch bytes");
+ // batch274 contains metadata
+ let segmented_full_blob_src = BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(full_blob);

let all_empty_chunks: Vec<Vec<u8>> = vec![vec![]; MAX_AGG_SNARKS];
let one_chunk = vec![vec![2, 3, 4, 100, 1]];
@@ -288,8 +289,8 @@ fn blob_circuit_completeness()
.chain(std::iter::once(vec![3, 100, 24, 30]))
.collect::<Vec<_>>();

- for blob in [
- full_blob,
+ for (idx, blob) in [
+ segmented_full_blob_src,
one_chunk,
two_chunks,
max_chunks,
@@ -298,11 +299,113 @@
nonempty_chunk_followed_by_empty_chunk,
empty_and_nonempty_chunks,
all_empty_except_last,
- ] {
- assert_eq!(check_data(BatchData::from(&blob)), Ok(()), "{:?}", blob);
+ ]
+ .into_iter()
+ .enumerate()
+ {
+ let batch_data = BatchData::from(&blob);
+
+ // First blob is purposely constructed to take full blob space
+ if idx == 0 {
+ let encoded_len = batch_data.get_encoded_batch_data_bytes().len();
+ assert_eq!(
+ encoded_len, N_BLOB_BYTES,
+ "should be full blob: expected={N_BLOB_BYTES}, got={encoded_len}",
+ );
+ }
+
+ assert_eq!(check_data(batch_data), Ok(()), "{:?}", blob);
}
}

#[test]
fn zstd_encoding_consistency() {
// Load test blob bytes
let blob_bytes = hex::decode(
fs::read_to_string("./data/test_blobs/blob005.hex")
.expect("file path exists")
.trim(),
)
.expect("should load blob bytes");

// Drop the most significant byte of each 32-byte coefficient to recover the compressed data
let mut compressed: Vec<u8> = vec![];
for i in 0..blob_bytes.len() / 32 {
for j in 1..32usize {
compressed.push(blob_bytes[i * 32 + j]);
}
}

// Decode into original batch bytes
let MultiBlockProcessResult {
witness_rows: _w,
literal_bytes: _l,
fse_aux_tables: _f,
block_info_arr: _b,
sequence_info_arr: _s,
address_table_rows: _a,
sequence_exec_results,
} = process::<Fr>(&compressed, Value::known(Fr::from(123456789)));

// The decoded batch data consists of:
// - [0..182] bytes of metadata
// - [182..] remaining bytes of chunk data
let recovered_bytes = sequence_exec_results
.into_iter()
.flat_map(|r| r.recovered_bytes)
.collect::<Vec<u8>>();
let segmented_batch_data = BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(recovered_bytes);

// Re-encode into blob bytes
let re_encoded_batch_data: BatchData<MAX_AGG_SNARKS> = BatchData::from(&segmented_batch_data);
let re_encoded_blob_bytes = re_encoded_batch_data.get_encoded_batch_data_bytes();

assert_eq!(compressed, re_encoded_blob_bytes, "Blob bytes must match");
}

#[test]
fn zstd_encoding_consistency_from_batch() {
// Load test batch bytes
// batch274 contains batch bytes that will produce a full blob
let batch_bytes = hex::decode(
fs::read_to_string("./data/test_batches/batch274.hex")
.expect("file path exists")
.trim(),
)
.expect("should load batch bytes");
let segmented_batch_bytes =
BatchData::<MAX_AGG_SNARKS>::segment_with_metadata(batch_bytes.clone());

// Re-encode into blob bytes
let encoded_batch_data: BatchData<MAX_AGG_SNARKS> = BatchData::from(&segmented_batch_bytes);
let encoded_blob_bytes = encoded_batch_data.get_encoded_batch_data_bytes();

// full blob len sanity check
assert_eq!(
encoded_blob_bytes.len(),
N_BLOB_BYTES,
"full blob is the correct len"
);

// Decode into original batch bytes
let MultiBlockProcessResult {
witness_rows: _w,
literal_bytes: _l,
fse_aux_tables: _f,
block_info_arr: _b,
sequence_info_arr: _s,
address_table_rows: _a,
sequence_exec_results,
} = process::<Fr>(&encoded_blob_bytes, Value::known(Fr::from(123456789)));

let decoded_batch_bytes = sequence_exec_results
.into_iter()
.flat_map(|r| r.recovered_bytes)
.collect::<Vec<u8>>();

assert_eq!(batch_bytes, decoded_batch_bytes, "batch bytes must match");
}

fn generic_batch_data() -> BatchData<MAX_AGG_SNARKS> {
BatchData::from(&vec![
vec![3, 100, 24, 30],
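For reference, the 31-of-32 byte packing that `zstd_encoding_consistency` strips (the `j in 1..32` loop) and that `get_encoded_batch_data_bytes` re-applies can be sketched as below. This is a minimal illustration under the constraint documented in `blob.rs` (byte 0 of every 32-byte coefficient stays zero so the value is a canonical BLS12-381 scalar); `pack` and `unpack` are hypothetical names, not this repository's API.

const N_DATA_BYTES_PER_COEFFICIENT: usize = 31;

/// Pack data bytes into 32-byte coefficients, leaving byte 0 of each
/// coefficient zero so it remains a canonical BLS12-381 scalar.
fn pack(data: &[u8]) -> Vec<[u8; 32]> {
    data.chunks(N_DATA_BYTES_PER_COEFFICIENT)
        .map(|chunk| {
            let mut coeff = [0u8; 32];
            coeff[1..1 + chunk.len()].copy_from_slice(chunk);
            coeff
        })
        .collect()
}

/// Drop the most significant byte of each coefficient, mirroring the
/// nested loop at the start of zstd_encoding_consistency.
fn unpack(coeffs: &[[u8; 32]]) -> Vec<u8> {
    coeffs.iter().flat_map(|c| c[1..].to_vec()).collect()
}

fn main() {
    let data: Vec<u8> = (0u8..100).collect();
    let blob = pack(&data);
    let mut recovered = unpack(&blob);
    recovered.truncate(data.len()); // drop the last coefficient's zero padding
    assert_eq!(recovered, data);
}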