diff --git a/.gitignore b/.gitignore index 2f0a46ef53..8de98ea8ed 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ .idea *.log *.json -*.sh \ No newline at end of file +*.sh +*.txt +*.srs \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 8b4898dea2..714ece9233 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,25 @@ dependencies = [ "opaque-debug 0.3.0", ] +[[package]] +name = "aggregator" +version = "0.1.0" +dependencies = [ + "ark-std 0.4.0", + "env_logger 0.10.0", + "eth-types 0.1.0", + "ethers-core", + "halo2_proofs", + "itertools", + "log", + "rand", + "serde", + "serde_json", + "snark-verifier", + "snark-verifier-sdk", + "zkevm-circuits", +] + [[package]] name = "ahash" version = "0.7.6" @@ -82,6 +101,17 @@ dependencies = [ "rand", ] +[[package]] +name = "ark-std" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94893f1e0c6eeab764ade8dc4c0db24caf4fe7cbbaafc0eba0a9030f447b5185" +dependencies = [ + "colored", + "num-traits", + "rand", +] + [[package]] name = "array-init" version = "2.1.0" @@ -541,7 +571,7 @@ dependencies = [ name = "circuit-benchmarks" version = "0.1.0" dependencies = [ - "ark-std", + "ark-std 0.3.0", "bus-mapping", "env_logger 0.9.3", "eth-types 0.1.0", @@ -2176,7 +2206,7 @@ name = "halo2_proofs" version = "0.2.0" source = "git+https://github.com/scroll-tech/halo2.git?branch=v0.4#3d40ae4968759ac4516c5f9c45ad20140e2d35d5" dependencies = [ - "ark-std", + "ark-std 0.3.0", "blake2b_simd", "cfg-if 0.1.10", "crossbeam", diff --git a/Cargo.toml b/Cargo.toml index 7698a67c3e..af51f7fcbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,8 @@ members = [ "eth-types", "external-tracer", "mock", - "testool" + "testool", + "aggregator" ] [patch.crates-io] diff --git a/aggregator/Cargo.toml b/aggregator/Cargo.toml new file mode 100644 index 0000000000..759727ed5d --- /dev/null +++ b/aggregator/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "aggregator" +version = "0.1.0" 
+edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +eth-types = { path = "../eth-types" } +zkevm-circuits = { path = "../zkevm-circuits" } + + +ark-std = "0.4.0" +env_logger = "0.10.0" +ethers-core = "0.17.0" +log = "0.4" +itertools = "0.10.3" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +rand = "0.8" + +halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", tag = "v2023_02_02" } +snark-verifier = { git = "https://github.com/scroll-tech/snark-verifier", branch = "develop" } +snark-verifier-sdk = { git = "https://github.com/scroll-tech/snark-verifier", branch = "develop", default-features=false, features = ["loader_halo2", "loader_evm", "halo2-pse"] } + + +[features] +default = [] +print-trace = [ "ark-std/print-trace" ] \ No newline at end of file diff --git a/aggregator/README.md b/aggregator/README.md new file mode 100644 index 0000000000..98d25c60c8 --- /dev/null +++ b/aggregator/README.md @@ -0,0 +1,59 @@ +Proof Aggregation +----- + +![Architecture](./figures/architecture.png) + +This repo does proof aggregations for zkEVM proofs. + +## zkEVM circuit +A zkEVM circuit generates a ZK proof for a chunk of blocks. It takes 64 field elements as its public input, consisting of +- chunk's data hash digest: each byte is encoded in an Fr element +- chunk's public input hash digest: each byte is encoded in an Fr element +The total size for a public input is 64 bytes, encoded in 64 Fr elements + +For the ease of testing, this repo implements a `MockCircuit` which has the same public input APIs as a zkEVM circuit. + +## First compression circuit +The first compression circuit takes in a fresh snark proof and generates a new (potentially small) snark proof. 
+The public inputs to the new snark proof consist of +- 12 elements from the accumulators + - an accumulator consists of 2 G1 elements, which are the left and right inputs to the pairing + - this is treated as 4 Fq elements, each decomposed into 3 limbs and encoded in Fr +- 64 elements from previous snark + - re-expose the same public inputs as the original snark + +The first compression circuit is configured with the [wide config file](./configs/compression_wide.config). + +## Second compression circuit + +The second compression circuit takes in a compressed snark proof and generates a new (potentially small) snark proof. +The public inputs to the new snark proof consist of +- 12 elements from the accumulators + - an accumulator consists of 2 G1 elements, which are the left and right inputs to the pairing + - this is treated as 4 Fq elements, each decomposed into 3 limbs and encoded in Fr + - accumulator from the previous snark is accumulated into the current accumulator +- 64 elements from previous snark + - skipping the first 12 elements which are the previous accumulator, as they are already accumulated + - re-expose the remaining 64 field elements as the public inputs + +The second compression circuit is configured with the [thin config file](./configs/compression_thin.config). + +## Aggregation circuit +An aggregation circuit takes in a batch of `k` proofs, each for a chunk of blocks. +It generates a single proof asserting the validity of all the proofs. 
+ +It also performs public input aggregation, i.e., reducing the `64k` public elements into a fixed number of `148` elements: +- 12 elements from accumulators, which accumulate all the previous `k` accumulators from each snark +- 136 elements from the hashes + - first_chunk_prev_state_root: 32 Field elements + - last_chunk_post_state_root: 32 Field elements + - last_chunk_withdraw_root: 32 Field elements + - batch_public_input_hash: 32 Field elements + - chain_id: 8 Field elements + +In addition, it attests that, for chunks indexed from `0` to `k-1`, +- batch_data_hash := keccak(chunk_0.data_hash || ... || chunk_k-1.data_hash) where chunk_i.data_hash is a public input to the i-th batch snark circuit +- chunk_pi_hash := keccak(chain_id || prev_state_root || post_state_root || withdraw_root || chunk_data_hash) where chunk_data_hash is a public input to the i-th batch snark circuit +- and the related fields match the public inputs + +See [public input aggregation](./src/proof_aggregation/public_input_aggregation.rs) for the details of public input aggregation. 
\ No newline at end of file diff --git a/aggregator/configs/compression_thin.config b/aggregator/configs/compression_thin.config new file mode 100644 index 0000000000..6975f69170 --- /dev/null +++ b/aggregator/configs/compression_thin.config @@ -0,0 +1 @@ +{"strategy":"Simple","degree":26,"num_advice":[1],"num_lookup_advice":[1],"num_fixed":1,"lookup_bits":20,"limb_bits":88,"num_limbs":3} \ No newline at end of file diff --git a/aggregator/configs/compression_wide.config b/aggregator/configs/compression_wide.config new file mode 100644 index 0000000000..78bbf04075 --- /dev/null +++ b/aggregator/configs/compression_wide.config @@ -0,0 +1 @@ +{"strategy":"Simple","degree":22,"num_advice":[8],"num_lookup_advice":[1],"num_fixed":1,"lookup_bits":20,"limb_bits":88,"num_limbs":3} diff --git a/aggregator/figures/architecture.png b/aggregator/figures/architecture.png new file mode 100644 index 0000000000..d5d89ef432 Binary files /dev/null and b/aggregator/figures/architecture.png differ diff --git a/aggregator/src/aggregation.rs b/aggregator/src/aggregation.rs new file mode 100644 index 0000000000..c8ac19823e --- /dev/null +++ b/aggregator/src/aggregation.rs @@ -0,0 +1,38 @@ +/// Circuit implementation of aggregation circuit. +mod circuit; +/// CircuitExt implementation of compression circuit. 
+mod circuit_ext; +/// Config for aggregation circuit +mod config; + +pub use circuit::AggregationCircuit; +pub use config::AggregationConfig; + +// TODO(ZZ): update to the right degree +pub(crate) const LOG_DEGREE: u32 = 19; + +// ================================ +// indices for hash bytes +// ================================ +// +// the preimages are arranged as +// - chain_id: 8 bytes +// - prev_state_root 32 bytes +// - post_state_root 32 bytes +// - withdraw_root 32 bytes +// - chunk_data_hash 32 bytes +// +// A chain_id is u64 and uses 8 bytes +pub(crate) const CHAIN_ID_LEN: usize = 8; +pub(crate) const PREV_STATE_ROOT_INDEX: usize = 8; +pub(crate) const POST_STATE_ROOT_INDEX: usize = 40; +pub(crate) const WITHDRAW_ROOT_INDEX: usize = 72; +pub(crate) const CHUNK_DATA_HASH_INDEX: usize = 104; + +// Each round requires (NUM_ROUNDS+1) * DEFAULT_KECCAK_ROWS = 300 rows. +// This library is hard coded for this parameter. +// Modifying the following parameters may result into bugs. +// Adopted from keccak circuit +pub(crate) const DEFAULT_KECCAK_ROWS: usize = 12; +// Adopted from keccak circuit +pub(crate) const NUM_ROUNDS: usize = 24; diff --git a/aggregator/src/aggregation/circuit.rs b/aggregator/src/aggregation/circuit.rs new file mode 100644 index 0000000000..938be7410f --- /dev/null +++ b/aggregator/src/aggregation/circuit.rs @@ -0,0 +1,357 @@ +use ark_std::{end_timer, start_timer}; +use halo2_proofs::{ + circuit::{Layouter, SimpleFloorPlanner, Value}, + halo2curves::bn256::{Bn256, Fq, Fr, G1Affine}, + plonk::{Circuit, ConstraintSystem, Error}, + poly::{commitment::ParamsProver, kzg::commitment::ParamsKZG}, +}; +use itertools::Itertools; +use rand::Rng; +use snark_verifier::{ + loader::{ + halo2::{ + halo2_ecc::halo2_base::{self, AssignedValue, Context, ContextParams}, + Halo2Loader, + }, + native::NativeLoader, + }, + pcs::kzg::{Bdfg21, Kzg, KzgAccumulator, KzgSuccinctVerifyingKey}, + util::arithmetic::fe_to_limbs, +}; +use snark_verifier_sdk::{ + aggregate, 
flatten_accumulator, types::Svk, CircuitExt, Snark, SnarkWitness, +}; +use zkevm_circuits::util::Challenges; + +use crate::{ + aggregation::config::AggregationConfig, + core::{assign_batch_hashes, extract_accumulators_and_proof}, + param::{ConfigParams, BITS, LIMBS}, + BatchHash, ChunkHash, CHAIN_ID_LEN, POST_STATE_ROOT_INDEX, PREV_STATE_ROOT_INDEX, + WITHDRAW_ROOT_INDEX, +}; + +/// Aggregation circuit that does not re-expose any public inputs from aggregated snarks +#[derive(Clone)] +pub struct AggregationCircuit { + pub(crate) svk: KzgSuccinctVerifyingKey, + pub(crate) snarks: Vec, + // the public instance for this circuit consists of + // - an accumulator (12 elements) + // - the batch's public_input_hash (32 elements) + pub(crate) flattened_instances: Vec, + // accumulation scheme proof, private input + pub(crate) as_proof: Value>, + // batch hash circuit for which the snarks are generated + pub(crate) batch_hash: BatchHash, +} + +impl AggregationCircuit { + /// Build a new aggregation circuit for a list of __compressed__ snarks. 
+ /// Requires the chunk hashes that are used for the __fresh__ snark + pub fn new( + params: &ParamsKZG, + snarks: &[Snark], + rng: impl Rng + Send, + chunk_hashes: &[ChunkHash], + ) -> Self { + let timer = start_timer!(|| "generate aggregation circuit"); + // sanity: for each chunk we have a snark + assert_eq!( + snarks.len(), + chunk_hashes.len(), + "num of snarks ({}) does not match number of chunks ({})", + snarks.len(), + chunk_hashes.len(), + ); + // sanity check: snarks's public input matches chunk_hashes + for (chunk, snark) in chunk_hashes.iter().zip(snarks.iter()) { + let chunk_hash_bytes = chunk.public_input_hash(); + let snark_hash_bytes = &snark.instances[0]; + + for i in 0..32 { + // for each snark, + // first 12 elements are accumulator + // next 8 elements are chain id + // next 32 elements are data hash (52=20+32) + // next 32 elements are public_input_hash + // data hash + public_input_hash = snark public input + assert_eq!( + Fr::from(chunk.data_hash.as_bytes()[i] as u64), + snark_hash_bytes[i + 20] + ); + + assert_eq!( + Fr::from(chunk_hash_bytes[i] as u64), + snark_hash_bytes[i + 52] + ); + } + } + + // extract the accumulators and proofs + let svk = params.get_g()[0].into(); + + // this aggregates MULTIPLE snarks + // (instead of ONE as in proof compression) + let (accumulator, as_proof) = extract_accumulators_and_proof(params, snarks, rng); + let KzgAccumulator:: { lhs, rhs } = accumulator; + let acc_instances = [lhs.x, lhs.y, rhs.x, rhs.y] + .map(fe_to_limbs::) + .concat(); + + // extract the pi aggregation circuit's instances + let batch_hash = BatchHash::construct(chunk_hashes); + let public_input_hash = &batch_hash.instances()[0]; + + let flattened_instances: Vec = + [acc_instances.as_slice(), public_input_hash.as_slice()].concat(); + + log::trace!("flattened instances during construction"); + for (i, e) in flattened_instances.iter().enumerate() { + log::trace!("{}-th: {:?}", i, e); + } + end_timer!(timer); + Self { + svk, + snarks: 
snarks.iter().cloned().map_into().collect(), + flattened_instances, + as_proof: Value::known(as_proof), + batch_hash, + } + } + + pub fn succinct_verifying_key(&self) -> &Svk { + &self.svk + } + + pub fn snarks(&self) -> &[SnarkWitness] { + &self.snarks + } + + pub fn as_proof(&self) -> Value<&[u8]> { + self.as_proof.as_ref().map(Vec::as_slice) + } +} + +impl Circuit for AggregationCircuit { + type Config = (AggregationConfig, Challenges); + type FloorPlanner = SimpleFloorPlanner; + fn without_witnesses(&self) -> Self { + unimplemented!() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + let params = ConfigParams::aggregation_param(); + let challenges = Challenges::construct(meta); + let config = AggregationConfig::configure(meta, ¶ms, challenges); + log::info!( + "aggregation circuit configured with k = {} and {:?} advice columns", + params.degree, + params.num_advice + ); + (config, challenges) + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + let (config, challenge) = config; + + let witness_time = start_timer!(|| "synthesize | Aggregation Circuit"); + config + .range() + .load_lookup_table(&mut layouter) + .expect("load range lookup table"); + let mut first_pass = halo2_base::SKIP_FIRST_PASS; + + // This circuit takes 3 steps + // - 1. use aggregation circuit to aggregate the multiple snarks into a single one; + // re-export all the public input of the snarks, denoted by [snarks_instances], and the + // accumulator [acc_instances] + // - 2. use public input aggregation circuit to aggregate the chunks; expose the instance + // denoted by [pi_agg_instances] + // - 3. 
assert [snarks_instances] are private inputs used for public input aggregation + // circuit + + // ============================================== + // Step 1: snark aggregation circuit + // ============================================== + let mut accumulator_instances: Vec> = vec![]; + let mut snark_inputs: Vec> = vec![]; + layouter.assign_region( + || "aggregation", + |region| { + if first_pass { + first_pass = false; + return Ok(()); + } + let ctx = Context::new( + region, + ContextParams { + max_rows: config.gate().max_rows, + num_context_ids: 1, + fixed_columns: config.gate().constants.clone(), + }, + ); + + let ecc_chip = config.ecc_chip(); + let loader = Halo2Loader::new(ecc_chip, ctx); + + // + // extract the assigned values for + // - instances which are the public inputs of each chunk (prefixed with 12 instances + // from previous accumulators) + // - new accumulator to be verified on chain + // + let (assigned_aggregation_instances, acc) = aggregate::>( + &self.svk, + &loader, + &self.snarks, + self.as_proof(), + ); + log::trace!("aggregation circuit during assigning"); + for (i, e) in assigned_aggregation_instances[0].iter().enumerate() { + log::trace!("{}-th instance: {:?}", i, e.value) + } + + // extract the following cells for later constraints + // - the accumulators + // - the public input from snark + accumulator_instances.extend(flatten_accumulator(acc).iter().copied()); + // - the snark is not a fresh one, assigned_instances already contains an + // accumulator so we want to skip the first 12 elements from the public input + snark_inputs.extend( + assigned_aggregation_instances + .iter() + .flat_map(|instance_column| instance_column.iter().skip(20)), + ); + + config.range().finalize(&mut loader.ctx_mut()); + + loader.ctx_mut().print_stats(&["Range"]); + + Ok(()) + }, + )?; + + log::trace!("instance outside aggregation function"); + for (i, e) in snark_inputs.iter().enumerate() { + log::trace!("{}-th instance: {:?}", i, e.value) + } + // assert 
the accumulator in aggregation instance matchs public input + for (i, v) in accumulator_instances.iter().enumerate() { + layouter.constrain_instance(v.cell(), config.instance, i)?; + } + + // ============================================== + // step 2: public input aggregation circuit + // ============================================== + // extract all the hashes and load them to the hash table + let challenges = challenge.values(&layouter); + + let timer = start_timer!(|| ("extract hash").to_string()); + let preimages = self.batch_hash.extract_hash_preimages(); + end_timer!(timer); + + log::trace!("hash preimages"); + for (i, e) in preimages.iter().enumerate() { + log::trace!("{}-th hash preimage {:02x?}", i, e) + } + + let timer = start_timer!(|| ("load aux table").to_string()); + config + .keccak_circuit_config + .load_aux_tables(&mut layouter)?; + end_timer!(timer); + + let timer = start_timer!(|| ("assign cells").to_string()); + let (hash_input_cells, hash_output_cells) = assign_batch_hashes( + &config.keccak_circuit_config, + &mut layouter, + challenges, + &preimages, + )?; + end_timer!(timer); + + log::trace!("hash input"); + for v in hash_input_cells.iter() { + for (i, c) in v.iter().enumerate() { + log::trace!("{}-th {:?}", i, c.value()) + } + } + log::trace!("hash output"); + for v in hash_output_cells.iter() { + for (i, c) in v.iter().enumerate() { + log::trace!("{}-th {:?}", i, c.value()) + } + } + + // ============================================== + // step 3: aggregation circuit and public input aggregation circuit + // share common inputs + // ============================================== + // aggregation circuit's public input: + // - for each chunk: + // - data hash + // - public input hash + // Those are used as private inputs to the public input aggregation circuit + layouter.assign_region( + || "glue circuits", + |mut region| { + if first_pass { + first_pass = false; + return Ok(()); + } + + for chunk_idx in 0..self.snarks.len() { + // step 
3.1, data hash + // - batch_data_hash := keccak(chunk_0.data_hash || ... || chunk_k-1.data_hash) + // where batch_data_hash is the second hash for pi aggregation + for i in 0..32 { + region.constrain_equal( + // the first 32 inputs for the snark + snark_inputs[64 * chunk_idx + i].cell(), + hash_input_cells[1][chunk_idx * 32 + i].cell(), + )?; + } + // step 3.2, public input hash + // the public input hash for the i-th snark is the (i+2)-th hash + for i in 0..4 { + for j in 0..8 { + region.constrain_equal( + // the second 32 inputs for the snark + snark_inputs[64 * chunk_idx + i * 8 + j + 32].cell(), + hash_output_cells[chunk_idx + 2][(3 - i) * 8 + j].cell(), + )?; + } + } + } + + Ok(()) + }, + )?; + + // ==================================================== + // Last step: Constraint the hash data matches the public input + // ==================================================== + let acc_len = 12; + { + // batch_public_input_hash + for i in 0..4 { + for j in 0..8 { + // digest in circuit has a different endianness + layouter.constrain_instance( + hash_output_cells[0][(3 - i) * 8 + j].cell(), + config.instance, + i * 8 + j + acc_len, + )?; + } + } + } + + end_timer!(witness_time); + Ok(()) + } +} diff --git a/aggregator/src/aggregation/circuit_ext.rs b/aggregator/src/aggregation/circuit_ext.rs new file mode 100644 index 0000000000..9f6b09c5b3 --- /dev/null +++ b/aggregator/src/aggregation/circuit_ext.rs @@ -0,0 +1,31 @@ +use halo2_proofs::{halo2curves::bn256::Fr, plonk::Selector}; +use snark_verifier_sdk::CircuitExt; + +use crate::param::LIMBS; + +use super::AggregationCircuit; + +impl CircuitExt for AggregationCircuit { + fn num_instance(&self) -> Vec { + // accumulator [..lhs, ..rhs] + let acc_len = 4 * LIMBS; + // 32 elements for batch's public_input_hash + vec![acc_len + 32] + } + + fn instances(&self) -> Vec> { + vec![self.flattened_instances.clone()] + } + + fn accumulator_indices() -> Option> { + // the accumulator are the first 12 cells in the instance + 
Some((0..4 * LIMBS).map(|idx| (0, idx)).collect()) + } + + fn selectors(config: &Self::Config) -> Vec { + config.0.gate().basic_gates[0] + .iter() + .map(|gate| gate.q_enable) + .collect() + } +} diff --git a/aggregator/src/aggregation/config.rs b/aggregator/src/aggregation/config.rs new file mode 100644 index 0000000000..20664b6f04 --- /dev/null +++ b/aggregator/src/aggregation/config.rs @@ -0,0 +1,124 @@ +use halo2_proofs::{ + halo2curves::bn256::{Fq, Fr, G1Affine}, + plonk::{Column, ConstraintSystem, Instance}, +}; +use snark_verifier::{ + loader::halo2::halo2_ecc::{ + ecc::{BaseFieldEccChip, EccChip}, + fields::fp::FpConfig, + halo2_base::gates::{flex_gate::FlexGateConfig, range::RangeConfig}, + }, + util::arithmetic::modulus, +}; +use zkevm_circuits::{ + keccak_circuit::{KeccakCircuitConfig, KeccakCircuitConfigArgs}, + table::KeccakTable, + util::{Challenges, SubCircuitConfig}, +}; + +use crate::param::{ConfigParams, BITS, LIMBS}; + +#[derive(Debug, Clone)] +#[rustfmt::skip] +/// Configurations for aggregation circuit. +/// This config is hard coded for BN256 curve. +pub struct AggregationConfig { + /// Non-native field chip configurations + pub base_field_config: FpConfig, + /// Keccak circuit configurations + pub keccak_circuit_config: KeccakCircuitConfig, + /// Instance for public input; stores + /// - accumulator from aggregation (12 elements) + /// - batch_public_input_hash (32 elements) + pub instance: Column, +} + +impl AggregationConfig { + /// Build a configuration from parameters. 
+ pub fn configure( + meta: &mut ConstraintSystem, + params: &ConfigParams, + challenges: Challenges, + ) -> Self { + assert!( + params.limb_bits == BITS && params.num_limbs == LIMBS, + "For now we fix limb_bits = {}, otherwise change code", + BITS + ); + + // base field configuration for aggregation circuit + let base_field_config = FpConfig::configure( + meta, + params.strategy.clone(), + ¶ms.num_advice, + ¶ms.num_lookup_advice, + params.num_fixed, + params.lookup_bits, + BITS, + LIMBS, + modulus::(), + 0, + params.degree as usize, + ); + + // hash configuration for aggregation circuit + let keccak_circuit_config = { + let keccak_table = KeccakTable::construct(meta); + let challenges_exprs = challenges.exprs(meta); + + let keccak_circuit_config_args = KeccakCircuitConfigArgs { + keccak_table, + challenges: challenges_exprs, + }; + + KeccakCircuitConfig::new(meta, keccak_circuit_config_args) + }; + + // The current code base is hardcoded for KeccakCircuit configured + // with 300 rows and 87 columns per hash call. 
+ let columns = keccak_circuit_config.cell_manager.columns(); + + assert_eq!( + columns.len(), + 87, + "cell manager configuration does not match the hard coded setup" + ); + + // enabling equality for preimage and digest columns + meta.enable_equality(columns[6].advice); + // digest column + meta.enable_equality(columns.last().unwrap().advice); + + // Instance column stores public input column + // - the accumulator + // - the batch public input hash + let instance = meta.instance_column(); + meta.enable_equality(instance); + + Self { + base_field_config, + keccak_circuit_config, + instance, + } + } + + /// Expose the instance column + pub fn instance_column(&self) -> Column { + self.instance + } + + /// Range gate configuration + pub fn range(&self) -> &RangeConfig { + &self.base_field_config.range + } + + /// Flex gate configuration + pub fn gate(&self) -> &FlexGateConfig { + &self.base_field_config.range.gate + } + + /// Ecc gate configuration + pub fn ecc_chip(&self) -> BaseFieldEccChip { + EccChip::construct(self.base_field_config.clone()) + } +} diff --git a/aggregator/src/batch.rs b/aggregator/src/batch.rs new file mode 100644 index 0000000000..0632935c61 --- /dev/null +++ b/aggregator/src/batch.rs @@ -0,0 +1,197 @@ +//! This module implements related functions that aggregates public inputs of many chunks into a +//! single one. +//! +//! # Spec +//! +//! A chunk is a list of continuous blocks. It consists of 4 hashes: +//! - state root before this chunk +//! - state root after this chunk +//! - the withdraw root of this chunk +//! - the data hash of this chunk +//! Those 4 hashes are obtained from the caller. +//! +//! A chunk's public input hash is then derived from the above 4 attributes via +//! +//! - chunk_pi_hash := keccak(chain_id || prev_state_root || post_state_root || withdraw_root || +//! chunk_data_hash) +//! +//! A batch is a list of continuous chunks. It consists of 2 hashes +//! +//! - batch_data_hash := keccak(chunk_0.data_hash || ... 
|| chunk_k-1.data_hash) +//! +//! - batch_pi_hash := keccak(chain_id || chunk_0.prev_state_root || chunk_k-1.post_state_root || +//! chunk_k-1.withdraw_root || batch_data_hash) +//! +//! Note that chain_id is used for all public input hashes. But not for any data hashes. +//! +//! # Circuit +//! +//! A BatchHashCircuit asserts that the batch is well-formed. +//! +//! ## Public Input +//! The public inputs of the circuit (32 Field elements) is constructed as +//! - batch_pi_hash: 32 Field elements +//! +//! ## Constraints +//! The circuit attests the following statements: +//! +//! 1. all hashes are computed correctly +//! 2. the relations between hash preimages and digests are satisfied +//! - batch_data_hash is part of the input to compute batch_pi_hash +//! - batch_pi_hash used same roots as chunk_pi_hash +//! - same data_hash is used to compute batch_data_hash and chunk_pi_hash for all chunks +//! - chunks are continuous: they are linked via the state roots +//! - all hashes uses a same chain_id +//! 3. the batch_pi_hash matches the circuit's public input (32 field elements) above + +use eth_types::{Field, H256}; +use ethers_core::utils::keccak256; + +use super::chunk::ChunkHash; + +#[derive(Default, Debug, Clone)] +/// A batch is a set of continuous chunks. +/// A BatchHash consists of 2 hashes. +/// - batch_data_hash := keccak(chunk_0.data_hash || ... 
|| chunk_k-1.data_hash) +/// - batch_pi_hash := keccak(chain_id || chunk_0.prev_state_root || chunk_k-1.post_state_root || +/// chunk_k-1.withdraw_root || batch_data_hash) +pub struct BatchHash { + pub(crate) chain_id: u64, + pub(crate) chunks: Vec, + pub(crate) data_hash: H256, + pub(crate) public_input_hash: H256, +} + +impl BatchHash { + /// Sample a batch hash circuit from random (for testing) + #[cfg(test)] + pub(crate) fn mock_batch_hash_circuit(r: &mut R, size: usize) -> Self { + let mut chunks = (0..size) + .map(|_| ChunkHash::mock_chunk_hash(r)) + .collect::>(); + for i in 0..size - 1 { + chunks[i + 1].prev_state_root = chunks[i].post_state_root; + } + + Self::construct(&chunks) + } + + /// Build Batch hash from a list of chunks + pub(crate) fn construct(chunk_hashes: &[ChunkHash]) -> Self { + assert!(!chunk_hashes.is_empty(), "input chunk slice is empty"); + + // sanity: the chunks are continuous + for i in 0..chunk_hashes.len() - 1 { + assert_eq!( + chunk_hashes[i].post_state_root, + chunk_hashes[i + 1].prev_state_root, + ); + assert_eq!(chunk_hashes[i].chain_id, chunk_hashes[i + 1].chain_id,) + } + + // batch's data hash is build as + // keccak( chunk[0].data_hash || ... 
|| chunk[k-1].data_hash) + let preimage = chunk_hashes + .iter() + .flat_map(|chunk_hash| chunk_hash.data_hash.0.iter()) + .cloned() + .collect::>(); + let data_hash = keccak256(preimage); + + // public input hash is build as + // keccak( + // chain_id || + // chunk[0].prev_state_root || + // chunk[k-1].post_state_root || + // chunk[k-1].withdraw_root || + // batch_data_hash ) + let preimage = [ + chunk_hashes[0].chain_id.to_be_bytes().as_ref(), + chunk_hashes[0].prev_state_root.as_bytes(), + chunk_hashes.last().unwrap().post_state_root.as_bytes(), + chunk_hashes.last().unwrap().withdraw_root.as_bytes(), + data_hash.as_slice(), + ] + .concat(); + let public_input_hash = keccak256(preimage); + + Self { + chain_id: chunk_hashes[0].chain_id, + chunks: chunk_hashes.to_vec(), + data_hash: data_hash.into(), + public_input_hash: public_input_hash.into(), + } + } + + /// Extract all the hash inputs that will ever be used + /// orders: + /// - batch_public_input_hash + /// - batch_data_hash_preimage + /// - chunk\[i\].piHash for i in \[0, k) + pub(crate) fn extract_hash_preimages(&self) -> Vec> { + let mut res = vec![]; + + // batchPiHash = + // keccak( + // chain_id || + // chunk[0].prev_state_root || + // chunk[k-1].post_state_root || + // chunk[k-1].withdraw_root || + // batch_data_hash ) + let batch_public_input_hash_preimage = [ + self.chain_id.to_be_bytes().as_ref(), + self.chunks[0].prev_state_root.as_bytes(), + self.chunks.last().unwrap().post_state_root.as_bytes(), + self.chunks.last().unwrap().withdraw_root.as_bytes(), + self.data_hash.as_bytes(), + ] + .concat(); + res.push(batch_public_input_hash_preimage); + + // batchDataHash = keccak(chunk[0].dataHash || ... 
|| chunk[k-1].dataHash) + let batch_data_hash_preimage = self + .chunks + .iter() + .flat_map(|x| x.data_hash.as_bytes().iter()) + .cloned() + .collect(); + res.push(batch_data_hash_preimage); + + // compute piHash for each chunk for i in [0..k) + // chunk[i].piHash = + // keccak( + // chain id || + // chunk[i].prevStateRoot || chunk[i].postStateRoot || chunk[i].withdrawRoot || + // chunk[i].datahash) + for chunk in self.chunks.iter() { + let chunk_pi_hash_preimage = [ + self.chain_id.to_be_bytes().as_ref(), + chunk.prev_state_root.as_bytes(), + chunk.post_state_root.as_bytes(), + chunk.withdraw_root.as_bytes(), + chunk.data_hash.as_bytes(), + ] + .concat(); + res.push(chunk_pi_hash_preimage) + } + + res + } + + fn num_instance(&self) -> Vec { + // 12 elements from the accumulators + // 32 elements from batch_data_hash_digest + vec![44] + } + + /// Compute the public inputs for this circuit + /// which is the public_input_hash + pub(crate) fn instances(&self) -> Vec> { + vec![self + .public_input_hash + .as_bytes() + .iter() + .map(|&x| F::from(x as u64)) + .collect()] + } +} diff --git a/aggregator/src/chunk.rs b/aggregator/src/chunk.rs new file mode 100644 index 0000000000..84712936dc --- /dev/null +++ b/aggregator/src/chunk.rs @@ -0,0 +1,112 @@ +//! This module implements `Chunk` related data types. +//! A chunk is a list of blocks. +use eth_types::{ToBigEndian, H256}; +use ethers_core::utils::keccak256; +use halo2_proofs::halo2curves::bn256::Fr; +use std::iter; +use zkevm_circuits::witness::Block; + +#[derive(Default, Debug, Clone, Copy)] +/// A chunk is a set of continuous blocks. 
+/// A ChunkHash consists of 4 hashes, representing the changes incurred by this chunk of blocks: +/// - state root before this chunk +/// - state root after this chunk +/// - the withdraw root of this chunk +/// - the data hash of this chunk +pub struct ChunkHash { + /// Chain identifier + pub(crate) chain_id: u64, + /// state root before this chunk + pub(crate) prev_state_root: H256, + /// state root after this chunk + pub(crate) post_state_root: H256, + /// the withdraw root of this chunk + pub(crate) withdraw_root: H256, + /// the data hash of this chunk + pub(crate) data_hash: H256, +} + +impl From<&Block> for ChunkHash { + fn from(block: &Block) -> Self { + // + + let data_bytes = iter::empty() + .chain(block.context.ctxs.iter().flat_map(|(b_num, b_ctx)| { + let num_txs = block + .txs + .iter() + .filter(|tx| tx.block_number == *b_num) + .count() as u16; + + iter::empty() + // Block Values + .chain(b_ctx.number.as_u64().to_be_bytes()) + .chain(b_ctx.timestamp.as_u64().to_be_bytes()) + .chain(b_ctx.base_fee.to_be_bytes()) + .chain(b_ctx.gas_limit.to_be_bytes()) + .chain(num_txs.to_be_bytes()) + })) + // Tx Hashes + .chain(block.txs.iter().flat_map(|tx| tx.hash.to_fixed_bytes())) + .collect::>(); + + let data_hash = H256(keccak256(data_bytes)); + + let post_state_root = block + .context + .ctxs + .last_key_value() + .map(|(_, b_ctx)| b_ctx.eth_block.state_root) + .unwrap_or(H256(block.prev_state_root.to_be_bytes())); + + Self { + chain_id: block.chain_id, + prev_state_root: H256(block.prev_state_root.to_be_bytes()), + post_state_root, + withdraw_root: H256(block.withdraw_root.to_be_bytes()), + data_hash, + } + } +} + +impl ChunkHash { + /// Sample a chunk hash from random (for testing) + #[cfg(test)] + pub(crate) fn mock_chunk_hash(r: &mut R) -> Self { + let mut prev_state_root = [0u8; 32]; + r.fill_bytes(&mut prev_state_root); + let mut post_state_root = [0u8; 32]; + r.fill_bytes(&mut post_state_root); + let mut withdraw_root = [0u8; 32]; + r.fill_bytes(&mut 
withdraw_root); + let mut data_hash = [0u8; 32]; + r.fill_bytes(&mut data_hash); + Self { + chain_id: 0, + prev_state_root: prev_state_root.into(), + post_state_root: post_state_root.into(), + withdraw_root: withdraw_root.into(), + data_hash: data_hash.into(), + } + } + + /// Public input hash for a given chunk is defined as + /// keccak( chain id || prev state root || post state root || withdraw root || data hash ) + pub fn public_input_hash(&self) -> H256 { + let preimage = self.extract_hash_preimage(); + keccak256::<&[u8]>(preimage.as_ref()).into() + } + + /// Extract the preimage for the hash + /// chain id || prev state root || post state root || withdraw root || data hash + pub fn extract_hash_preimage(&self) -> Vec { + [ + self.chain_id.to_be_bytes().as_ref(), + self.prev_state_root.as_bytes(), + self.post_state_root.as_bytes(), + self.withdraw_root.as_bytes(), + self.data_hash.as_bytes(), + ] + .concat() + } +} diff --git a/aggregator/src/compression.rs b/aggregator/src/compression.rs new file mode 100644 index 0000000000..5ed296ac86 --- /dev/null +++ b/aggregator/src/compression.rs @@ -0,0 +1,14 @@ +//! Input a proof, a compression circuit generates a new proof that may have smaller size. +//! +//! It re-exposes same public inputs from the input snark. +//! All this circuit does is to reduce the proof size. + +/// Circuit implementation of compression circuit. +mod circuit; +/// CircuitExt implementation of compression circuit. +mod circuit_ext; +/// Config for compression circuit +mod config; + +pub use circuit::CompressionCircuit; +pub use config::CompressionConfig; diff --git a/aggregator/src/compression/circuit.rs b/aggregator/src/compression/circuit.rs new file mode 100644 index 0000000000..cb55d7f575 --- /dev/null +++ b/aggregator/src/compression/circuit.rs @@ -0,0 +1,237 @@ +//! Circuit implementation for compression circuit. 
+ +use std::fs::File; + +use ark_std::{end_timer, start_timer}; +use halo2_proofs::{ + circuit::{Layouter, SimpleFloorPlanner, Value}, + halo2curves::bn256::{Fq, G1Affine}, + plonk::{Circuit, ConstraintSystem, Error}, +}; +use rand::Rng; +use snark_verifier::{ + loader::{ + halo2::{ + halo2_ecc::halo2_base::{ + self, + halo2_proofs::{ + halo2curves::bn256::{Bn256, Fr}, + poly::{commitment::ParamsProver, kzg::commitment::ParamsKZG}, + }, + Context, ContextParams, + }, + Halo2Loader, + }, + native::NativeLoader, + }, + pcs::kzg::{Bdfg21, Kzg, KzgAccumulator, KzgSuccinctVerifyingKey}, + util::arithmetic::fe_to_limbs, +}; +use snark_verifier_sdk::{aggregate, flatten_accumulator, types::Svk, Snark, SnarkWitness}; + +use crate::{ + core::extract_accumulators_and_proof, + param::{ConfigParams, BITS, LIMBS}, +}; + +use super::config::CompressionConfig; + +/// Input a proof, this compression circuit generates a new proof that may have smaller size. +/// +/// It re-exposes same public inputs from the input snark. +/// All this circuit does is to reduce the proof size. +#[derive(Clone)] +pub struct CompressionCircuit { + pub(crate) svk: KzgSuccinctVerifyingKey, + pub(crate) snark: SnarkWitness, + /// whether this circuit compresses a fresh snark + pub(crate) is_fresh: bool, + /// instances, flattened. + /// It re-exposes same public inputs from the input snark. + /// If the previous snark is already a compressed, this flattened_instances will + /// exclude the previous accumulator. 
+ pub(crate) flattened_instances: Vec, + // accumulation scheme proof, private input + pub(crate) as_proof: Value>, +} + +impl Circuit for CompressionCircuit { + type Config = CompressionConfig; + type FloorPlanner = SimpleFloorPlanner; + + fn without_witnesses(&self) -> Self { + let flattened_instances = self + .snark + .instances + .iter() + .flat_map(|instance| instance.iter().map(|_| Fr::zero())) + .collect(); + + Self { + svk: self.svk, + snark: SnarkWitness::without_witnesses(&self.snark), + is_fresh: true, + flattened_instances, + as_proof: Value::unknown(), + } + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + // Too bad that configure function doesn't take additional input + // it would be nicer to load parameters from API rather than ENV + let path = std::env::var("VERIFY_CONFIG") + .unwrap_or_else(|_| "configs/verify_circuit.config".to_owned()); + let params: ConfigParams = serde_json::from_reader( + File::open(path.as_str()).unwrap_or_else(|_| panic!("{path:?} does not exist")), + ) + .unwrap(); + + log::info!( + "compression circuit configured with k = {} and {:?} advice columns", + params.degree, + params.num_advice + ); + + // circuit configuration is built from config with given num columns etc + // can be wide or thin circuit + Self::Config::configure(meta, params) + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + let witness_time = start_timer!(|| "synthesize | compression Circuit"); + config + .range() + .load_lookup_table(&mut layouter) + .expect("load range lookup table"); + let mut first_pass = halo2_base::SKIP_FIRST_PASS; + let mut instances = vec![]; + layouter.assign_region( + || "compression circuit", + |region| { + if first_pass { + first_pass = false; + return Ok(()); + } + let ctx = Context::new( + region, + ContextParams { + max_rows: config.gate().max_rows, + num_context_ids: 1, + fixed_columns: config.gate().constants.clone(), + }, + ); + + let 
ecc_chip = config.ecc_chip(); + let loader = Halo2Loader::new(ecc_chip, ctx); + let (assigned_instances, acc) = aggregate::>( + &self.svk, + &loader, + &[self.snark.clone()], + self.as_proof(), + ); + + // instance of the compression circuit is defined as + // - accumulators + // - re-export the public input from snark + instances.extend( + flatten_accumulator(acc) + .iter() + .map(|assigned| assigned.cell()), + ); + // - if the snark is not a fresh one, assigned_instances already contains an + // accumulator so we want to skip the first 12 elements from the public input + let skip = if self.is_fresh { 0 } else { 12 }; + instances.extend(assigned_instances.iter().flat_map(|instance_column| { + instance_column.iter().skip(skip).map(|x| x.cell()) + })); + + config.range().finalize(&mut loader.ctx_mut()); + + loader.ctx_mut().print_stats(&["Range"]); + Ok(()) + }, + )?; + + // Expose instances + for (i, cell) in instances.into_iter().enumerate() { + layouter.constrain_instance(cell, config.instance, i)?; + } + + end_timer!(witness_time); + Ok(()) + } +} + +impl CompressionCircuit { + /// Build a new circuit from a snark, with a flag whether this snark has been compressed before + pub fn new( + params: &ParamsKZG, + snark: Snark, + is_fresh: bool, + rng: impl Rng + Send, + ) -> Self { + let svk = params.get_g()[0].into(); + + // for the proof compression, only ONE snark is under accumulation + // it is turned into an accumulator via KzgAs accumulation scheme + // in case not first time: + // (old_accumulator, public inputs) -> (new_accumulator, public inputs) + let (accumulator, as_proof) = extract_accumulators_and_proof(params, &[snark.clone()], rng); + + // the instance for the outer circuit is + // - new accumulator, consists of 12 elements + // - inner circuit's instance, flattened (old accumulator is stripped out if exists) + // + // it is important that new accumulator is the first 12 elements + // as specified in CircuitExt::accumulator_indices() + let 
KzgAccumulator:: { lhs, rhs } = accumulator; + let acc_instances = [lhs.x, lhs.y, rhs.x, rhs.y] + .map(fe_to_limbs::) + .concat(); + // skip the old accumulator if exists + let skip = if is_fresh { 0 } else { 12 }; + let snark_instance = snark + .instances + .iter() + .flat_map(|instance| instance.iter().skip(skip)); + + let flattened_instances = acc_instances + .iter() + .chain(snark_instance) + .cloned() + .collect::>(); + + { + log::trace!("acc lhs: {:?}", lhs); + log::trace!("acc rhs: {:?}", rhs); + log::trace!("flattened instances:"); + for i in flattened_instances.iter() { + log::trace!("{:?}", i); + } + } + + Self { + svk, + snark: snark.into(), + is_fresh, + flattened_instances, + as_proof: Value::known(as_proof), + } + } + + pub fn succinct_verifying_key(&self) -> &Svk { + &self.svk + } + + pub fn snark(&self) -> &SnarkWitness { + &self.snark + } + + pub fn as_proof(&self) -> Value<&[u8]> { + self.as_proof.as_ref().map(Vec::as_slice) + } +} diff --git a/aggregator/src/compression/circuit_ext.rs b/aggregator/src/compression/circuit_ext.rs new file mode 100644 index 0000000000..94186a28f2 --- /dev/null +++ b/aggregator/src/compression/circuit_ext.rs @@ -0,0 +1,36 @@ +//! CircuitExt implementation for compression circuit. 
+ +use halo2_proofs::{halo2curves::bn256::Fr, plonk::Selector}; +use snark_verifier_sdk::CircuitExt; + +use crate::param::LIMBS; + +use super::circuit::CompressionCircuit; + +impl CircuitExt for CompressionCircuit { + fn num_instance(&self) -> Vec { + // [..lhs, ..rhs] + let acc_len = 4 * LIMBS; + // re-expose inner public input + let snark_pi_len: usize = self.snark.instances.iter().map(|x| x.len()).sum(); + + // if the snark is not fresh, the snark_pi already contains elements for the accumulator + vec![snark_pi_len + acc_len * self.is_fresh as usize] + } + + fn instances(&self) -> Vec> { + vec![self.flattened_instances.clone()] + } + + fn accumulator_indices() -> Option> { + // the accumulator are the first 12 cells in the instance + Some((0..4 * LIMBS).map(|idx| (0, idx)).collect()) + } + + fn selectors(config: &Self::Config) -> Vec { + config.gate().basic_gates[0] + .iter() + .map(|gate| gate.q_enable) + .collect() + } +} diff --git a/aggregator/src/compression/config.rs b/aggregator/src/compression/config.rs new file mode 100644 index 0000000000..ce66edeb07 --- /dev/null +++ b/aggregator/src/compression/config.rs @@ -0,0 +1,71 @@ +use halo2_proofs::{ + halo2curves::bn256::{Fq, Fr, G1Affine}, + plonk::{Column, ConstraintSystem, Instance}, +}; +use snark_verifier::loader::halo2::halo2_ecc::{ + ecc::{BaseFieldEccChip, EccChip}, + fields::fp::FpConfig, + halo2_base::{ + gates::{flex_gate::FlexGateConfig, range::RangeConfig}, + utils::modulus, + }, +}; + +use crate::param::{ConfigParams, BITS, LIMBS}; + +#[derive(Clone, Debug)] +/// Configurations for compression circuit +/// This config is hard coded for BN256 curve +pub struct CompressionConfig { + /// Non-native field chip configurations + pub base_field_config: FpConfig, + /// Instance for public input + pub instance: Column, +} + +impl CompressionConfig { + /// Build a configuration from parameters. 
+ pub fn configure(meta: &mut ConstraintSystem, params: ConfigParams) -> Self { + assert!( + params.limb_bits == BITS && params.num_limbs == LIMBS, + "For now we fix limb_bits = {}, otherwise change code", + BITS + ); + let base_field_config = FpConfig::configure( + meta, + params.strategy, + ¶ms.num_advice, + ¶ms.num_lookup_advice, + params.num_fixed, + params.lookup_bits, + BITS, + LIMBS, + modulus::(), + 0, + params.degree as usize, + ); + + let instance = meta.instance_column(); + meta.enable_equality(instance); + + Self { + base_field_config, + instance, + } + } + + /// Range gate configuration + pub fn range(&self) -> &RangeConfig { + &self.base_field_config.range + } + + /// Flex gate configuration + pub fn gate(&self) -> &FlexGateConfig { + &self.base_field_config.range.gate + } + + /// Ecc gate configuration + pub fn ecc_chip(&self) -> BaseFieldEccChip { + EccChip::construct(self.base_field_config.clone()) + } +} diff --git a/aggregator/src/core.rs b/aggregator/src/core.rs new file mode 100644 index 0000000000..33db582f63 --- /dev/null +++ b/aggregator/src/core.rs @@ -0,0 +1,319 @@ +use ark_std::{end_timer, start_timer}; +use eth_types::Field; +use halo2_proofs::{ + circuit::{AssignedCell, Layouter, Value}, + halo2curves::bn256::{Bn256, G1Affine}, + plonk::Error, + poly::{commitment::ParamsProver, kzg::commitment::ParamsKZG}, +}; +use rand::Rng; +use snark_verifier::{ + loader::native::NativeLoader, + pcs::{ + kzg::{Bdfg21, Kzg, KzgAccumulator, KzgAs}, + AccumulationSchemeProver, + }, + verifier::PlonkVerifier, +}; +use snark_verifier_sdk::{ + types::{PoseidonTranscript, Shplonk, POSEIDON_SPEC}, + Snark, +}; +use zkevm_circuits::{ + keccak_circuit::{keccak_packed_multi::multi_keccak, KeccakCircuitConfig}, + table::LookupTable, + util::Challenges, +}; + +use crate::{ + util::{assert_equal, capacity, get_indices}, + CHAIN_ID_LEN, CHUNK_DATA_HASH_INDEX, LOG_DEGREE, POST_STATE_ROOT_INDEX, PREV_STATE_ROOT_INDEX, + WITHDRAW_ROOT_INDEX, +}; + +/// Input the hash 
input bytes, +/// assign the circuit for the hash function, +/// return cells of the hash inputs and digests. +#[allow(clippy::type_complexity)] +pub(crate) fn assign_batch_hashes( + config: &KeccakCircuitConfig, + layouter: &mut impl Layouter, + challenges: Challenges>, + preimages: &[Vec], +) -> Result< + ( + Vec>>, // input cells + Vec>>, // digest cells + ), + Error, +> { + let mut is_first_time = true; + let num_rows = 1 << LOG_DEGREE; + + let timer = start_timer!(|| ("multi keccak").to_string()); + // preimages consists of the following parts + // (1) batchPiHash preimage = + // (chain_id || + // chunk[0].prev_state_root || + // chunk[k-1].post_state_root || + // chunk[k-1].withdraw_root || + // batch_data_hash) + // (2) batchDataHash preimage = + // (chunk[0].dataHash || ... || chunk[k-1].dataHash) + // (3) chunk[i].piHash preimage = + // (chain id || + // chunk[i].prevStateRoot || chunk[i].postStateRoot || + // chunk[i].withdrawRoot || chunk[i].datahash) + // each part of the preimage is mapped to image by Keccak256 + let witness = multi_keccak(preimages, challenges, capacity(num_rows))?; + end_timer!(timer); + + // extract the indices of the rows for which the preimage and the digest cells lie in + let (preimage_indices, digest_indices) = get_indices(preimages); + let mut preimage_indices_iter = preimage_indices.iter(); + let mut digest_indices_iter = digest_indices.iter(); + + let mut hash_input_cells = vec![]; + let mut hash_output_cells = vec![]; + + let mut cur_preimage_index = preimage_indices_iter.next(); + let mut cur_digest_index = digest_indices_iter.next(); + + layouter.assign_region( + || "assign keccak rows", + |mut region| { + if is_first_time { + is_first_time = false; + let offset = witness.len() - 1; + config.set_row(&mut region, offset, &witness[offset])?; + return Ok(()); + } + // ==================================================== + // Step 1. 
Extract the hash cells + // ==================================================== + let mut current_hash_input_cells = vec![]; + let mut current_hash_output_cells = vec![]; + + let timer = start_timer!(|| "assign row"); + for (offset, keccak_row) in witness.iter().enumerate() { + let row = config.set_row(&mut region, offset, keccak_row)?; + + if cur_preimage_index.is_some() && *cur_preimage_index.unwrap() == offset { + // 7-th column is Keccak input in Keccak circuit + current_hash_input_cells.push(row[6].clone()); + cur_preimage_index = preimage_indices_iter.next(); + } + if cur_digest_index.is_some() && *cur_digest_index.unwrap() == offset { + // last column is Keccak output in Keccak circuit + current_hash_output_cells.push(row.last().unwrap().clone()); + cur_digest_index = digest_indices_iter.next(); + } + + // we reset the current hash when it is finalized + // note that length == 0 indicate that the hash is a padding + // so we simply skip it + if keccak_row.is_final && keccak_row.length != 0 { + hash_input_cells.push(current_hash_input_cells); + hash_output_cells.push(current_hash_output_cells); + current_hash_input_cells = vec![]; + current_hash_output_cells = vec![]; + } + } + end_timer!(timer); + + // sanity: we have same number of hash input and output + let hash_num = hash_input_cells.len(); + let num_chunks = hash_num - 2; + assert_eq!(hash_num, preimages.len()); + assert_eq!(hash_num, hash_output_cells.len()); + + // ==================================================== + // Step 2. 
Constraint the relations between hash preimages and digests + // ==================================================== + // + // 2.1 batch_data_hash digest is reused for public input hash + // + // public input hash is build as + // keccak( + // chain_id || + // chunk[0].prev_state_root || + // chunk[k-1].post_state_root || + // chunk[k-1].withdraw_root || + // batch_data_hash ) + for i in 0..4 { + for j in 0..8 { + // sanity check + // CHUNK_DATA_HASH_INDEX is the byte position for batch_data_hash + assert_equal( + &hash_input_cells[0][i * 8 + j + CHUNK_DATA_HASH_INDEX], + &hash_output_cells[1][(3 - i) * 8 + j], + ); + region.constrain_equal( + // preimage and digest has different endianness + hash_input_cells[0][i * 8 + j + CHUNK_DATA_HASH_INDEX].cell(), + hash_output_cells[1][(3 - i) * 8 + j].cell(), + )?; + } + } + + // 2.2 batch_pi_hash used same roots as chunk_pi_hash + // + // batch_pi_hash = + // keccak( + // chain_id || + // chunk[0].prev_state_root || + // chunk[k-1].post_state_root || + // chunk[k-1].withdraw_root || + // batchData_hash ) + // + // chunk[i].piHash = + // keccak( + // chain id || + // chunk[i].prevStateRoot || + // chunk[i].postStateRoot || + // chunk[i].withdrawRoot || + // chunk[i].datahash) + // + // PREV_STATE_ROOT_INDEX, POST_STATE_ROOT_INDEX, WITHDRAW_ROOT_INDEX + // used below are byte positions for + // prev_state_root, post_state_root, withdraw_root + for i in 0..32 { + // 2.2.1 chunk[0].prev_state_root + // sanity check + assert_equal( + &hash_input_cells[0][i + PREV_STATE_ROOT_INDEX], + &hash_input_cells[2][i + PREV_STATE_ROOT_INDEX], + ); + region.constrain_equal( + hash_input_cells[0][i + PREV_STATE_ROOT_INDEX].cell(), + hash_input_cells[2][i + PREV_STATE_ROOT_INDEX].cell(), + )?; + // 2.2.2 chunk[k-1].post_state_root + // sanity check + assert_equal( + &hash_input_cells[0][i + POST_STATE_ROOT_INDEX], + &hash_input_cells[hash_num - 1][i + POST_STATE_ROOT_INDEX], + ); + region.constrain_equal( + hash_input_cells[0][i + 
POST_STATE_ROOT_INDEX].cell(), + hash_input_cells[hash_num - 1][i + POST_STATE_ROOT_INDEX].cell(), + )?; + // 2.2.3 chunk[k-1].withdraw_root + assert_equal( + &hash_input_cells[0][i + WITHDRAW_ROOT_INDEX], + &hash_input_cells[hash_num - 1][i + WITHDRAW_ROOT_INDEX], + ); + region.constrain_equal( + hash_input_cells[0][i + WITHDRAW_ROOT_INDEX].cell(), + hash_input_cells[hash_num - 1][i + WITHDRAW_ROOT_INDEX].cell(), + )?; + } + + // 2.3 same dataHash is used for batchDataHash and chunk[i].piHash + // + // batchDataHash = keccak(chunk[0].dataHash || ... || chunk[k-1].dataHash) + // + // chunk[i].piHash = + // keccak( + // &chain id || + // chunk[i].prevStateRoot || + // chunk[i].postStateRoot || + // chunk[i].withdrawRoot || + // chunk[i].datahash) + for (i, chunk) in hash_input_cells[1].chunks(32).enumerate().take(num_chunks) { + for (j, cell) in chunk.iter().enumerate() { + // sanity check + assert_equal(cell, &hash_input_cells[2 + i][j + CHUNK_DATA_HASH_INDEX]); + region.constrain_equal( + cell.cell(), + hash_input_cells[2 + i][j + CHUNK_DATA_HASH_INDEX].cell(), + )?; + } + } + + // 2.4 chunks are continuous: they are linked via the state roots + for i in 0..num_chunks - 1 { + for j in 0..32 { + // sanity check + assert_equal( + &hash_input_cells[i + 3][PREV_STATE_ROOT_INDEX + j], + &hash_input_cells[i + 2][POST_STATE_ROOT_INDEX + j], + ); + region.constrain_equal( + // chunk[i+1].prevStateRoot + hash_input_cells[i + 3][PREV_STATE_ROOT_INDEX + j].cell(), + // chunk[i].postStateRoot + hash_input_cells[i + 2][POST_STATE_ROOT_INDEX + j].cell(), + )?; + } + } + + // 2.5 assert hashes use a same chain id + for i in 0..num_chunks { + for j in 0..CHAIN_ID_LEN { + // sanity check + assert_equal(&hash_input_cells[0][j], &hash_input_cells[i + 2][j]); + region.constrain_equal( + // chunk[i+1].prevStateRoot + hash_input_cells[0][j].cell(), + // chunk[i].postStateRoot + hash_input_cells[i + 2][j].cell(), + )?; + } + } + + config.keccak_table.annotate_columns_in_region(&mut 
region); + config.annotate_circuit(&mut region); + Ok(()) + }, + )?; + + Ok((hash_input_cells, hash_output_cells)) +} + +/// Subroutine for the witness generations. +/// Extract the accumulator and proof that from previous snarks. +/// Uses SHPlonk for accumulation. +pub(crate) fn extract_accumulators_and_proof( + params: &ParamsKZG, + snarks: &[Snark], + rng: impl Rng + Send, +) -> (KzgAccumulator, Vec) { + let svk = params.get_g()[0].into(); + + let mut transcript_read = + PoseidonTranscript::::from_spec(&[], POSEIDON_SPEC.clone()); + let accumulators = snarks + .iter() + .flat_map(|snark| { + transcript_read.new_stream(snark.proof.as_slice()); + let proof = Shplonk::read_proof( + &svk, + &snark.protocol, + &snark.instances, + &mut transcript_read, + ); + // each accumulator has (lhs, rhs) based on Shplonk + // lhs and rhs are EC points + Shplonk::succinct_verify(&svk, &snark.protocol, &snark.instances, &proof) + }) + .collect::>(); + + let mut transcript_write = + PoseidonTranscript::>::from_spec(vec![], POSEIDON_SPEC.clone()); + // We always use SHPLONK for accumulation scheme when aggregating proofs + let accumulator = + // core step + // KzgAs does KZG accumulation scheme based on given accumulators and random number (for adding blinding) + // accumulated ec_pt = ec_pt_1 * 1 + ec_pt_2 * r + ... + ec_pt_n * r^{n-1} + // ec_pt can be lhs and rhs + // r is the challenge squeezed from proof + KzgAs::>::create_proof::>, _>( + &Default::default(), + &accumulators, + &mut transcript_write, + rng, + ) + .unwrap(); + (accumulator, transcript_write.finalize()) +} diff --git a/aggregator/src/lib.rs b/aggregator/src/lib.rs new file mode 100644 index 0000000000..2c8d22b66d --- /dev/null +++ b/aggregator/src/lib.rs @@ -0,0 +1,25 @@ +// This module implements `Chunk` related data types. +// A chunk is a list of blocks. +mod chunk; +// This module implements `Batch` related data types. +// A batch is a list of chunk. 
+/// proof aggregation +mod aggregation; +mod batch; +/// proof compression +mod compression; +/// Core module for circuit assignment +mod core; +/// Parameters for compression circuit +mod param; +/// utilities +mod util; + +#[cfg(test)] +mod tests; + +pub use aggregation::*; +pub use batch::BatchHash; +pub use chunk::ChunkHash; +pub use compression::*; +pub use param::*; diff --git a/aggregator/src/param.rs b/aggregator/src/param.rs new file mode 100644 index 0000000000..8b042211e4 --- /dev/null +++ b/aggregator/src/param.rs @@ -0,0 +1,58 @@ +use snark_verifier::loader::halo2::halo2_ecc::fields::fp::FpStrategy; + +pub(crate) const LIMBS: usize = 3; +pub(crate) const BITS: usize = 88; + +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] +/// Parameters for aggregation circuit and compression circuit configs. +pub struct ConfigParams { + pub strategy: FpStrategy, + pub degree: u32, + pub num_advice: Vec, + pub num_lookup_advice: Vec, + pub num_fixed: usize, + pub lookup_bits: usize, + pub limb_bits: usize, + pub num_limbs: usize, +} + +impl ConfigParams { + pub(crate) fn aggregation_param() -> Self { + Self { + strategy: FpStrategy::Simple, + degree: 23, + num_advice: vec![8], + num_lookup_advice: vec![1], + num_fixed: 1, + lookup_bits: 20, + limb_bits: 88, + num_limbs: 3, + } + } + + pub(crate) fn _compress_wide_param() -> Self { + Self { + strategy: FpStrategy::Simple, + degree: 22, + num_advice: vec![35], + num_lookup_advice: vec![1], + num_fixed: 1, + lookup_bits: 20, + limb_bits: 88, + num_limbs: 3, + } + } + + pub(crate) fn _compress_thin_param() -> Self { + Self { + strategy: FpStrategy::Simple, + degree: 25, + num_advice: vec![1], + num_lookup_advice: vec![1], + num_fixed: 1, + lookup_bits: 20, + limb_bits: 88, + num_limbs: 3, + } + } +} diff --git a/aggregator/src/tests.rs b/aggregator/src/tests.rs new file mode 100644 index 0000000000..e80b41178d --- /dev/null +++ b/aggregator/src/tests.rs @@ -0,0 +1,197 @@ +pub(crate) mod aggregation; 
+pub(crate) mod end_to_end; +pub(crate) mod compression; +pub(crate) mod mock_chunk; + +#[macro_export] +macro_rules! layer_0 { + // generate a snark for layer 0 + ($circuit: ident, $circuit_type: ident, $param: ident, $degree: ident, $path: ident) => {{ + let timer = start_timer!(|| "gen layer 0 snark"); + + let mut rng = test_rng(); + let param = { + let mut param = $param.clone(); + param.downsize($degree); + param + }; + + let pk = gen_pk( + ¶m, + &$circuit, + Some(&$path.join(Path::new("layer_0.pkey"))), + ); + log::trace!("finished layer 0 pk generation for circuit"); + + let snark = gen_snark_shplonk( + ¶m, + &pk, + $circuit.clone(), + &mut rng, + None::, + // Some(&$path.join(Path::new("layer_0.snark"))), + ); + log::trace!("finished layer 0 snark generation for circuit"); + + assert!(verify_snark_shplonk::<$circuit_type>( + ¶m, + snark.clone(), + pk.get_vk() + )); + + log::trace!("finished layer 0 snark verification"); + log::trace!("proof size: {}", snark.proof.len()); + log::trace!( + "pi size: {}", + snark.instances.iter().map(|x| x.len()).sum::() + ); + + log::trace!("layer 0 circuit instances"); + for (i, e) in $circuit.instances()[0].iter().enumerate() { + log::trace!("{}-th public input: {:?}", i, e); + } + end_timer!(timer); + snark + }}; +} + +#[macro_export] +macro_rules! 
compression_layer_snark { + // generate a snark for compression layer + ($previous_snark: ident, $param: ident, $degree: ident, $path: ident, $layer_index: expr) => {{ + let timer = start_timer!(|| format!("gen layer {} snark", $layer_index)); + + let param = { + let mut param = $param.clone(); + param.downsize($degree); + param + }; + + let mut rng = test_rng(); + + let is_fresh = if $layer_index == 1 { true } else { false }; + let compression_circuit = + CompressionCircuit::new(&$param, $previous_snark.clone(), is_fresh, &mut rng); + + let pk = gen_pk(&$param, &compression_circuit, None); + // build the snark for next layer + let snark = gen_snark_shplonk( + ¶m, + &pk, + compression_circuit.clone(), + &mut rng, + None::, // Some(&$path.join(Path::new("layer_1.snark"))), + ); + log::trace!( + "finished layer {} snark generation for circuit", + $layer_index + ); + + assert!(verify_snark_shplonk::( + ¶m, + snark.clone(), + pk.get_vk() + )); + + end_timer!(timer); + snark + }}; +} + +#[macro_export] +macro_rules! 
compression_layer_evm { + // generate a evm proof and verify it for compression layer + ($previous_snark: ident, $param: ident, $degree: ident, $path: ident,$layer_index: expr) => {{ + let timer = start_timer!(|| format!("gen layer {} snark", $layer_index)); + + let param = { + let mut param = $param.clone(); + param.downsize($degree); + param + }; + + let mut rng = test_rng(); + + let compression_circuit = + CompressionCircuit::new(&$param, $previous_snark, false, &mut rng); + + let instances = compression_circuit.instances(); + + let pk = gen_pk(&$param, &compression_circuit, None); + // build the snark for next layer + let proof = gen_evm_proof_shplonk( + ¶m, + &pk, + compression_circuit.clone(), + instances.clone(), + &mut rng, + ); + + log::trace!("finished layer 4 aggregation generation"); + log::trace!("proof size: {}", proof.len()); + + // verify proof via EVM + let deployment_code = gen_evm_verifier::>( + ¶m, + pk.get_vk(), + compression_circuit.num_instance(), + Some(&$path.join(Path::new("contract.sol"))), + ); + log::trace!("finished layer 4 bytecode generation"); + + evm_verify( + deployment_code, + compression_circuit.instances(), + proof.clone(), + ); + log::trace!("layer 2 evm verification finished"); + + end_timer!(timer); + }}; +} + +#[macro_export] +macro_rules! 
aggregation_layer_snark { + // generate a snark for compression layer + ($previous_snarks: ident, $param: ident, $degree: ident, $path: ident, $layer_index: expr, $chunks: ident) => {{ + let timer = start_timer!(|| format!("gen layer {} snark", $layer_index)); + + let param = { + let mut param = $param.clone(); + param.downsize($degree); + param + }; + + let mut rng = test_rng(); + + let aggregation_circuit = AggregationCircuit::new( + &$param, + $previous_snarks.as_ref(), + &mut rng, + $chunks.as_ref(), + ); + + let pk = gen_pk(&$param, &aggregation_circuit, None); + // build the snark for next layer + let snark = gen_snark_shplonk( + ¶m, + &pk, + aggregation_circuit.clone(), + &mut rng, + None::, // Some(&$path.join(Path::new("layer_3.snark"))), + ); + log::trace!( + "finished layer {} snark generation for circuit", + $layer_index + ); + + assert!(verify_snark_shplonk::( + ¶m, + snark.clone(), + pk.get_vk() + )); + + end_timer!(timer); + snark + }}; +} diff --git a/aggregator/src/tests/aggregation.rs b/aggregator/src/tests/aggregation.rs new file mode 100644 index 0000000000..0fdb66c1f6 --- /dev/null +++ b/aggregator/src/tests/aggregation.rs @@ -0,0 +1,133 @@ +use std::{fs, path::Path, process}; + +use ark_std::{end_timer, start_timer, test_rng}; +use halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr, poly::commitment::Params}; +use itertools::Itertools; +use snark_verifier::loader::halo2::halo2_ecc::halo2_base::utils::fs::gen_srs; +use snark_verifier_sdk::{gen_pk, gen_snark_shplonk, verify_snark_shplonk, CircuitExt}; + +use crate::{compression_layer_snark, layer_0, AggregationCircuit, ChunkHash, CompressionCircuit}; + +use super::mock_chunk::MockChunkCircuit; + +const CHUNKS_PER_BATCH: usize = 2; + +#[test] +fn test_mock_aggregation() { + env_logger::init(); + let process_id = process::id(); + + let dir = format!("data/{}", process_id); + let path = Path::new(dir.as_str()); + fs::create_dir(path).unwrap(); + + // inner circuit: Mock circuit + let k0 = 8; + 
// aggregation + let k1 = 23; + + let mut rng = test_rng(); + let params = gen_srs(k1); + + let mut chunks = (0..CHUNKS_PER_BATCH) + .map(|_| ChunkHash::mock_chunk_hash(&mut rng)) + .collect_vec(); + for i in 0..CHUNKS_PER_BATCH - 1 { + chunks[i + 1].prev_state_root = chunks[i].post_state_root; + } + // Proof for test circuit + let circuits = chunks + .iter() + .map(|&chunk| MockChunkCircuit::new(false, 0, chunk)) + .collect_vec(); + let layer_0_snarks = circuits + .iter() + .map(|&circuit| layer_0!(circuit, MockChunkCircuit, params, k0, path)) + .collect_vec(); + + // layer 1 proof aggregation + { + let param = { + let mut param = params; + param.downsize(k1); + param + }; + let aggregation_circuit = + AggregationCircuit::new(¶m, &layer_0_snarks, &mut rng, chunks.as_ref()); + let instance = aggregation_circuit.instances(); + println!("instance length {:?}", instance.len()); + + let mock_prover = MockProver::::run(k1, &aggregation_circuit, instance).unwrap(); + + mock_prover.assert_satisfied_par() + } +} + +// This test takes about 1 hour on CPU +#[ignore = "it takes too much time"] +#[test] +fn test_aggregation_circuit() { + let process_id = process::id(); + + let dir = format!("data/{}", process_id); + let path = Path::new(dir.as_str()); + fs::create_dir(path).unwrap(); + + // inner circuit: Mock circuit + let k0 = 8; + // wide compression + let k1 = 26; + // thin compression + let k2 = 26; + // aggregation + let k3 = 26; + + let mut rng = test_rng(); + let params = gen_srs(k2); + + let mut chunks = (0..CHUNKS_PER_BATCH) + .map(|_| ChunkHash::mock_chunk_hash(&mut rng)) + .collect_vec(); + for i in 0..CHUNKS_PER_BATCH - 1 { + chunks[i + 1].prev_state_root = chunks[i].post_state_root; + } + // Proof for test circuit + let circuits = chunks + .iter() + .map(|&chunk| MockChunkCircuit::new(true, 0, chunk)) + .collect_vec(); + let layer_0_snarks = circuits + .iter() + .map(|&circuit| layer_0!(circuit, MockChunkCircuit, params, k0, path)) + .collect_vec(); + + // Layer 
1 proof compression + std::env::set_var("VERIFY_CONFIG", "./configs/compression_wide.config"); + let layer_1_snarks = layer_0_snarks + .iter() + .map(|layer_0_snark| compression_layer_snark!(layer_0_snark, params, k1, path, 1)) + .collect_vec(); + + // Layer 2 proof compression + std::env::set_var("VERIFY_CONFIG", "./configs/compression_thin.config"); + let layer_2_snarks = layer_1_snarks + .iter() + .map(|layer_1_snark| compression_layer_snark!(layer_1_snark, params, k2, path, 2)) + .collect_vec(); + + // layer 3 proof aggregation + { + let param = { + let mut param = params; + param.downsize(k3); + param + }; + let aggregation_circuit = + AggregationCircuit::new(¶m, &layer_2_snarks, &mut rng, chunks.as_ref()); + let instance = aggregation_circuit.instances(); + + let mock_prover = MockProver::::run(k3, &aggregation_circuit, instance).unwrap(); + + mock_prover.assert_satisfied_par() + } +} diff --git a/aggregator/src/tests/compression.rs b/aggregator/src/tests/compression.rs new file mode 100644 index 0000000000..f03166a032 --- /dev/null +++ b/aggregator/src/tests/compression.rs @@ -0,0 +1,80 @@ +use std::{fs, path::Path, process}; + +use ark_std::{end_timer, start_timer, test_rng}; +use halo2_proofs::{halo2curves::bn256::{Bn256, Fr}, poly::commitment::Params, dev::MockProver}; +use snark_verifier::{ + loader::halo2::halo2_ecc::halo2_base::{halo2_proofs, utils::fs::gen_srs}, + pcs::kzg::{Bdfg21, Kzg}, +}; +use snark_verifier_sdk::{ + evm_verify, gen_evm_proof_shplonk, gen_evm_verifier, gen_pk, gen_snark_shplonk, + verify_snark_shplonk, CircuitExt, +}; + +use crate::{ + compression_layer_evm, compression_layer_snark, layer_0, tests::mock_chunk::MockChunkCircuit, + CompressionCircuit, +}; + +#[test] +fn test_mock_compression() { + env_logger::init(); + + let dir = format!("data/{}", process::id()); + let path = Path::new(dir.as_str()); + fs::create_dir(path).unwrap(); + + let k0 = 8; + let k1 = 22; + + let mut rng = test_rng(); + let params = gen_srs(k1); + + // 
Proof for test circuit
+    let circuit = MockChunkCircuit::random(&mut rng, true);
+    let layer_0_snark = layer_0!(circuit, MockChunkCircuit, params, k0, path);
+
+    std::env::set_var("VERIFY_CONFIG", "./configs/compression_wide.config");
+    // layer 1 proof compression
+    {
+        let param = {
+            let mut param = params;
+            param.downsize(k1);
+            param
+        };
+        let compression_circuit = CompressionCircuit::new(&param, layer_0_snark, true, &mut rng);
+        let instance = compression_circuit.instances();
+        println!("instance length {:?}", instance.len());
+
+        let mock_prover = MockProver::<Fr>::run(k1, &compression_circuit, instance).unwrap();
+
+        mock_prover.assert_satisfied_par()
+    }
+}
+
+// This test takes about 1 hour on CPU
+#[ignore = "it takes too much time"]
+#[test]
+fn test_two_layer_proof_compression() {
+    env_logger::init();
+
+    let dir = format!("data/{}", process::id());
+    let path = Path::new(dir.as_str());
+    fs::create_dir(path).unwrap();
+
+    let k0 = 19;
+    let k1 = 25;
+    let k2 = 25;
+
+    let mut rng = test_rng();
+    let layer_2_params = gen_srs(k2);
+
+    let circuit = MockChunkCircuit::random(&mut rng, true);
+    let layer_0_snark = layer_0!(circuit, MockChunkCircuit, layer_2_params, k0, path);
+
+    std::env::set_var("VERIFY_CONFIG", "./configs/compression_wide.config");
+    let layer_1_snark = compression_layer_snark!(layer_0_snark, layer_2_params, k1, path, 1);
+
+    std::env::set_var("VERIFY_CONFIG", "./configs/compression_thin.config");
+    compression_layer_evm!(layer_1_snark, layer_2_params, k2, path, 2);
+}
diff --git a/aggregator/src/tests/end_to_end.rs b/aggregator/src/tests/end_to_end.rs
new file mode 100644
index 0000000000..e69d76ee9f
--- /dev/null
+++ b/aggregator/src/tests/end_to_end.rs
@@ -0,0 +1,88 @@
+use std::{fs, path::Path, process};
+
+use ark_std::{end_timer, start_timer, test_rng};
+use halo2_proofs::{halo2curves::bn256::Bn256, poly::commitment::Params};
+use itertools::Itertools;
+use snark_verifier::{
+    loader::halo2::halo2_ecc::halo2_base::{halo2_proofs, 
utils::fs::gen_srs}, + pcs::kzg::{Bdfg21, Kzg}, +}; +use snark_verifier_sdk::{ + evm_verify, gen_evm_proof_shplonk, gen_evm_verifier, gen_pk, gen_snark_shplonk, + verify_snark_shplonk, CircuitExt, +}; + +use crate::{ + aggregation_layer_snark, compression_layer_evm, compression_layer_snark, layer_0, + tests::mock_chunk::MockChunkCircuit, AggregationCircuit, ChunkHash, CompressionCircuit, +}; + +const CHUNKS_PER_BATCH: usize = 2; + +// This test takes about 1 hour on CPU +#[ignore = "it takes too much time"] +#[test] +fn test_e2e() { + env_logger::init(); + + let dir = format!("data/{}", process::id()); + let path = Path::new(dir.as_str()); + fs::create_dir(path).unwrap(); + + // inner circuit: Mock circuit + let k0 = 8; + // wide compression + let k1 = 21; + // thin compression + let k2 = 26; + // aggregation + let k3 = 26; + // thin compression + let k4 = 26; + + let mut rng = test_rng(); + let params = gen_srs(k4); + + let mut chunks = (0..CHUNKS_PER_BATCH) + .map(|_| ChunkHash::mock_chunk_hash(&mut rng)) + .collect_vec(); + for i in 0..CHUNKS_PER_BATCH - 1 { + chunks[i + 1].prev_state_root = chunks[i].post_state_root; + } + + // Proof for test circuit + let circuits = chunks + .iter() + .map(|&chunk| MockChunkCircuit { + is_fresh: true, + chain_id: 0, + chunk, + }) + .collect_vec(); + let layer_0_snarks = circuits + .iter() + .map(|&circuit| layer_0!(circuit, MockChunkCircuit, params, k0, path)) + .collect_vec(); + + // Layer 1 proof compression + std::env::set_var("VERIFY_CONFIG", "./configs/compression_wide.config"); + let layer_1_snarks = layer_0_snarks + .iter() + .map(|layer_0_snark| compression_layer_snark!(layer_0_snark, params, k1, path, 1)) + .collect_vec(); + + // Layer 2 proof compression + std::env::set_var("VERIFY_CONFIG", "./configs/compression_thin.config"); + let layer_2_snarks = layer_1_snarks + .iter() + .map(|layer_1_snark| compression_layer_snark!(layer_1_snark, params, k2, path, 2)) + .collect_vec(); + + // layer 3 proof aggregation + 
std::env::set_var("VERIFY_CONFIG", "./configs/aggregation.config");
+    let layer_3_snark = aggregation_layer_snark!(layer_2_snarks, params, k3, path, 3, chunks);
+
+    // layer 4 proof compression and final evm verification
+    std::env::set_var("VERIFY_CONFIG", "./configs/compression_thin.config");
+    compression_layer_evm!(layer_3_snark, params, k4, path, 4);
+}
diff --git a/aggregator/src/tests/mock_chunk.rs b/aggregator/src/tests/mock_chunk.rs
new file mode 100644
index 0000000000..0a8a4c17e7
--- /dev/null
+++ b/aggregator/src/tests/mock_chunk.rs
@@ -0,0 +1,53 @@
+use ark_std::test_rng;
+use halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr};
+use snark_verifier_sdk::CircuitExt;
+
+use crate::{ChunkHash, LOG_DEGREE};
+
+mod circuit;
+mod circuit_ext;
+mod config;
+
+#[derive(Debug, Default, Clone, Copy)]
+/// A mock chunk circuit
+///
+/// This mock chunk circuit simulates a zkEVM circuit.
+/// Its public inputs consist of 64 elements:
+/// - data hash
+/// - public input hash
+pub(crate) struct MockChunkCircuit {
+    pub(crate) is_fresh: bool,
+    pub(crate) chain_id: u64,
+    pub(crate) chunk: ChunkHash,
+}
+
+impl MockChunkCircuit {
+    pub(crate) fn new(is_fresh: bool, chain_id: u64, chunk: ChunkHash) -> Self {
+        MockChunkCircuit {
+            is_fresh,
+            chain_id,
+            chunk,
+        }
+    }
+}
+
+#[test]
+fn test_mock_chunk_prover() {
+    env_logger::init();
+
+    let mut rng = test_rng();
+
+    let circuit = MockChunkCircuit::random(&mut rng, true);
+    let instance = circuit.instances();
+
+    let mock_prover = MockProver::<Fr>::run(LOG_DEGREE, &circuit, instance).unwrap();
+
+    mock_prover.assert_satisfied_par();
+
+    let circuit = MockChunkCircuit::random(&mut rng, false);
+    let instance = circuit.instances();
+
+    let mock_prover = MockProver::<Fr>::run(LOG_DEGREE, &circuit, instance).unwrap();
+
+    mock_prover.assert_satisfied_par();
+}
diff --git a/aggregator/src/tests/mock_chunk/circuit.rs b/aggregator/src/tests/mock_chunk/circuit.rs
new file mode 100644
index 0000000000..0867e7217f
--- /dev/null
+++ b/aggregator/src/tests/mock_chunk/circuit.rs @@ -0,0 +1,93 @@ +use std::iter; + +use ark_std::{end_timer, start_timer}; +use halo2_proofs::{ + circuit::{Layouter, SimpleFloorPlanner, Value}, + halo2curves::bn256::Fr, + plonk::{Circuit, ConstraintSystem, Error}, +}; +use zkevm_circuits::util::{Challenges, SubCircuitConfig}; + +use crate::ChunkHash; + +use super::{config::MockPlonkConfig, MockChunkCircuit}; + +impl MockChunkCircuit { + pub(crate) fn random(r: &mut R, is_fresh: bool) -> Self { + Self { + is_fresh, + chain_id: 0, + chunk: ChunkHash::mock_chunk_hash(r), + } + } +} + +impl Circuit for MockChunkCircuit { + type Config = MockPlonkConfig; + type FloorPlanner = SimpleFloorPlanner; + + fn without_witnesses(&self) -> Self { + Self::default() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + meta.set_minimum_degree(4); + MockPlonkConfig::configure(meta) + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + layouter.assign_region( + || "mock circuit", + |mut region| { + let acc_len = if self.is_fresh { 0 } else { 12 }; + + for (i, byte) in iter::repeat(0) + .take(acc_len) + .chain( + self.chunk + .chain_id + .to_be_bytes() + .iter() + .chain( + self.chunk + .data_hash + .as_bytes() + .iter() + .chain(self.chunk.public_input_hash().as_bytes().iter()), + ) + .copied(), + ) + .enumerate() + { + // "q_a·a + q_b·b + q_c·c + q_ab·a·b + constant + instance = 0", + region.assign_advice( + || "a", + config.a, + i, + || Value::known(Fr::from(byte as u64)), + )?; + region.assign_advice(|| "b", config.b, i, || Value::known(Fr::zero()))?; + region.assign_advice(|| "c", config.c, i, || Value::known(Fr::zero()))?; + + region.assign_fixed(|| "q_a", config.q_a, i, || Value::known(-Fr::one()))?; + region.assign_fixed(|| "q_b", config.q_b, i, || Value::known(Fr::zero()))?; + region.assign_fixed(|| "q_c", config.q_c, i, || Value::known(Fr::zero()))?; + region.assign_fixed(|| "q_ab", config.q_ab, 
i, || Value::known(Fr::zero()))?; + region.assign_fixed( + || "constant", + config.constant, + i, + || Value::known(Fr::zero()), + )?; + } + Ok(()) + }, + )?; + + Ok(()) + } +} diff --git a/aggregator/src/tests/mock_chunk/circuit_ext.rs b/aggregator/src/tests/mock_chunk/circuit_ext.rs new file mode 100644 index 0000000000..cfc6b1aec5 --- /dev/null +++ b/aggregator/src/tests/mock_chunk/circuit_ext.rs @@ -0,0 +1,38 @@ +use std::iter; + +use halo2_proofs::halo2curves::bn256::Fr; +use snark_verifier_sdk::CircuitExt; + +use crate::CHAIN_ID_LEN; + +use super::MockChunkCircuit; + +impl CircuitExt for MockChunkCircuit { + /// 64 elements from digest + fn num_instance(&self) -> Vec { + let acc_len = if self.is_fresh { 0 } else { 12 }; + vec![64 + CHAIN_ID_LEN + acc_len] + } + + /// return vec![data hash | public input hash] + fn instances(&self) -> Vec> { + let acc_len = if self.is_fresh { 0 } else { 12 }; + vec![iter::repeat(0) + .take(acc_len) + .chain( + self.chain_id + .to_be_bytes() + .iter() + .chain( + self.chunk + .data_hash + .as_bytes() + .iter() + .chain(self.chunk.public_input_hash().as_bytes().iter()), + ) + .copied(), + ) + .map(|x| Fr::from(x as u64)) + .collect()] + } +} diff --git a/aggregator/src/tests/mock_chunk/config.rs b/aggregator/src/tests/mock_chunk/config.rs new file mode 100644 index 0000000000..d5e7f4a4f0 --- /dev/null +++ b/aggregator/src/tests/mock_chunk/config.rs @@ -0,0 +1,60 @@ +use halo2_proofs::{ + halo2curves::bn256::Fr, + plonk::{Advice, Column, ConstraintSystem, Fixed, Instance}, + poly::Rotation, +}; +use snark_verifier::loader::halo2::halo2_ecc::halo2_base::halo2_proofs; + +#[derive(Clone, Copy)] +pub(crate) struct MockPlonkConfig { + pub(crate) a: Column, + pub(crate) b: Column, + pub(crate) c: Column, + pub(crate) q_a: Column, + pub(crate) q_b: Column, + pub(crate) q_c: Column, + pub(crate) q_ab: Column, + pub(crate) constant: Column, + #[allow(dead_code)] + pub(crate) instance: Column, +} + +impl MockPlonkConfig { + pub(crate) fn 
configure(meta: &mut ConstraintSystem) -> Self { + let [a, b, c] = [(); 3].map(|_| meta.advice_column()); + let [q_a, q_b, q_c, q_ab, constant] = [(); 5].map(|_| meta.fixed_column()); + let instance = meta.instance_column(); + + [a, b, c].map(|column| meta.enable_equality(column)); + + meta.create_gate( + "q_a·a + q_b·b + q_c·c + q_ab·a·b + constant + instance = 0", + |meta| { + let [a, b, c] = [a, b, c].map(|column| meta.query_advice(column, Rotation::cur())); + let [q_a, q_b, q_c, q_ab, constant] = [q_a, q_b, q_c, q_ab, constant] + .map(|column| meta.query_fixed(column, Rotation::cur())); + let instance = meta.query_instance(instance, Rotation::cur()); + Some( + q_a * a.clone() + + q_b * b.clone() + + q_c * c + + q_ab * a * b + + constant + + instance, + ) + }, + ); + + MockPlonkConfig { + a, + b, + c, + q_a, + q_b, + q_c, + q_ab, + constant, + instance, + } + } +} diff --git a/aggregator/src/util.rs b/aggregator/src/util.rs new file mode 100644 index 0000000000..301e30a9b8 --- /dev/null +++ b/aggregator/src/util.rs @@ -0,0 +1,82 @@ +use eth_types::Field; +use halo2_proofs::circuit::AssignedCell; + +use crate::{DEFAULT_KECCAK_ROWS, NUM_ROUNDS}; + +use std::env::var; + +pub(crate) fn capacity(num_rows: usize) -> Option { + if num_rows > 0 { + // Subtract two for unusable rows + Some(num_rows / ((NUM_ROUNDS + 1) * get_num_rows_per_round()) - 2) + } else { + None + } +} + +pub(crate) fn get_num_rows_per_round() -> usize { + var("KECCAK_ROWS") + .unwrap_or_else(|_| format!("{DEFAULT_KECCAK_ROWS}")) + .parse() + .expect("Cannot parse KECCAK_ROWS env var as usize") +} + +/// Return +/// - the indices of the rows that contain the input preimages +/// - the indices of the rows that contain the output digest +pub(crate) fn get_indices(preimages: &[Vec]) -> (Vec, Vec) { + let mut preimage_indices = vec![]; + let mut digest_indices = vec![]; + let mut round_ctr = 0; + + for preimage in preimages.iter() { + // 136 = 17 * 8 is the size in bits of each + // input chunk that 
can be processed by Keccak circuit using absorb
+        // each chunk of size 136 needs 300 Keccak circuit rows to prove
+        // which consists of 12 Keccak rows for each of 24 + 1 Keccak circuit rounds
+        // digest only happens at the end of the last input chunk with
+        // 4 Keccak circuit rounds, so 48 Keccak rows, and 300 - 48 = 252
+        let num_rounds = 1 + preimage.len() / 136;
+        let mut preimage_padded = preimage.clone();
+        preimage_padded.resize(136 * num_rounds, 0);
+        for (i, round) in preimage_padded.chunks(136).enumerate() {
+            // indices for preimages
+            for (j, _chunk) in round.chunks(8).into_iter().enumerate() {
+                for k in 0..8 {
+                    preimage_indices.push(round_ctr * 300 + j * 12 + k + 12)
+                }
+            }
+            // indices for digests
+            if i == num_rounds - 1 {
+                for j in 0..4 {
+                    for k in 0..8 {
+                        digest_indices.push(round_ctr * 300 + j * 12 + k + 252)
+                    }
+                }
+            }
+            round_ctr += 1;
+        }
+    }
+
+    debug_assert!(is_ascending(&preimage_indices));
+    debug_assert!(is_ascending(&digest_indices));
+
+    (preimage_indices, digest_indices)
+}
+
+#[inline]
+// assert two cells have same value
+// (NOT constraining equality in circuit)
+pub(crate) fn assert_equal<F: Field>(a: &AssignedCell<F, F>, b: &AssignedCell<F, F>) {
+    let mut t1 = F::default();
+    let mut t2 = F::default();
+    a.value().map(|f| t1 = *f);
+    b.value().map(|f| t2 = *f);
+    assert_eq!(t1, t2)
+}
+
+#[inline]
+// assert that the slice is ascending
+fn is_ascending(a: &[usize]) -> bool {
+    a.windows(2).all(|w| w[0] <= w[1])
+}
diff --git a/aggregator/tests.sh b/aggregator/tests.sh
new file mode 100755
index 0000000000..e871301512
--- /dev/null
+++ b/aggregator/tests.sh
@@ -0,0 +1,8 @@
+RUST_LOG=trace MODE=greeter cargo test --release --features=print-trace test_mock_chunk_prover -- --nocapture 2>&1 | tee mock_chunk.log
+RUST_LOG=trace MODE=greeter cargo test --release --features=print-trace test_mock_aggregation -- --nocapture 2>&1 | tee mock_aggregation.log
+RUST_LOG=trace MODE=greeter cargo test --release --features=print-trace test_mock_compression -- 
--nocapture 2>&1 | tee compression.log + +# the following 3 tests takes super long time +# RUST_LOG=trace MODE=greeter cargo test --release --features=print-trace test_aggregation_circuit -- --ignored --nocapture 2>&1 | tee aggregation.log +# RUST_LOG=trace MODE=greeter cargo test --release --features=print-trace test_two_layer_proof_compression -- --ignored --nocapture 2>&1 | tee compression_2_layer.log +# RUST_LOG=trace MODE=greeter cargo test --release --features=print-trace test_e2e -- --ignored --nocapture 2>&1 | tee aggregation_e2e.log diff --git a/zkevm-circuits/src/keccak_circuit.rs b/zkevm-circuits/src/keccak_circuit.rs index fe12af8890..8f45588d16 100644 --- a/zkevm-circuits/src/keccak_circuit.rs +++ b/zkevm-circuits/src/keccak_circuit.rs @@ -2,7 +2,7 @@ mod cell_manager; /// Keccak packed multi pub mod keccak_packed_multi; -mod param; +pub(crate) mod param; mod table; /// Util mod util; @@ -57,7 +57,7 @@ pub struct KeccakCircuitConfig { q_padding_last: Column, /// The columns for other circuits to lookup Keccak hash results pub keccak_table: KeccakTable, - /// Expose the columns that stores the cells for hash input/output + /// The cell manager that stores/allocates the advice columns pub cell_manager: CellManager, round_cst: Column, normalize_3: [TableColumn; 2], @@ -69,6 +69,7 @@ pub struct KeccakCircuitConfig { } /// Circuit configuration arguments +#[derive(Debug, Clone)] pub struct KeccakCircuitConfigArgs { /// KeccakTable pub keccak_table: KeccakTable, @@ -303,7 +304,7 @@ impl SubCircuitConfig for KeccakCircuitConfig { // multiple rows with lookups in a way that doesn't require any // extra additional cells or selectors we have to put all `s[i]`'s on the same // row. This isn't that strong of a requirement actually because we the - // words are split into multipe parts, and so only the parts at the same + // words are split into multiple parts, and so only the parts at the same // position of those words need to be on the same row. 
let target_word_sizes = target_part_sizes(part_size); let num_word_parts = target_word_sizes.len(); @@ -868,6 +869,7 @@ impl SubCircuitConfig for KeccakCircuitConfig { } impl KeccakCircuitConfig { + /// Assign the circuit for hash function pub(crate) fn assign( &self, layouter: &mut impl Layouter, @@ -960,7 +962,7 @@ impl KeccakCircuitConfig { Ok(res) } - /// Load the auxiliary table for keccak table. + /// Load the auxiliary tables for keccak circuit pub fn load_aux_tables(&self, layouter: &mut impl Layouter) -> Result<(), Error> { load_normalize_table(layouter, "normalize_6", &self.normalize_6, 6u64)?; load_normalize_table(layouter, "normalize_4", &self.normalize_4, 4u64)?; @@ -989,7 +991,12 @@ impl KeccakCircuitConfig { /// KeccakCircuit #[derive(Default, Clone, Debug)] pub struct KeccakCircuit { + // The input is a two dimensional vector + // Each input row is a pre-image of the hash + // The output row of the hash, i.e., the digest is NOT part of the circuit input inputs: Vec>, + // The maximum number of rows, for example, 2^20 + // This needs to be large enough for the circuit. num_rows: usize, _marker: PhantomData, } @@ -1001,7 +1008,7 @@ impl SubCircuit for KeccakCircuit { keccak_unusable_rows() } - /// The `block.circuits_params.keccak_padding` parmeter, when enabled, sets + /// The `block.circuits_params.keccak_padding` parameter, when enabled, sets /// up the circuit to support a fixed number of permutations/keccak_f's, /// independently of the permutations required by `inputs`. 
fn new_from_block(block: &witness::Block) -> Self { diff --git a/zkevm-circuits/src/keccak_circuit/cell_manager.rs b/zkevm-circuits/src/keccak_circuit/cell_manager.rs index de58ad619e..a2fdb30e57 100644 --- a/zkevm-circuits/src/keccak_circuit/cell_manager.rs +++ b/zkevm-circuits/src/keccak_circuit/cell_manager.rs @@ -153,7 +153,7 @@ impl CellManager { self.rows.iter().cloned().max().unwrap() } - /// expose the columns used for keccak cell + /// Expose the columns used by the cell manager by reference. pub fn columns(&self) -> &[CellColumn] { &self.columns } diff --git a/zkevm-circuits/src/keccak_circuit/keccak_packed_multi.rs b/zkevm-circuits/src/keccak_circuit/keccak_packed_multi.rs index 90fd3e6ddd..6a97fe221d 100644 --- a/zkevm-circuits/src/keccak_circuit/keccak_packed_multi.rs +++ b/zkevm-circuits/src/keccak_circuit/keccak_packed_multi.rs @@ -67,11 +67,11 @@ pub struct KeccakRow { pub(crate) q_padding: bool, pub(crate) q_padding_last: bool, pub(crate) round_cst: F, - /// if the row is the last row of the current keccak hash + /// if the row is the last row of the current keccak round pub is_final: bool, /// the value of the cells that are to be assigned pub cell_values: Vec, - /// the length of the hash input + /// The input length of the hash function pub length: usize, pub(crate) data_rlc: Value, pub(crate) hash_rlc: Value, @@ -423,7 +423,7 @@ pub(crate) mod transform { } } -// Transfroms values to cells +// Transforms values to cells pub(crate) mod transform_to { use super::{Cell, KeccakRegion, Part, PartValue}; use crate::{ @@ -492,7 +492,10 @@ pub(crate) mod transform_to { } } -fn keccak_rows(bytes: &[u8], challenges: Challenges>) -> Vec> { +pub(crate) fn keccak_rows( + bytes: &[u8], + challenges: Challenges>, +) -> Vec> { let mut rows = Vec::new(); keccak(&mut rows, bytes, challenges); rows @@ -868,17 +871,18 @@ pub fn multi_keccak( }); } - // Dedup actual keccaks + // // Dedup actual keccaks // let inputs_len: usize = bytes.iter().map(|k| k.len()).sum(); 
// let inputs_num = bytes.len(); // for (idx, bytes) in bytes.iter().enumerate() { - // debug!("{}th keccak is of len {}", idx, bytes.len()); + // println!("{}th keccak is of len {}", idx, bytes.len()); // } // let bytes: Vec<_> = bytes.iter().unique().collect(); // let inputs_len2: usize = bytes.iter().map(|k| k.len()).sum(); // let inputs_num2 = bytes.len(); - // debug!("after dedup inputs, input num {inputs_num}->{inputs_num2}, input total len - // {inputs_len}->{inputs_len2}"); + // println!( + // "after dedup inputs, input num {inputs_num}->{inputs_num2}, input total len + // {inputs_len}->{inputs_len2}" ); // TODO: optimize the `extend` using Iter? let real_rows: Vec<_> = bytes diff --git a/zkevm-circuits/src/sig_circuit.rs b/zkevm-circuits/src/sig_circuit.rs index 9e5266f843..f976386b33 100644 --- a/zkevm-circuits/src/sig_circuit.rs +++ b/zkevm-circuits/src/sig_circuit.rs @@ -101,9 +101,9 @@ impl SubCircuitConfig for SigCircuitConfig { let num_advice = [calc_required_advices(MAX_NUM_SIG), 1]; #[cfg(feature = "onephase")] - log::debug!("configuring ECDSA chip with single phase"); + log::info!("configuring ECDSA chip with single phase"); #[cfg(not(feature = "onephase"))] - log::debug!("configuring ECDSA chip with multiple phases"); + log::info!("configuring ECDSA chip with multiple phases"); // halo2-ecc's ECDSA config // @@ -766,7 +766,7 @@ impl SigCircuit { .collect::>, Error>>()?; // IMPORTANT: Move to Phase2 before RLC - log::debug!("before proceeding to the next phase"); + log::info!("before proceeding to the next phase"); ctx.print_stats(&["Range"]); #[cfg(not(feature = "onephase"))] @@ -827,7 +827,7 @@ impl SigCircuit { // check lookups // This is not optional. let lookup_cells = ecdsa_chip.finalize(&mut ctx); - log::debug!("total number of lookup cells: {}", lookup_cells); + log::info!("total number of lookup cells: {}", lookup_cells); ctx.print_stats(&["Range"]); Ok(assigned_keccak_values_and_sigs