lambdaclass · diegokingston · Feb 5, 2026 · Feb 5, 2026 · Feb 5, 2026 · Feb 5, 2026
@@ -193,13 +193,10 @@ where
         &self,
         values: &HashMap<Variable, FieldElement<F>>,
     ) -> Vec<FieldElement<F>> {
-        let mut public_inputs = Vec::new();
-        for key in &self.public_input_variables {
-            if let Some(value) = values.get(key) {
-                public_inputs.push(value.clone());
-            }
-        }
-        public_inputs
+        self.public_input_variables
+            .iter()
+            .filter_map(|key| values.get(key).cloned())
+            .collect()
     }
 }
 

@@ -19,18 +19,18 @@ where
         mut assignments: HashMap<Variable, FE<F>>,
     ) -> Result<HashMap<Variable, FE<F>>, SolverError> {
         loop {
-            let old_solved = assignments.keys().len();
-            for constraint in self.constraints.iter() {
+            let old_solved = assignments.len();
+            for constraint in &self.constraints {
                 assignments = solve_hint(assignments, constraint);
                 assignments = solve_constraint(assignments, constraint);
             }
-            if old_solved == assignments.keys().len() {
+            if old_solved == assignments.len() {
                 break;
             }
         }
 
         // Check the system is solved
-        for constraint in self.constraints.iter() {
+        for constraint in &self.constraints {
             let a = assignments.get(&constraint.l);
             let b = assignments.get(&constraint.r);
             let c = assignments.get(&constraint.o);

@@ -334,28 +334,41 @@ where
         gamma: FieldElement<F>,
     ) -> Round2Result<F, CS::Commitment> {
         let cpi = common_preprocessed_input;
-        let mut coefficients: Vec<FieldElement<F>> = vec![FieldElement::one()];
         let (s1, s2, s3) = (&cpi.s1_lagrange, &cpi.s2_lagrange, &cpi.s3_lagrange);
 
         let k2 = &cpi.k1 * &cpi.k1;
 
         let lp = |w: &FieldElement<F>, eta: &FieldElement<F>| w + &beta * eta + &gamma;
 
-        for i in 0..&cpi.n - 1 {
+        // Compute all numerators and denominators first.
+        // We need n-1 factors to compute n coefficients: z[0]=1, z[i+1]=z[i]*factor[i] for i in 0..n-1.
+        // This matches the original loop range `0..cpi.n - 1`.
+        let n_minus_1 = cpi.n - 1;
+        let mut numerators = Vec::with_capacity(n_minus_1);
+        let mut denominators = Vec::with_capacity(n_minus_1);
+
+        for i in 0..n_minus_1 {
             let (a_i, b_i, c_i) = (&witness.a[i], &witness.b[i], &witness.c[i]);
             let num = lp(a_i, &cpi.domain[i])
                 * lp(b_i, &(&cpi.domain[i] * &cpi.k1))
                 * lp(c_i, &(&cpi.domain[i] * &k2));
             let den = lp(a_i, &s1[i]) * lp(b_i, &s2[i]) * lp(c_i, &s3[i]);
-            // den != 0 with overwhelming probability because beta and gamma are random elements.
-            let new_factor = (num / den).expect(
-                "division by zero in permutation polynomial: beta and gamma should prevent this",
-            );
-
-            let new_term = coefficients
-                .last()
-                .expect("coefficients vector is non-empty")
-                * &new_factor;
+            numerators.push(num);
+            denominators.push(den);
+        }
+
+        // Batch invert all denominators at once (much faster than n-1 individual inversions)
+        FieldElement::inplace_batch_inverse(&mut denominators).expect(
+            "batch inversion failed in permutation polynomial: beta and gamma should prevent zeros",
+        );
+
+        // Compute coefficients using the inverted denominators
+        let mut coefficients: Vec<FieldElement<F>> = Vec::with_capacity(cpi.n);
+        coefficients.push(FieldElement::one());
+
+        for i in 0..n_minus_1 {
+            let factor = &numerators[i] * &denominators[i];
+            let new_term = coefficients.last().expect("coefficients non-empty") * &factor;
             coefficients.push(new_term);
         }
 
@@ -386,10 +399,6 @@ where
         let cpi = common_preprocessed_input;
         let k2 = &cpi.k1 * &cpi.k1;
 
-        let one = Polynomial::new_monomial(FieldElement::one(), 0);
-        let p_x = &Polynomial::new_monomial(FieldElement::<F>::one(), 1);
-        let zh = Polynomial::new_monomial(FieldElement::<F>::one(), cpi.n) - &one;
-
         let z_x_omega_coefficients: Vec<FieldElement<F>> = p_z
             .coefficients()
             .iter()
@@ -430,8 +439,25 @@ where
             .expect("FFT evaluation of qc must be within field's two-adicity limit");
         let p_pi_eval = Polynomial::evaluate_offset_fft(&p_pi, 1, Some(degree), offset)
             .expect("FFT evaluation of p_pi must be within field's two-adicity limit");
-        let p_x_eval = Polynomial::evaluate_offset_fft(p_x, 1, Some(degree), offset)
-            .expect("FFT evaluation of p_x must be within field's two-adicity limit");
+
+        // Optimization: p_x = X (identity polynomial), so p_x(offset * ω^i) = offset * ω^i.
+        // Generate the coset directly instead of using FFT.
+        // Note: This uses the same primitive root that evaluate_offset_fft uses internally,
+        // since both derive it from F::get_primitive_root_of_unity for the same domain size.
+        let omega = F::get_primitive_root_of_unity(degree.trailing_zeros() as u64)
+            .expect("primitive root exists for degree");
+        let p_x_eval: Vec<_> = (0..degree)
+            .scan(offset.clone(), |current, _| {
+                let val = current.clone();
+                *current = &*current * &omega;
+                Some(val)
+            })
+            .collect();
+        debug_assert_eq!(
+            p_x_eval.len(),
+            p_a_eval.len(),
+            "p_x_eval length must match FFT evaluation length"
+        );
         let p_z_eval = Polynomial::evaluate_offset_fft(p_z, 1, Some(degree), offset)
             .expect("FFT evaluation of p_z must be within field's two-adicity limit");
         let p_z_x_omega_eval = Polynomial::evaluate_offset_fft(&z_x_omega, 1, Some(degree), offset)
@@ -505,11 +531,38 @@ where
             .map(|((p2, p1), co)| (p2 * &alpha + p1) * &alpha + co)
             .collect();
 
-        let mut zh_eval = Polynomial::evaluate_offset_fft(&zh, 1, Some(degree), offset).expect(
-            "FFT evaluation of vanishing polynomial must be within field's two-adicity limit",
+        // Optimization: Z_H(x) = x^n - 1 has only 4 distinct values on a coset of size 4n.
+        // On coset {offset * ω^i : i = 0..4n-1} where ω is primitive 4n-th root:
+        //   Z_H(offset * ω^i) = offset^n * (ω^n)^i - 1
+        // Since ω^n is a 4th root of unity, (ω^n)^i cycles through 4 values.
+        //
+        // SAFETY: This optimization assumes degree == 4 * n. If degree changes (see TODO above),
+        // this optimization must be revisited.
+        debug_assert_eq!(
+            degree,
+            4 * cpi.n,
+            "Z_H optimization requires degree == 4n; if degree formula changes, update this code"
         );
-        FieldElement::inplace_batch_inverse(&mut zh_eval)
-            .expect("vanishing polynomial evaluations are non-zero because evaluated on coset offset from the roots of unity");
+        let omega_4n = F::get_primitive_root_of_unity(degree.trailing_zeros() as u64)
+            .expect("primitive root exists for degree");
+        let omega_n = omega_4n.pow(cpi.n as u64); // ω^n where ω is 4n-th root; this is a 4th root of unity
+        let offset_to_n = offset.pow(cpi.n as u64);
+
+        // Compute the 4 distinct Z_H values and their inverses
+        // Use multiplication chain for small powers (faster than pow)
+        let omega_n_sq = &omega_n * &omega_n;
+        let omega_n_cubed = &omega_n_sq * &omega_n;
+        let mut zh_base = [
+            &offset_to_n - FieldElement::<F>::one(), // i ≡ 0 (mod 4)
+            &offset_to_n * &omega_n - FieldElement::<F>::one(), // i ≡ 1 (mod 4)
+            &offset_to_n * &omega_n_sq - FieldElement::<F>::one(), // i ≡ 2 (mod 4)
+            &offset_to_n * &omega_n_cubed - FieldElement::<F>::one(), // i ≡ 3 (mod 4)
+        ];
+        FieldElement::inplace_batch_inverse(&mut zh_base)
+            .expect("Z_H evaluations are non-zero on coset offset from roots of unity");
+
+        // Build full evaluation vector by cycling through the 4 values
+        let zh_eval: Vec<_> = (0..degree).map(|i| zh_base[i % 4].clone()).collect();
         let c: Vec<_> = p_eval
             .iter()
             .zip(zh_eval.iter())
@@ -583,11 +636,19 @@ where
         let (r1, r2, r3, r4) = (round_1, round_2, round_3, round_4);
         // Precompute variables
         let k2 = &cpi.k1 * &cpi.k1;
-        let zeta_raised_n = Polynomial::new_monomial(r4.zeta.pow(cpi.n + 2), 0); // TODO: Paper says n and 2n, but Gnark uses n+2 and 2n+4
-        let zeta_raised_2n = Polynomial::new_monomial(r4.zeta.pow(2 * cpi.n + 4), 0);
+
+        // Compute zeta powers efficiently: zeta^n, zeta^(n+2), zeta^(2n+4)
+        // Start with zeta^n, then derive others to avoid redundant exponentiations
+        let zeta_n = r4.zeta.pow(cpi.n as u64);
+        let zeta_sq = &r4.zeta * &r4.zeta;
+        let zeta_n_plus_2 = &zeta_n * &zeta_sq; // zeta^(n+2) = zeta^n * zeta^2
+        let zeta_2n_plus_4 = &zeta_n_plus_2 * &zeta_n_plus_2; // zeta^(2n+4) = (zeta^(n+2))^2
+
+        let zeta_raised_n = Polynomial::new_monomial(zeta_n_plus_2, 0); // TODO: Paper says n and 2n, but Gnark uses n+2 and 2n+4
+        let zeta_raised_2n = Polynomial::new_monomial(zeta_2n_plus_4, 0);
 
         // zeta is sampled outside the set of roots of unity so zeta != 1, and n != 0.
-        let l1_zeta = ((&r4.zeta.pow(cpi.n as u64) - FieldElement::<F>::one())
+        let l1_zeta = ((&zeta_n - FieldElement::<F>::one())
             / ((&r4.zeta - FieldElement::<F>::one()) * FieldElement::<F>::from(cpi.n as u64)))
         .expect("zeta is outside roots of unity so denominator is non-zero");
 
@@ -606,10 +667,11 @@ where
             * &r2.beta
             * &r4.z_zeta_omega
             * &cpi.s3;
+        let alpha_squared = &r3.alpha * &r3.alpha;
         p_non_constant += (r_2_2 - r_2_1) * &r3.alpha;
 
         let r_3 = &r2.p_z * l1_zeta;
-        p_non_constant += r_3 * &r3.alpha * &r3.alpha;
+        p_non_constant += r_3 * &alpha_squared;
 
         let partial_t = &r3.p_t_lo + zeta_raised_n * &r3.p_t_mid + zeta_raised_2n * &r3.p_t_hi;
 

@@ -34,20 +34,28 @@ pub fn test_srs(n: usize) -> StructuredReferenceString<G1Point, G2Point> {
     let g1 = <BLS12381Curve as IsEllipticCurve>::generator();
     let g2 = <BLS12381TwistCurve as IsEllipticCurve>::generator();
 
+    // Use iterative multiplication instead of pow() for efficiency
     let powers_main_group: Vec<G1Point> = (0..n + 3)
-        .map(|exp| g1.operate_with_self(s.pow(exp as u64).canonical()))
+        .scan(FrElement::one(), |s_power, _| {
+            let result = g1.operate_with_self(s_power.canonical());
+            *s_power = &*s_power * &s;
+            Some(result)
+        })
         .collect();
     let powers_secondary_group = [g2.clone(), g2.operate_with_self(s.canonical())];
 
     StructuredReferenceString::new(&powers_main_group, &powers_secondary_group)
 }
 
-/// Generates a domain to interpolate: 1, omega, omega², ..., omega^size
+/// Generates a domain to interpolate: 1, omega, omega², ..., omega^(size-1)
 pub fn generate_domain<F: IsField>(omega: &FieldElement<F>, size: usize) -> Vec<FieldElement<F>> {
-    (1..size).fold(vec![FieldElement::one()], |mut acc, _| {
-        acc.push(acc.last().unwrap() * omega);
-        acc
-    })
+    (0..size)
+        .scan(FieldElement::one(), |power, _| {
+            let result = power.clone();
+            *power = &*power * omega;
+            Some(result)
+        })
+        .collect()
 }
 
 /// Generates the permutation coefficients for the copy constraints.
@@ -59,10 +67,11 @@ pub fn generate_permutation_coefficients<F: IsField>(
     order_r_minus_1_root_unity: &FieldElement<F>,
 ) -> Vec<FieldElement<F>> {
     let identity = identity_permutation(omega, n, order_r_minus_1_root_unity);
-    let permuted: Vec<FieldElement<F>> = (0..n * 3)
-        .map(|i| identity[permutation[i]].clone())
-        .collect();
-    permuted
+    permutation
+        .iter()
+        .take(n * 3)
+        .map(|&i| identity[i].clone())
+        .collect()
 }
 
 /// The identity permutation, auxiliary function to generate the copy constraints.
@@ -72,11 +81,27 @@ fn identity_permutation<F: IsField>(
     order_r_minus_1_root_unity: &FieldElement<F>,
 ) -> Vec<FieldElement<F>> {
     let u = order_r_minus_1_root_unity;
-    let mut result: Vec<FieldElement<F>> = vec![];
-    for index_column in 0..=2 {
-        for index_row in 0..n {
-            result.push(w.pow(index_row) * u.pow(index_column as u64));
-        }
+    let u_sq = u * u;
+
+    // Precompute w^i for i in 0..n using iterative multiplication
+    let w_powers: Vec<_> = (0..n)
+        .scan(FieldElement::one(), |w_power, _| {
+            let result = w_power.clone();
+            *w_power = &*w_power * w;
+            Some(result)
+        })
+        .collect();
+
+    // Build result: [w^i * u^0, w^i * u^1, w^i * u^2] for each column
+    let mut result = Vec::with_capacity(3 * n);
+    for w_i in &w_powers {
+        result.push(w_i.clone());
+    }
+    for w_i in &w_powers {
+        result.push(w_i * u);
+    }
+    for w_i in &w_powers {
+        result.push(w_i * &u_sq);
     }
     result
 }

@@ -326,17 +326,26 @@ impl<F: IsField + IsFFTField + HasDefaultTranscript, CS: IsCommitmentScheme<F>>
         FieldElement<F>: ByteConversion,
     {
         let [beta, gamma, alpha, zeta, upsilon] = self.compute_challenges(p, vk, public_input);
-        let zh_zeta = zeta.pow(input.n) - FieldElement::<F>::one();
 
         let k1 = &input.k1;
         let k2 = k1 * k1;
 
+        // Precompute zeta powers efficiently
+        let zeta_n = zeta.pow(input.n as u64);
+        let zeta_sq = &zeta * &zeta;
+        let zeta_n_plus_2 = &zeta_n * &zeta_sq;
+        let zeta_2n_plus_4 = &zeta_n_plus_2 * &zeta_n_plus_2;
+        let zh_zeta = &zeta_n - FieldElement::<F>::one();
+
         // We are using that zeta != 0 because is sampled outside the set of roots of unity,
         // and n != 0 because is the length of the trace.
-        let l1_zeta = ((zeta.pow(input.n as u64) - FieldElement::<F>::one())
+        let l1_zeta = (&zh_zeta
             / ((&zeta - FieldElement::<F>::one()) * FieldElement::from(input.n as u64)))
         .expect("zeta is outside roots of unity so denominator is non-zero");
 
+        // Precompute alpha^2 for reuse
+        let alpha_squared = &alpha * &alpha;
+
         // Use the following equality to compute PI(ζ)
         // without interpolating:
         // Lᵢ₊₁ = ω Lᵢ (X − ωⁱ) / (X − ωⁱ⁺¹)
@@ -362,7 +371,7 @@ impl<F: IsField + IsFFTField + HasDefaultTranscript, CS: IsCommitmentScheme<F>>
             * (&p.c_zeta + &gamma)
             * (&p.a_zeta + &beta * &p.s1_zeta + &gamma)
             * (&p.b_zeta + &beta * &p.s2_zeta + &gamma);
-        p_constant_zeta = p_constant_zeta - &l1_zeta * &alpha * &alpha;
+        p_constant_zeta = p_constant_zeta - &l1_zeta * &alpha_squared;
         p_constant_zeta += p_pi_zeta;
 
         let p_zeta = p_constant_zeta + &p.p_non_constant_zeta;
@@ -372,14 +381,8 @@ impl<F: IsField + IsFFTField + HasDefaultTranscript, CS: IsCommitmentScheme<F>>
         // Compute commitment of partial evaluation of t (p = zh * t)
         let partial_t_1 = p
             .t_lo_1
-            .operate_with(
-                &p.t_mid_1
-                    .operate_with_self(zeta.pow(input.n + 2).canonical()),
-            )
-            .operate_with(
-                &p.t_hi_1
-                    .operate_with_self(zeta.pow(2 * input.n + 4).canonical()),
-            );
+            .operate_with(&p.t_mid_1.operate_with_self(zeta_n_plus_2.canonical()))
+            .operate_with(&p.t_hi_1.operate_with_self(zeta_2n_plus_4.canonical()));
 
         // Compute commitment of the non constant part of the linearization of p
         // The first term corresponds to the gates constraints
@@ -408,7 +411,7 @@ impl<F: IsField + IsFFTField + HasDefaultTranscript, CS: IsCommitmentScheme<F>>
         // α²*L₁(ζ)*Z(X)
         let third_term = p
             .z_1
-            .operate_with_self((&alpha * &alpha * l1_zeta).canonical());
+            .operate_with_self((&alpha_squared * l1_zeta).canonical());
 
         let p_non_constant_1 = first_term
             .operate_with(&second_term)