Skip to content

Commit 10e2afc

Browse files
committed
neon impl wrap up
1 parent 5b5e645 commit 10e2afc

File tree

2 files changed

+323
-7
lines changed

2 files changed

+323
-7
lines changed

arith/gf2/src/gf2x128/neon.rs

+321-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,19 @@
1+
use std::{
2+
arch::aarch64::*,
3+
mem::{transmute, zeroed},
4+
ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
5+
};
6+
7+
use arith::{Field, FieldSerde, FieldSerdeResult, SimdField};
8+
9+
use crate::{GF2x64, GF2};
10+
111
#[derive(Clone, Copy, Debug)]
212
pub struct NeonGF2x128 {
313
pub(crate) v: uint32x4_t,
414
}
515

6-
impl FieldSerde for NeonGF2_128 {
16+
impl FieldSerde for NeonGF2x128 {
717
const SERIALIZED_SIZE: usize = 16;
818

919
#[inline(always)]
@@ -17,7 +27,7 @@ impl FieldSerde for NeonGF2_128 {
1727
let mut u = [0u8; 16];
1828
reader.read_exact(&mut u)?;
1929
unsafe {
20-
Ok(NeonGF2_128 {
30+
Ok(NeonGF2x128 {
2131
v: transmute::<[u8; 16], uint32x4_t>(u),
2232
})
2333
}
@@ -31,14 +41,320 @@ impl FieldSerde for NeonGF2_128 {
3141
let mut u = [0u8; 32];
3242
reader.read_exact(&mut u)?;
3343
Ok(unsafe {
34-
NeonGF2_128 {
44+
NeonGF2x128 {
3545
v: transmute::<[u8; 16], uint32x4_t>(u[..16].try_into().unwrap()),
3646
}
3747
})
3848
}
3949
}
40-
// TODO: FieldSerde
4150

42-
// TODO: Field
51+
impl Field for NeonGF2x128 {
52+
const NAME: &'static str = "Galios Field 2 SIMD 128";
53+
54+
const SIZE: usize = 128 / 8;
55+
56+
const FIELD_SIZE: usize = 128; // in bits
57+
58+
const ZERO: Self = NeonGF2x128 {
59+
v: unsafe { zeroed() },
60+
};
61+
62+
const ONE: Self = NeonGF2x128 {
63+
v: unsafe { transmute::<[u64; 2], uint32x4_t>([!0u64, !0u64]) },
64+
};
65+
66+
const INV_2: Self = NeonGF2x128 {
67+
v: unsafe { zeroed() },
68+
}; // should not be used
69+
70+
#[inline(always)]
71+
fn zero() -> Self {
72+
NeonGF2x128 {
73+
v: unsafe { zeroed() },
74+
}
75+
}
76+
77+
#[inline(always)]
78+
fn one() -> Self {
79+
NeonGF2x128 {
80+
v: unsafe { transmute::<[u64; 2], uint32x4_t>([!0u64, !0u64]) },
81+
}
82+
}
83+
84+
#[inline(always)]
85+
fn is_zero(&self) -> bool {
86+
unsafe { transmute::<uint32x4_t, [u8; 16]>(self.v) == [0; 16] }
87+
}
88+
89+
#[inline(always)]
90+
fn random_unsafe(mut rng: impl rand::RngCore) -> Self {
91+
let mut u = [0u8; 16];
92+
rng.fill_bytes(&mut u);
93+
unsafe {
94+
NeonGF2x128 {
95+
v: *(u.as_ptr() as *const uint32x4_t),
96+
}
97+
}
98+
}
99+
100+
#[inline(always)]
101+
fn random_bool(mut rng: impl rand::RngCore) -> Self {
102+
let mut u = [0u8; 16];
103+
rng.fill_bytes(&mut u);
104+
unsafe {
105+
NeonGF2x128 {
106+
v: *(u.as_ptr() as *const uint32x4_t),
107+
}
108+
}
109+
}
110+
111+
#[inline(always)]
112+
fn exp(&self, exponent: u128) -> Self {
113+
if exponent % 2 == 0 {
114+
NeonGF2x128::ONE
115+
} else {
116+
*self
117+
}
118+
}
119+
120+
#[inline(always)]
121+
fn inv(&self) -> Option<Self> {
122+
unimplemented!()
123+
}
124+
125+
#[inline(always)]
126+
fn as_u32_unchecked(&self) -> u32 {
127+
unimplemented!("u32 for GFx128 doesn't make sense")
128+
}
129+
130+
#[inline(always)]
131+
fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
132+
unsafe {
133+
NeonGF2x128 {
134+
v: transmute::<[u8; 16], uint32x4_t>(bytes[..16].try_into().unwrap()),
135+
}
136+
}
137+
}
138+
}
139+
140+
impl Default for NeonGF2x128 {
141+
#[inline(always)]
142+
fn default() -> Self {
143+
Self::ZERO
144+
}
145+
}
146+
147+
impl PartialEq for NeonGF2x128 {
148+
#[inline(always)]
149+
fn eq(&self, other: &Self) -> bool {
150+
unsafe {
151+
transmute::<uint32x4_t, [u8; 16]>(self.v) == transmute::<uint32x4_t, [u8; 16]>(other.v)
152+
}
153+
}
154+
}
155+
156+
impl Mul<&NeonGF2x128> for NeonGF2x128 {
157+
type Output = NeonGF2x128;
158+
159+
#[inline(always)]
160+
#[allow(clippy::suspicious_arithmetic_impl)]
161+
fn mul(self, rhs: &NeonGF2x128) -> NeonGF2x128 {
162+
NeonGF2x128 {
163+
v: unsafe { vandq_u32(self.v, rhs.v) },
164+
}
165+
}
166+
}
167+
168+
impl Mul<NeonGF2x128> for NeonGF2x128 {
169+
type Output = NeonGF2x128;
170+
171+
#[inline(always)]
172+
#[allow(clippy::suspicious_arithmetic_impl)]
173+
fn mul(self, rhs: NeonGF2x128) -> NeonGF2x128 {
174+
NeonGF2x128 {
175+
v: unsafe { vandq_u32(self.v, rhs.v) },
176+
}
177+
}
178+
}
179+
180+
impl MulAssign<&NeonGF2x128> for NeonGF2x128 {
181+
#[inline(always)]
182+
#[allow(clippy::suspicious_op_assign_impl)]
183+
fn mul_assign(&mut self, rhs: &NeonGF2x128) {
184+
self.v = unsafe { vandq_u32(self.v, rhs.v) };
185+
}
186+
}
187+
188+
impl MulAssign<NeonGF2x128> for NeonGF2x128 {
189+
#[inline(always)]
190+
#[allow(clippy::suspicious_op_assign_impl)]
191+
fn mul_assign(&mut self, rhs: NeonGF2x128) {
192+
self.v = unsafe { vandq_u32(self.v, rhs.v) };
193+
}
194+
}
195+
196+
impl Sub for NeonGF2x128 {
197+
type Output = NeonGF2x128;
198+
199+
#[inline(always)]
200+
#[allow(clippy::suspicious_arithmetic_impl)]
201+
fn sub(self, rhs: NeonGF2x128) -> NeonGF2x128 {
202+
NeonGF2x128 {
203+
v: unsafe { veorq_u32(self.v, rhs.v) },
204+
}
205+
}
206+
}
207+
208+
impl SubAssign for NeonGF2x128 {
209+
#[inline(always)]
210+
#[allow(clippy::suspicious_op_assign_impl)]
211+
fn sub_assign(&mut self, rhs: NeonGF2x128) {
212+
self.v = unsafe { veorq_u32(self.v, rhs.v) };
213+
}
214+
}
215+
216+
impl Add for NeonGF2x128 {
217+
type Output = NeonGF2x128;
218+
219+
#[inline(always)]
220+
#[allow(clippy::suspicious_arithmetic_impl)]
221+
fn add(self, rhs: NeonGF2x128) -> NeonGF2x128 {
222+
NeonGF2x128 {
223+
v: unsafe { veorq_u32(self.v, rhs.v) },
224+
}
225+
}
226+
}
227+
228+
impl AddAssign for NeonGF2x128 {
229+
#[inline(always)]
230+
#[allow(clippy::suspicious_op_assign_impl)]
231+
fn add_assign(&mut self, rhs: NeonGF2x128) {
232+
self.v = unsafe { veorq_u32(self.v, rhs.v) };
233+
}
234+
}
235+
236+
impl Add<&NeonGF2x128> for NeonGF2x128 {
237+
type Output = NeonGF2x128;
238+
239+
#[inline(always)]
240+
#[allow(clippy::suspicious_arithmetic_impl)]
241+
fn add(self, rhs: &NeonGF2x128) -> NeonGF2x128 {
242+
NeonGF2x128 {
243+
v: unsafe { veorq_u32(self.v, rhs.v) },
244+
}
245+
}
246+
}
247+
248+
impl AddAssign<&NeonGF2x128> for NeonGF2x128 {
249+
#[inline(always)]
250+
#[allow(clippy::suspicious_op_assign_impl)]
251+
fn add_assign(&mut self, rhs: &NeonGF2x128) {
252+
self.v = unsafe { veorq_u32(self.v, rhs.v) };
253+
}
254+
}
255+
256+
impl Sub<&NeonGF2x128> for NeonGF2x128 {
257+
type Output = NeonGF2x128;
258+
259+
#[inline(always)]
260+
#[allow(clippy::suspicious_arithmetic_impl)]
261+
fn sub(self, rhs: &NeonGF2x128) -> NeonGF2x128 {
262+
NeonGF2x128 {
263+
v: unsafe { veorq_u32(self.v, rhs.v) },
264+
}
265+
}
266+
}
267+
268+
impl SubAssign<&NeonGF2x128> for NeonGF2x128 {
269+
#[inline(always)]
270+
#[allow(clippy::suspicious_op_assign_impl)]
271+
fn sub_assign(&mut self, rhs: &NeonGF2x128) {
272+
self.v = unsafe { veorq_u32(self.v, rhs.v) };
273+
}
274+
}
275+
276+
impl<T: std::borrow::Borrow<NeonGF2x128>> std::iter::Sum<T> for NeonGF2x128 {
    /// Adds up every item of `iter`, starting from `zero()`.
    fn sum<I: Iterator<Item = T>>(iter: I) -> Self {
        let mut total = Self::zero();
        for term in iter {
            total = total + term.borrow();
        }
        total
    }
}
281+
282+
impl<T: std::borrow::Borrow<NeonGF2x128>> std::iter::Product<T> for NeonGF2x128 {
    /// Multiplies every item of `iter` together, starting from `one()`.
    fn product<I: Iterator<Item = T>>(iter: I) -> Self {
        let mut total = Self::one();
        for factor in iter {
            total = total * factor.borrow();
        }
        total
    }
}
287+
288+
impl Neg for NeonGF2x128 {
    type Output = NeonGF2x128;

    /// Negation leaves the register unchanged; the value is returned as-is.
    #[inline(always)]
    #[allow(clippy::suspicious_arithmetic_impl)]
    fn neg(self) -> Self::Output {
        self
    }
}
297+
298+
impl From<u32> for NeonGF2x128 {
299+
#[inline(always)]
300+
fn from(v: u32) -> Self {
301+
assert!(v < 2);
302+
if v == 0 {
303+
NeonGF2x128::ZERO
304+
} else {
305+
NeonGF2x128::ONE
306+
}
307+
}
308+
}
309+
310+
impl From<GF2> for NeonGF2x128 {
311+
#[inline(always)]
312+
fn from(v: GF2) -> Self {
313+
assert!(v.v < 2);
314+
if v.v == 0 {
315+
NeonGF2x128::ZERO
316+
} else {
317+
NeonGF2x128::ONE
318+
}
319+
}
320+
}
43321

44322
// SimdField: NeonGF2x128 packs 128 GF2 scalars.
323+
324+
impl SimdField for NeonGF2x128 {
325+
type Scalar = GF2;
326+
327+
const PACK_SIZE: usize = 128;
328+
329+
#[inline(always)]
330+
fn scale(&self, challenge: &Self::Scalar) -> Self {
331+
if challenge.v == 0 {
332+
Self::ZERO
333+
} else {
334+
*self
335+
}
336+
}
337+
338+
#[inline(always)]
339+
fn pack(base_vec: &[Self::Scalar]) -> Self {
340+
assert_eq!(base_vec.len(), Self::PACK_SIZE);
341+
let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
342+
packed_to_gf2x64
343+
.iter_mut()
344+
.zip(base_vec.chunks(GF2x64::PACK_SIZE))
345+
.for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
346+
347+
unsafe { transmute(packed_to_gf2x64) }
348+
}
349+
350+
#[inline(always)]
351+
fn unpack(&self) -> Vec<Self::Scalar> {
352+
let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
353+
unsafe { transmute(*self) };
354+
355+
packed_to_gf2x64
356+
.iter()
357+
.flat_map(|packed| packed.unpack())
358+
.collect()
359+
}
360+
}

arith/gf2/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ pub use gf2x8::GF2x8;
99
mod gf2x64;
1010
pub use gf2x64::GF2x64;
1111

12-
// mod gf2x128;
13-
// pub use gf2x128::GF2x128;
12+
mod gf2x128;
13+
pub use gf2x128::GF2x128;
1414

1515
#[cfg(test)]
1616
mod tests;

0 commit comments

Comments
 (0)