Skip to content

Commit d1c1a98

Browse files
committed
Move message words gather to SIMD code
For each round, BLAKE2 loads a different set of words from the message, controlled by the SIGMA array. This seems like an obvious place to use a SIMD gather instruction. To allow for further experimentation, move the gathering of the message words into the SIMD code.
1 parent 907e2a6 commit d1c1a98

File tree

2 files changed

Lines changed: 24 additions & 15 deletions

2 files changed

+24
-15
lines changed

src/blake2.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ macro_rules! blake2_impl {
4848
use $crate::as_bytes::AsBytes;
4949
use $crate::bytes::{MutableByteVector, copy_memory};
5050
use $crate::constant_time_eq::constant_time_eq;
51-
use $crate::simd::{Vector, $vec};
51+
use $crate::simd::{Vector4, $vec};
5252

5353
/// Container for a hash result.
5454
///
@@ -245,16 +245,16 @@ macro_rules! blake2_impl {
245245

246246
#[inline(always)]
247247
fn round(v: &mut [$vec; 4], m: &[$word; 16], s: &[usize; 16]) {
248-
$state::quarter_round(v, $R1, $R2, $vec(
249-
m[s[ 0]], m[s[ 2]], m[s[ 4]], m[s[ 6]]));
250-
$state::quarter_round(v, $R3, $R4, $vec(
251-
m[s[ 1]], m[s[ 3]], m[s[ 5]], m[s[ 7]]));
248+
$state::quarter_round(v, $R1, $R2, $vec::gather(m,
249+
s[ 0], s[ 2], s[ 4], s[ 6]));
250+
$state::quarter_round(v, $R3, $R4, $vec::gather(m,
251+
s[ 1], s[ 3], s[ 5], s[ 7]));
252252

253253
$state::shuffle(v);
254-
$state::quarter_round(v, $R1, $R2, $vec(
255-
m[s[ 8]], m[s[10]], m[s[12]], m[s[14]]));
256-
$state::quarter_round(v, $R3, $R4, $vec(
257-
m[s[ 9]], m[s[11]], m[s[13]], m[s[15]]));
254+
$state::quarter_round(v, $R1, $R2, $vec::gather(m,
255+
s[ 8], s[10], s[12], s[14]));
256+
$state::quarter_round(v, $R3, $R4, $vec::gather(m,
257+
s[ 9], s[11], s[13], s[15]));
258258
$state::unshuffle(v);
259259
}
260260

src/simd.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ macro_rules! impl_bitxor {
6363
impl_bitxor!(u32x4);
6464
impl_bitxor!(u64x4);
6565

66-
pub trait Vector: Copy {
66+
pub trait Vector4<T>: Copy {
67+
fn gather(src: &[T], i0: usize, i1: usize, i2: usize, i3: usize) -> Self;
68+
6769
fn from_le(self) -> Self;
6870
fn to_le(self) -> Self;
6971

@@ -82,8 +84,14 @@ pub trait Vector: Copy {
8284
#[inline(always)] fn shuffle_right_3(self) -> Self { self.shuffle_left_1() }
8385
}
8486

85-
macro_rules! impl_vector_common {
87+
macro_rules! impl_vector4_common {
8688
($vec:ident, $word:ident, $bits:expr) => {
89+
#[inline(always)]
90+
fn gather(src: &[$word], i0: usize, i1: usize,
91+
i2: usize, i3: usize) -> Self {
92+
$vec(src[i0], src[i1], src[i2], src[i3])
93+
}
94+
8795
#[cfg(target_endian = "little")]
8896
#[inline(always)]
8997
fn from_le(self) -> Self { self }
@@ -173,8 +181,8 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
173181
}
174182
}
175183

176-
impl Vector for u32x4 {
177-
impl_vector_common!(u32x4, u32, 32);
184+
impl Vector4<u32> for u32x4 {
185+
impl_vector4_common!(u32x4, u32, 32);
178186

179187
#[cfg(feature = "simd_opt")]
180188
#[cfg(any(target_arch = "arm", target_arch = "aarch64",
@@ -244,8 +252,9 @@ fn u64x4_rotate_right_u8(vec: u64x4, n: u8) -> u64x4 {
244252
u64x4(tmp0.0, tmp0.1, tmp1.0, tmp1.1)
245253
}
246254

247-
impl Vector for u64x4 {
248-
impl_vector_common!(u64x4, u64, 64);
255+
impl Vector4<u64> for u64x4 {
256+
impl_vector4_common!(u64x4, u64, 64);
257+
249258

250259
#[cfg(feature = "simd_opt")]
251260
#[cfg(any(all(target_arch = "arm", not(feature = "simd_asm")),

0 commit comments

Comments
 (0)