Skip to content

Commit 9e5a416

Browse files
committed
Port code to "SIMD groundwork part 1"
Port the SIMD code to rust-lang/rust#27169
1 parent 8d895f7 commit 9e5a416

File tree

4 files changed

+185
-45
lines changed

4 files changed

+185
-45
lines changed

Cargo.toml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,12 @@ license = "MIT"
1010

1111
[features]
1212
bench = []
13-
simd = ["simdty"]
13+
simd = []
1414
simd_opt = ["simd"]
1515
simd_asm = ["simd_opt"]
1616

1717
[dependencies]
1818
constant_time_eq = "0.1.0"
1919

20-
[dependencies.simdty]
21-
version = "0.0.3"
22-
optional = true
23-
2420
[dev-dependencies]
2521
rustc-serialize = "0.3.15"

src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,17 @@
2727
//! A pure Rust implementation of BLAKE2 based on the draft RFC.
2828
2929
#![cfg_attr(all(feature = "bench", test), feature(test))]
30-
#![cfg_attr(feature = "simd", feature(link_llvm_intrinsics, simd, simd_ffi))]
30+
#![cfg_attr(feature = "simd", feature(platform_intrinsics, simd_basics))]
31+
#![cfg_attr(feature = "simd_opt", feature(cfg_target_feature))]
3132
#![cfg_attr(feature = "simd_asm", feature(asm))]
3233

3334
#[cfg(all(feature = "bench", test))] extern crate test;
34-
#[cfg(feature = "simd")] extern crate simdty;
3535

3636
extern crate constant_time_eq;
3737

3838
mod as_bytes;
3939
mod bytes;
40+
mod simdty;
4041
mod simd;
4142

4243
#[macro_use]

src/simd.rs

Lines changed: 105 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -27,28 +27,64 @@
2727
#[cfg(feature = "simd_opt")]
2828
use std::mem::transmute;
2929

30-
#[cfg(feature = "simd")]
3130
pub use simdty::{u32x4, u64x4};
3231

33-
#[cfg(not(feature = "simd"))]
34-
#[derive(Clone, Copy, Debug)]
35-
#[repr(C)]
36-
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
32+
#[cfg(feature = "simd")]
33+
extern "platform-intrinsic" {
34+
fn simd_add<T>(x: T, y: T) -> T;
35+
fn simd_shl<T>(x: T, y: T) -> T;
36+
fn simd_shr<T>(x: T, y: T) -> T;
37+
fn simd_xor<T>(x: T, y: T) -> T;
38+
}
3739

38-
#[cfg(not(feature = "simd"))]
39-
#[derive(Clone, Copy, Debug)]
40-
#[repr(C)]
41-
pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
40+
#[cfg(feature = "simd_opt")]
41+
extern "platform-intrinsic" {
42+
fn simd_shuffle8<T, Elem>(v: T, w: T,
43+
i0: u32, i1: u32, i2: u32, i3: u32,
44+
i4: u32, i5: u32, i6: u32, i7: u32) -> T;
4245

43-
#[cfg(not(feature = "simd"))]
44-
use std::ops::BitXor;
46+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
47+
fn simd_shuffle16<T, Elem>(v: T, w: T,
48+
i0: u32, i1: u32, i2: u32, i3: u32,
49+
i4: u32, i5: u32, i6: u32, i7: u32,
50+
i8: u32, i9: u32, i10: u32, i11: u32,
51+
i12: u32, i13: u32, i14: u32, i15: u32,
52+
) -> T;
53+
}
4554

46-
macro_rules! impl_bitxor {
55+
use std::ops::{Add, BitXor, Shl, Shr};
56+
57+
macro_rules! impl_ops {
4758
($vec:ident) => {
48-
#[cfg(not(feature = "simd"))]
59+
impl Add for $vec {
60+
type Output = Self;
61+
62+
#[cfg(feature = "simd")]
63+
#[inline(always)]
64+
fn add(self, rhs: Self) -> Self::Output {
65+
unsafe { simd_add(self, rhs) }
66+
}
67+
68+
#[cfg(not(feature = "simd"))]
69+
#[inline(always)]
70+
fn add(self, rhs: Self) -> Self::Output {
71+
$vec(self.0.wrapping_add(rhs.0),
72+
self.1.wrapping_add(rhs.1),
73+
self.2.wrapping_add(rhs.2),
74+
self.3.wrapping_add(rhs.3))
75+
}
76+
}
77+
4978
impl BitXor for $vec {
5079
type Output = Self;
5180

81+
#[cfg(feature = "simd")]
82+
#[inline(always)]
83+
fn bitxor(self, rhs: Self) -> Self::Output {
84+
unsafe { simd_xor(self, rhs) }
85+
}
86+
87+
#[cfg(not(feature = "simd"))]
5288
#[inline(always)]
5389
fn bitxor(self, rhs: Self) -> Self::Output {
5490
$vec(self.0 ^ rhs.0,
@@ -57,11 +93,49 @@ macro_rules! impl_bitxor {
5793
self.3 ^ rhs.3)
5894
}
5995
}
96+
97+
impl Shl<$vec> for $vec {
98+
type Output = Self;
99+
100+
#[cfg(feature = "simd")]
101+
#[inline(always)]
102+
fn shl(self, rhs: Self) -> Self::Output {
103+
unsafe { simd_shl(self, rhs) }
104+
}
105+
106+
#[cfg(not(feature = "simd"))]
107+
#[inline(always)]
108+
fn shl(self, rhs: Self) -> Self::Output {
109+
$vec(self.0 << rhs.0,
110+
self.1 << rhs.1,
111+
self.2 << rhs.2,
112+
self.3 << rhs.3)
113+
}
114+
}
115+
116+
impl Shr<$vec> for $vec {
117+
type Output = Self;
118+
119+
#[cfg(feature = "simd")]
120+
#[inline(always)]
121+
fn shr(self, rhs: Self) -> Self::Output {
122+
unsafe { simd_shr(self, rhs) }
123+
}
124+
125+
#[cfg(not(feature = "simd"))]
126+
#[inline(always)]
127+
fn shr(self, rhs: Self) -> Self::Output {
128+
$vec(self.0 >> rhs.0,
129+
self.1 >> rhs.1,
130+
self.2 >> rhs.2,
131+
self.3 >> rhs.3)
132+
}
133+
}
60134
}
61135
}
62136

63-
impl_bitxor!(u32x4);
64-
impl_bitxor!(u64x4);
137+
impl_ops!(u32x4);
138+
impl_ops!(u64x4);
65139

66140
pub trait Vector4<T>: Copy {
67141
fn gather(src: &[T], i0: usize, i1: usize, i2: usize, i3: usize) -> Self;
@@ -118,26 +192,16 @@ macro_rules! impl_vector4_common {
118192
self.3.to_le())
119193
}
120194

121-
#[cfg(feature = "simd")]
122195
#[inline(always)]
123196
fn wrapping_add(self, rhs: Self) -> Self { self + rhs }
124197

125-
#[cfg(not(feature = "simd"))]
126-
#[inline(always)]
127-
fn wrapping_add(self, rhs: Self) -> Self {
128-
$vec(self.0.wrapping_add(rhs.0),
129-
self.1.wrapping_add(rhs.1),
130-
self.2.wrapping_add(rhs.2),
131-
self.3.wrapping_add(rhs.3))
132-
}
133-
134198
#[cfg(feature = "simd")]
135199
#[inline(always)]
136200
fn rotate_right_any(self, n: u32) -> Self {
137201
let r = n as $word;
138202
let l = $bits - r;
139203

140-
(self >> $vec(r, r, r, r)) | (self << $vec(l, l, l, l))
204+
(self >> $vec(r, r, r, r)) ^ (self << $vec(l, l, l, l))
141205
}
142206

143207
#[cfg(not(feature = "simd"))]
@@ -174,10 +238,11 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
174238
use simdty::u16x8;
175239
unsafe {
176240
let tmp: u16x8 = transmute(vec);
177-
transmute(u16x8(tmp.1, tmp.0,
178-
tmp.3, tmp.2,
179-
tmp.5, tmp.4,
180-
tmp.7, tmp.6))
241+
transmute(simd_shuffle8::<u16x8, u16>(tmp, tmp,
242+
1, 0,
243+
3, 2,
244+
5, 4,
245+
7, 6))
181246
}
182247
}
183248

@@ -205,10 +270,11 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
205270
use simdty::u32x8;
206271
unsafe {
207272
let tmp: u32x8 = transmute(vec);
208-
transmute(u32x8(tmp.1, tmp.0,
209-
tmp.3, tmp.2,
210-
tmp.5, tmp.4,
211-
tmp.7, tmp.6))
273+
transmute(simd_shuffle8::<u32x8, u32>(tmp, tmp,
274+
1, 0,
275+
3, 2,
276+
5, 4,
277+
7, 6))
212278
}
213279
}
214280

@@ -219,10 +285,11 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
219285
use simdty::u16x16;
220286
unsafe {
221287
let tmp: u16x16 = transmute(vec);
222-
transmute(u16x16(tmp.1, tmp.2, tmp.3, tmp.0,
223-
tmp.5, tmp.6, tmp.7, tmp.4,
224-
tmp.9, tmp.10, tmp.11, tmp.8,
225-
tmp.13, tmp.14, tmp.15, tmp.12))
288+
transmute(simd_shuffle16::<u16x16, u16>(tmp, tmp,
289+
1, 2, 3, 0,
290+
5, 6, 7, 4,
291+
9, 10, 11, 8,
292+
13, 14, 15, 12))
226293
}
227294
}
228295

src/simdty.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Copyright (c) 2015 Cesar Eduardo Barros
2+
//
3+
// Permission is hereby granted, free of charge, to any
4+
// person obtaining a copy of this software and associated
5+
// documentation files (the "Software"), to deal in the
6+
// Software without restriction, including without
7+
// limitation the rights to use, copy, modify, merge,
8+
// publish, distribute, sublicense, and/or sell copies of
9+
// the Software, and to permit persons to whom the Software
10+
// is furnished to do so, subject to the following
11+
// conditions:
12+
//
13+
// The above copyright notice and this permission notice
14+
// shall be included in all copies or substantial portions
15+
// of the Software.
16+
//
17+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18+
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19+
// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20+
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21+
// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22+
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23+
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24+
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25+
// DEALINGS IN THE SOFTWARE.
26+
27+
#[cfg(feature = "simd")]
28+
macro_rules! decl_vec {
29+
($($decl:item)*) => {
30+
$(
31+
#[allow(non_camel_case_types)]
32+
#[derive(Clone, Copy, Debug)]
33+
#[repr(simd)]
34+
$decl
35+
)*
36+
}
37+
}
38+
39+
#[cfg(not(feature = "simd"))]
40+
macro_rules! decl_vec {
41+
($($decl:item)*) => {
42+
$(
43+
#[derive(Clone, Copy, Debug)]
44+
#[repr(C)]
45+
$decl
46+
)*
47+
}
48+
}
49+
50+
decl_vec!{
51+
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
52+
pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
53+
}
54+
55+
#[cfg(feature = "simd_opt")]
56+
decl_vec!{
57+
pub struct u16x8(pub u16, pub u16, pub u16, pub u16,
58+
pub u16, pub u16, pub u16, pub u16);
59+
pub struct u32x8(pub u32, pub u32, pub u32, pub u32,
60+
pub u32, pub u32, pub u32, pub u32);
61+
}
62+
63+
#[cfg(feature = "simd_opt")]
64+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
65+
decl_vec!{
66+
pub struct u16x16(pub u16, pub u16, pub u16, pub u16,
67+
pub u16, pub u16, pub u16, pub u16,
68+
pub u16, pub u16, pub u16, pub u16,
69+
pub u16, pub u16, pub u16, pub u16);
70+
}
71+
72+
#[cfg(feature = "simd_asm")]
73+
#[cfg(target_arch = "arm")]
74+
decl_vec!{
75+
pub struct u64x2(pub u64, pub u64);
76+
}

0 commit comments

Comments
 (0)