Skip to content

Commit 3fa561e

Browse files
authored
sha2: Add aarch64 backends (RustCrypto#490)
1 parent a16cb4c commit 3fa561e

File tree

4 files changed

+386
-3
lines changed

4 files changed

+386
-3
lines changed

.github/workflows/sha2.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
with:
2424
# Crate supports MSRV 1.41 without `oid` feature. We test true MSRV
2525
# in the `test-msrv` job.
26-
msrv: 1.57.0
26+
msrv: 1.59.0
2727

2828
# Builds for no_std platforms
2929
build:
@@ -32,7 +32,7 @@ jobs:
3232
strategy:
3333
matrix:
3434
rust:
35-
- 1.57
35+
- 1.59
3636
- stable
3737
target:
3838
- thumbv7em-none-eabi

sha2/src/sha256/aarch64.rs

+145-1
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,159 @@
11
//! SHA-256 `aarch64` backend.
22
3+
// Implementation adapted from mbedtls.
4+
35
// TODO: stdarch intrinsics: RustCrypto/hashes#257
46

7+
use core::arch::{aarch64::*, asm};
8+
9+
use crate::consts::K32;
10+
511
cpufeatures::new!(sha2_hwcap, "sha2");
612

713
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
814
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
915
// after stabilization
1016
if sha2_hwcap::get() {
11-
sha2_asm::compress256(state, blocks);
17+
unsafe { sha256_compress(state, blocks) }
1218
} else {
1319
super::soft::compress(state, blocks);
1420
}
1521
}
22+
23+
#[target_feature(enable = "sha2")]
24+
unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
25+
// SAFETY: Requires the sha2 feature.
26+
27+
// Load state into vectors.
28+
let mut abcd = vld1q_u32(state[0..4].as_ptr());
29+
let mut efgh = vld1q_u32(state[4..8].as_ptr());
30+
31+
// Iterate through the message blocks.
32+
for block in blocks {
33+
// Keep original state values.
34+
let abcd_orig = abcd;
35+
let efgh_orig = efgh;
36+
37+
// Load the message block into vectors, assuming little endianness.
38+
let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr())));
39+
let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr())));
40+
let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr())));
41+
let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr())));
42+
43+
// Rounds 0 to 3
44+
let mut tmp = vaddq_u32(s0, vld1q_u32(&K32[0]));
45+
let mut abcd_prev = abcd;
46+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
47+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
48+
49+
// Rounds 4 to 7
50+
tmp = vaddq_u32(s1, vld1q_u32(&K32[4]));
51+
abcd_prev = abcd;
52+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
53+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
54+
55+
// Rounds 8 to 11
56+
tmp = vaddq_u32(s2, vld1q_u32(&K32[8]));
57+
abcd_prev = abcd;
58+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
59+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
60+
61+
// Rounds 12 to 15
62+
tmp = vaddq_u32(s3, vld1q_u32(&K32[12]));
63+
abcd_prev = abcd;
64+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
65+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
66+
67+
for t in (16..64).step_by(16) {
68+
// Rounds t to t + 3
69+
s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3);
70+
tmp = vaddq_u32(s0, vld1q_u32(&K32[t]));
71+
abcd_prev = abcd;
72+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
73+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
74+
75+
// Rounds t + 4 to t + 7
76+
s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0);
77+
tmp = vaddq_u32(s1, vld1q_u32(&K32[t + 4]));
78+
abcd_prev = abcd;
79+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
80+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
81+
82+
// Rounds t + 8 to t + 11
83+
s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1);
84+
tmp = vaddq_u32(s2, vld1q_u32(&K32[t + 8]));
85+
abcd_prev = abcd;
86+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
87+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
88+
89+
// Rounds t + 12 to t + 15
90+
s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2);
91+
tmp = vaddq_u32(s3, vld1q_u32(&K32[t + 12]));
92+
abcd_prev = abcd;
93+
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
94+
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
95+
}
96+
97+
// Add the block-specific state to the original state.
98+
abcd = vaddq_u32(abcd, abcd_orig);
99+
efgh = vaddq_u32(efgh, efgh_orig);
100+
}
101+
102+
// Store vectors into state.
103+
vst1q_u32(state[0..4].as_mut_ptr(), abcd);
104+
vst1q_u32(state[4..8].as_mut_ptr(), efgh);
105+
}
106+
107+
// TODO remove these polyfills once SHA2 intrinsics land
108+
109+
#[inline(always)]
110+
unsafe fn vsha256hq_u32(
111+
mut hash_efgh: uint32x4_t,
112+
hash_abcd: uint32x4_t,
113+
wk: uint32x4_t,
114+
) -> uint32x4_t {
115+
asm!(
116+
"SHA256H {:q}, {:q}, {:v}.4S",
117+
inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
118+
options(pure, nomem, nostack, preserves_flags)
119+
);
120+
hash_efgh
121+
}
122+
123+
#[inline(always)]
124+
unsafe fn vsha256h2q_u32(
125+
mut hash_efgh: uint32x4_t,
126+
hash_abcd: uint32x4_t,
127+
wk: uint32x4_t,
128+
) -> uint32x4_t {
129+
asm!(
130+
"SHA256H2 {:q}, {:q}, {:v}.4S",
131+
inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
132+
options(pure, nomem, nostack, preserves_flags)
133+
);
134+
hash_efgh
135+
}
136+
137+
#[inline(always)]
138+
unsafe fn vsha256su0q_u32(mut w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
139+
asm!(
140+
"SHA256SU0 {:v}.4S, {:v}.4S",
141+
inout(vreg) w0_3, in(vreg) w4_7,
142+
options(pure, nomem, nostack, preserves_flags)
143+
);
144+
w0_3
145+
}
146+
147+
#[inline(always)]
148+
unsafe fn vsha256su1q_u32(
149+
mut tw0_3: uint32x4_t,
150+
w8_11: uint32x4_t,
151+
w12_15: uint32x4_t,
152+
) -> uint32x4_t {
153+
asm!(
154+
"SHA256SU1 {:v}.4S, {:v}.4S, {:v}.4S",
155+
inout(vreg) tw0_3, in(vreg) w8_11, in(vreg) w12_15,
156+
options(pure, nomem, nostack, preserves_flags)
157+
);
158+
tw0_3
159+
}

sha2/src/sha512.rs

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ cfg_if::cfg_if! {
1515
}
1616
mod x86;
1717
use x86::compress;
18+
} else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] {
19+
mod soft;
20+
mod aarch64;
21+
use aarch64::compress;
1822
} else {
1923
mod soft;
2024
use soft::compress;

0 commit comments

Comments
 (0)