Skip to content

Commit 18c436d

Browse files
committed
Enable VPCLMULQDQ and AVX512 on Rust 1.89+
AVX512 support was stabilized in Rust 1.89.0, so we can deprecate the feature flag and instead rely on the Rust compiler version to enable AVX512 support, including VPCLMULQDQ. See https://releases.rs/docs/1.89.0/ and rust-lang/rust#138940.
1 parent 1b73252 commit 18c436d

File tree

8 files changed

+156
-106
lines changed

8 files changed

+156
-106
lines changed

Cargo.lock

Lines changed: 8 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ digest = { version = "0.10", features = ["alloc"] }
2424
rand = "0.9"
2525
libc = "0.2.171"
2626
regex = "1.11.1"
27+
rustversion = "1.0"
2728

2829
[dev-dependencies]
2930
criterion = "0.5"
@@ -44,13 +45,8 @@ harness = false
4445
[features]
4546
alloc = []
4647

47-
# enable VPCLMULQDQ support in Rust for x86_64 using nightly toolchain builds
48-
vpclmulqdq = []
49-
50-
# enable AVX512 support in Rust for x86_64 using nightly toolchain builds
51-
avx512 = []
52-
53-
# these features aren't in use, are deprecated, and will be removed in the next MAJOR version
48+
# the features below aren't in use, are deprecated, and will be removed in the next MAJOR version
49+
vpclmulqdq = [] # deprecated, VPCLMULQDQ support landed in 1.89.0
5450
optimize_crc32_auto = [] # deprecated
5551
optimize_crc32_neon_eor3_v9s3x2e_s3 = [] # deprecated
5652
optimize_crc32_neon_v12e_v1 = [] # deprecated

src/algorithm.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,6 @@ use crate::{crc32, crc64};
2727
any(target_arch = "x86", target_arch = "x86_64"),
2828
target_feature(enable = "sse2,sse4.1,pclmulqdq")
2929
)]
30-
#[cfg_attr(
31-
all(target_arch = "x86_64", feature = "vpclmulqdq"),
32-
target_feature(enable = "avx2,vpclmulqdq,avx512f,avx512vl")
33-
)]
3430
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon,aes"))]
3531
pub unsafe fn update<T: ArchOps, W: EnhancedCrcWidth>(
3632
state: W::Value,
@@ -120,10 +116,6 @@ where
120116
any(target_arch = "x86", target_arch = "x86_64"),
121117
target_feature(enable = "sse2,sse4.1,pclmulqdq")
122118
)]
123-
#[cfg_attr(
124-
all(target_arch = "x86_64", feature = "vpclmulqdq"),
125-
target_feature(enable = "avx2,vpclmulqdq,avx512f,avx512vl")
126-
)]
127119
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon,aes"))]
128120
unsafe fn process_large_aligned<T: ArchOps, W: EnhancedCrcWidth>(
129121
bytes: &[u8],

src/arch/mod.rs

Lines changed: 95 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
//! It dispatches to the appropriate architecture-specific implementation
66
//! based on the target architecture.
77
8+
#[cfg(target_arch = "aarch64")]
9+
use std::arch::is_aarch64_feature_detected;
10+
811
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
912
use crate::algorithm;
1013

@@ -14,13 +17,14 @@ use crate::structs::CrcParams;
1417
use crate::structs::{Width32, Width64};
1518

1619
#[cfg(target_arch = "aarch64")]
17-
use crate::arch::aarch64::AArch64Ops;
20+
use aarch64::AArch64Ops;
1821

1922
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
20-
use crate::arch::x86::X86Ops;
23+
use x86::X86Ops;
2124

22-
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
23-
use crate::arch::vpclmulqdq::Vpclmulqdq512Ops;
25+
#[cfg(target_arch = "x86_64")]
26+
#[rustversion::since(1.89)]
27+
use vpclmulqdq::Vpclmulqdq512Ops;
2428

2529
mod aarch64;
2630
mod software;
@@ -33,84 +37,118 @@ mod x86;
3337
/// # Safety
3438
/// May use native CPU features
3539
#[inline]
36-
#[cfg_attr(
37-
any(target_arch = "x86", target_arch = "x86_64"),
38-
target_feature(enable = "sse2,sse4.1,pclmulqdq")
39-
)]
40-
#[cfg_attr(
41-
all(target_arch = "x86_64", feature = "vpclmulqdq"),
42-
target_feature(enable = "avx2,vpclmulqdq,avx512f,avx512vl")
43-
)]
44-
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon,aes"))]
40+
#[cfg(target_arch = "aarch64")]
41+
#[target_feature(enable = "neon,aes")]
4542
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
46-
#[cfg(target_arch = "aarch64")]
47-
{
48-
let ops = AArch64Ops;
43+
let ops = AArch64Ops;
4944

50-
match params.width {
51-
64 => algorithm::update::<AArch64Ops, Width64>(state, bytes, params, &ops),
52-
32 => {
53-
algorithm::update::<AArch64Ops, Width32>(state as u32, bytes, params, &ops) as u64
54-
}
55-
_ => panic!("Unsupported CRC width: {}", params.width),
56-
}
45+
match params.width {
46+
64 => algorithm::update::<AArch64Ops, Width64>(state, bytes, params, &ops),
47+
32 => algorithm::update::<AArch64Ops, Width32>(state as u32, bytes, params, &ops) as u64,
48+
_ => panic!("Unsupported CRC width: {}", params.width),
5749
}
50+
}
5851

59-
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
60-
{
61-
use std::arch::is_x86_feature_detected;
62-
63-
if bytes.len() >= 256 && is_x86_feature_detected!("vpclmulqdq") {
64-
let ops = Vpclmulqdq512Ops::new();
65-
66-
return match params.width {
67-
64 => algorithm::update::<Vpclmulqdq512Ops, Width64>(state, bytes, params, &ops),
68-
32 => algorithm::update::<Vpclmulqdq512Ops, Width32>(
69-
state as u32,
70-
bytes,
71-
params,
72-
&ops,
73-
) as u64,
74-
_ => panic!("Unsupported CRC width: {}", params.width),
75-
};
76-
}
52+
#[rustversion::before(1.89)]
53+
#[inline]
54+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
55+
#[target_feature(enable = "sse2,sse4.1,pclmulqdq")]
56+
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
57+
let ops = X86Ops;
58+
59+
match params.width {
60+
64 => algorithm::update::<X86Ops, Width64>(state, bytes, params, &ops),
61+
32 => algorithm::update::<X86Ops, Width32>(state as u32, bytes, params, &ops) as u64,
62+
_ => panic!("Unsupported CRC width: {}", params.width),
7763
}
64+
}
7865

79-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
80-
{
81-
let ops = X86Ops;
66+
#[rustversion::since(1.89)]
67+
#[inline]
68+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
69+
#[target_feature(enable = "sse2,sse4.1,pclmulqdq")]
70+
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
71+
use std::arch::is_x86_feature_detected;
72+
73+
if bytes.len() >= 256 && is_x86_feature_detected!("vpclmulqdq") {
74+
let ops = Vpclmulqdq512Ops::new();
8275

83-
match params.width {
84-
64 => algorithm::update::<X86Ops, Width64>(state, bytes, params, &ops),
85-
32 => algorithm::update::<X86Ops, Width32>(state as u32, bytes, params, &ops) as u64,
76+
return match params.width {
77+
64 => algorithm::update::<Vpclmulqdq512Ops, Width64>(state, bytes, params, &ops),
78+
32 => algorithm::update::<Vpclmulqdq512Ops, Width32>(state as u32, bytes, params, &ops)
79+
as u64,
8680
_ => panic!("Unsupported CRC width: {}", params.width),
81+
};
82+
}
83+
84+
// fall back to the standard x86 SSE implementation
85+
86+
let ops = X86Ops;
87+
88+
match params.width {
89+
64 => algorithm::update::<X86Ops, Width64>(state, bytes, params, &ops),
90+
32 => algorithm::update::<X86Ops, Width32>(state as u32, bytes, params, &ops) as u64,
91+
_ => panic!("Unsupported CRC width: {}", params.width),
92+
}
93+
}
94+
95+
#[inline]
96+
#[cfg(all(
97+
not(target_arch = "x86"),
98+
not(target_arch = "x86_64"),
99+
not(target_arch = "aarch64")
100+
))]
101+
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
102+
software::update(state, bytes, params)
103+
}
104+
105+
#[rustversion::before(1.89)]
106+
pub fn get_target() -> String {
107+
#[cfg(target_arch = "aarch64")]
108+
{
109+
if is_aarch64_feature_detected!("sha3") {
110+
return "aarch64-neon-eor3-pclmulqdq".to_string();
87111
}
112+
113+
"aarch64-neon-pclmulqdq".to_string()
88114
}
89115

90-
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
91-
return software::update(state, bytes, params);
116+
#[allow(unreachable_code)]
117+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
118+
return "x86-sse-pclmulqdq".to_string();
119+
120+
#[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
121+
return "software-fallback-tables".to_string();
92122
}
93123

124+
#[rustversion::since(1.89)]
94125
pub fn get_target() -> String {
95-
#[cfg(all(target_arch = "aarch64", target_feature = "sha3"))]
96-
return "internal-aarch64-neon-eor3".to_string();
126+
#[cfg(target_arch = "aarch64")]
127+
{
128+
if is_aarch64_feature_detected!("sha3") {
129+
return "aarch64-neon-eor3-pclmulqdq".to_string();
130+
}
97131

98-
#[cfg(all(target_arch = "aarch64", not(target_feature = "sha3")))]
99-
return "internal-aarch64-neon".to_string();
132+
"aarch64-neon-pclmulqdq".to_string()
133+
}
100134

101-
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
135+
#[cfg(target_arch = "x86_64")]
102136
{
103137
if is_x86_feature_detected!("vpclmulqdq") {
104-
return "internal-x86_64-avx512-vpclmulqdq".to_string();
138+
return "x86_64-avx512-vpclmulqdq".to_string();
139+
}
140+
141+
if is_x86_feature_detected!("avx2") {
142+
return "x86_64-avx2-pclmulqdq".to_string();
105143
}
106144
}
107145

108146
#[allow(unreachable_code)]
109147
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
110-
return "internal-x86-sse-pclmulqdq".to_string();
148+
return "x86-sse-pclmulqdq".to_string();
111149

112150
#[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
113-
return "software-fallback".to_string();
151+
return "software-fallback-tables".to_string();
114152
}
115153

116154
#[cfg(test)]

0 commit comments

Comments
 (0)