Skip to content

Commit e41d1cb

Browse files
authored
Merge pull request #592 from heiher/loong-lsx
Add 128-bit SIMD implementation for LoongArch
2 parents 25365fc + a35580f commit e41d1cb

File tree

3 files changed

+149
-0
lines changed

3 files changed

+149
-0
lines changed

src/control/group/lsx.rs

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
use super::super::{BitMask, Tag};
2+
use core::mem;
3+
use core::num::NonZeroU16;
4+
5+
use core::arch::loongarch64::*;
6+
use mem::transmute;
7+
8+
/// Integer word wrapped by `BitMask` for this backend; the `match_*` methods
/// of `Group` build their masks from a `u16`.
pub(crate) type BitMaskWord = u16;
9+
/// Non-zero counterpart of `BitMaskWord`.
pub(crate) type NonZeroBitMaskWord = NonZeroU16;
10+
/// NOTE(review): presumably the number of mask bits used per tag (one bit per
/// byte lane here) -- confirm against the `BitMask` definition.
pub(crate) const BITMASK_STRIDE: usize = 1;
11+
/// All 16 bits of the word set.
/// NOTE(review): presumably the bits of the word that can carry a match --
/// confirm against the `BitMask` definition.
pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
12+
/// `!0` on a `u16`, i.e. every bit set (the same value as `BITMASK_MASK`).
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
13+
14+
/// Abstraction over a group of control tags which can be scanned in
15+
/// parallel.
16+
///
17+
/// This implementation uses a 128-bit LSX value, stored as a `v16i8` vector.
/// Tags are loaded straight from a `*const Tag` into the vector, so each byte
/// lane holds one control tag.
18+
#[derive(Copy, Clone)]
19+
pub(crate) struct Group(v16i8);
20+
21+
// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859
22+
#[allow(clippy::use_self)]
23+
impl Group {
24+
/// Number of bytes in the group, taken from the size of the wrapped vector
/// type (a 128-bit LSX value).
25+
pub(crate) const WIDTH: usize = mem::size_of::<Self>();
26+
27+
/// Returns a full group of empty tags, suitable for use as the initial
28+
/// value for an empty hash table.
29+
///
30+
/// This is guaranteed to be aligned to the group size: the tags live in a
/// `repr(C)` struct whose zero-length `[Group; 0]` field raises the struct's
/// alignment to that of `Group` without adding any bytes.
31+
#[inline]
32+
#[allow(clippy::items_after_statements)]
33+
pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
34+
// `repr(C)` keeps the fields in declaration order, so `tags` starts at the
// (aligned) beginning of the struct, right after the zero-sized `_align`.
#[repr(C)]
35+
struct AlignedTags {
36+
// Zero-sized; present only to inherit `Group`'s alignment.
_align: [Group; 0],
37+
tags: [Tag; Group::WIDTH],
38+
}
39+
// A `const` item so the function can stay `const fn`; every tag is `Tag::EMPTY`.
const ALIGNED_TAGS: AlignedTags = AlignedTags {
40+
_align: [],
41+
tags: [Tag::EMPTY; Group::WIDTH],
42+
};
43+
&ALIGNED_TAGS.tags
44+
}
45+
46+
/// Loads a group of tags starting at the given address.
///
/// The address does not need to be aligned (see the `// unaligned load` note
/// on the lint attribute below).
///
/// # Safety
///
/// `ptr` must be valid for reading a whole group (`Group::WIDTH` bytes),
/// because the full vector is loaded from it.
47+
#[inline]
48+
#[allow(clippy::cast_ptr_alignment)] // unaligned load
49+
pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
50+
// `lsx_vld::<0>` loads one vector from `ptr` with an immediate offset of 0.
Group(lsx_vld::<0>(ptr.cast()))
51+
}
52+
53+
/// Loads a group of tags starting at the given address, which must be
54+
/// aligned to `mem::align_of::<Group>()`.
///
/// # Safety
///
/// `ptr` must be valid for reading a whole group (`Group::WIDTH` bytes) and
/// aligned as described above. The alignment is only verified by a
/// `debug_assert_eq!`, so it is not checked when debug assertions are off.
55+
#[inline]
56+
#[allow(clippy::cast_ptr_alignment)]
57+
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
58+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
59+
// Same load intrinsic as `load`; only the caller contract differs.
Group(lsx_vld::<0>(ptr.cast()))
60+
}
61+
62+
/// Stores the group of tags to the given address, which must be
63+
/// aligned to `mem::align_of::<Group>()`.
///
/// # Safety
///
/// `ptr` must be valid for writing a whole group (`Group::WIDTH` bytes) and
/// aligned as described above. The alignment is only verified by a
/// `debug_assert_eq!`, so it is not checked when debug assertions are off.
64+
#[inline]
65+
#[allow(clippy::cast_ptr_alignment)]
66+
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
67+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
68+
// `lsx_vst::<0>` stores the whole vector to `ptr` with an immediate offset of 0.
lsx_vst::<0>(self.0, ptr.cast());
69+
}
70+
71+
/// Returns a `BitMask` indicating all tags in the group which have
72+
/// the given value.
///
/// Bit `i` of the mask corresponds to byte lane `i` of the group.
73+
#[inline]
74+
pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
75+
#[allow(clippy::missing_transmute_annotations)]
76+
// SAFETY: only register-to-register LSX intrinsics are used (no memory
// access), and this module is compiled only when the `lsx` target feature
// is enabled. The `transmute` merely reinterprets the vector as the lane
// type expected by the next intrinsic.
unsafe {
77+
// Broadcast the tag byte to every lane and compare lane-wise for equality;
// matching lanes become all-ones (negative as signed bytes).
let cmp = lsx_vseq_b(self.0, lsx_vreplgr2vr_b(tag.0 as i32));
78+
// `vmskltz_b` gathers the sign bit of each byte lane into the low halfword,
// which `vpickve2gr_hu::<0>` then extracts as an unsigned scalar.
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(cmp))) as u16)
79+
}
80+
}
81+
82+
/// Returns a `BitMask` indicating all tags in the group which are
83+
/// `EMPTY`.
84+
#[inline]
85+
pub(crate) fn match_empty(self) -> BitMask {
86+
#[allow(clippy::missing_transmute_annotations)]
87+
// SAFETY: only register-to-register LSX intrinsics are used (no memory
// access), and this module is compiled only when the `lsx` target feature
// is enabled. The `transmute` merely reinterprets the vector as the lane
// type expected by the next intrinsic.
unsafe {
88+
// Compare every lane against `Tag::EMPTY` passed as an immediate. The
// `as i8 as i32` cast reinterprets the tag byte as signed and sign-extends
// it, which is the form the immediate operand is given in.
let cmp = lsx_vseqi_b::<{ Tag::EMPTY.0 as i8 as i32 }>(self.0);
89+
// Collect the sign bit of each lane (set where the compare matched) into
// the low halfword and extract it as the mask.
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(cmp))) as u16)
90+
}
91+
}
92+
93+
/// Returns a `BitMask` indicating all tags in the group which are
93+
/// `EMPTY` or `DELETED`.
94+
#[inline]
95+
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
96+
#[allow(clippy::missing_transmute_annotations)]
97+
// SAFETY: only register-to-register LSX intrinsics are used (no memory
// access), and this module is compiled only when the `lsx` target feature
// is enabled. The `transmute` merely reinterprets the vector as the lane
// type expected by the next intrinsic.
unsafe {
98+
// A tag is EMPTY or DELETED iff the high bit is set, so no compare is
// needed: `vmskltz_b` collects the sign bit of each raw tag byte directly.
99+
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(self.0))) as u16)
100+
}
101+
}
103+
104+
/// Returns a `BitMask` indicating all tags in the group which are full.
104+
#[inline]
105+
pub(crate) fn match_full(&self) -> BitMask {
106+
#[allow(clippy::missing_transmute_annotations)]
107+
// SAFETY: only register-to-register LSX intrinsics are used (no memory
// access), and this module is compiled only when the `lsx` target feature
// is enabled. The `transmute` merely reinterprets the vector as the lane
// type expected by the next intrinsic.
unsafe {
108+
// A tag is FULL iff its high bit is clear (EMPTY and DELETED both have it
// set), so select the lanes that are >= 0 as signed bytes via `vmskgez_b`.
109+
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskgez_b(self.0))) as u16)
110+
}
111+
}
113+
114+
/// Performs the following transformation on all tags in the group:
114+
/// - `EMPTY => EMPTY`
115+
/// - `DELETED => EMPTY`
116+
/// - `FULL => DELETED`
///
/// Returns the transformed group; `self` is taken by value and not modified
/// in place.
117+
#[inline]
118+
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
119+
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
120+
// and high_bit = 0 (FULL) to 1000_0000
121+
//
122+
// Here's this logic expanded to concrete values:
123+
// let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
124+
// 1111_1111 | 1000_0000 = 1111_1111
125+
// 0000_0000 | 1000_0000 = 1000_0000
126+
#[allow(clippy::missing_transmute_annotations)]
127+
// SAFETY: only register-to-register LSX intrinsics are used (no memory
// access), and this module is compiled only when the `lsx` target feature
// is enabled. Each `transmute` merely reinterprets a vector as the lane
// type expected by `lsx_vor_v` or by `Group`.
unsafe {
128+
let zero = lsx_vreplgr2vr_b(0);
129+
// Signed lane-wise `tag < 0`: all-ones for special tags, zero for full ones.
let special = lsx_vslt_b(self.0, zero);
130+
// OR with a vector of `Tag::DELETED` bytes, as laid out in the table above.
Group(transmute(lsx_vor_v(
131+
transmute(special),
132+
transmute(lsx_vreplgr2vr_b(Tag::DELETED.0 as i32)),
133+
)))
134+
}
135+
}
137+
}

src/control/group/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ cfg_if! {
2424
))] {
2525
mod neon;
2626
use neon as imp;
27+
} else if #[cfg(all(
28+
feature = "nightly",
29+
target_arch = "loongarch64",
30+
target_feature = "lsx",
31+
not(miri),
32+
))] {
33+
mod lsx;
34+
use lsx as imp;
2735
} else {
2836
mod generic;
2937
use generic as imp;

src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@
4242
feature = "nightly",
4343
allow(clippy::incompatible_msrv, internal_features)
4444
)]
45+
#![cfg_attr(
46+
all(feature = "nightly", target_arch = "loongarch64"),
47+
feature(stdarch_loongarch)
48+
)]
4549

4650
/// Default hasher for [`HashMap`] and [`HashSet`].
4751
#[cfg(feature = "default-hasher")]

0 commit comments

Comments
 (0)