Skip to content

Commit 20adbee

Browse files
authored
Auto merge of #452 - berkus:fix/simd-arm-shuffle, r=jdm
Fix arm simd shuffling arguments. According to the packed_simd docs, the syntax for simd_shuffle4() is as follows: "The indices must be in range [0, M * N) where M is the number of input vectors (1 or 2) and N is the number of lanes of the input vectors. The indices i in range [0, N) refer to the i-th element of vec0, while the indices in range [N, 2*N) refer to the i - N-th element of vec1." I did not find an implementation or documentation for simd_shuffle4(), but I believe packed_simd implements exactly the same interface. Plus, implementing this change has fixed font-kit glyph output on an Apple M1 Mac (64-bit arm). Closes #450
2 parents f1f9df5 + bd6d015 commit 20adbee

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

simd/src/arm/mod.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ impl F32x2 {
129129

130130
#[inline]
131131
pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
132-
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) }
132+
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) }
133133
}
134134
}
135135

@@ -314,17 +314,17 @@ impl F32x4 {
314314

315315
#[inline]
316316
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
317-
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) }
317+
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) }
318318
}
319319

320320
#[inline]
321321
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
322-
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) }
322+
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 6, 7])) }
323323
}
324324

325325
#[inline]
326326
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
327-
unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 2, 3])) }
327+
unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 6, 7])) }
328328
}
329329

330330
// Conversions
@@ -461,7 +461,7 @@ impl I32x2 {
461461

462462
#[inline]
463463
pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
464-
unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) }
464+
unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) }
465465
}
466466

467467
// Conversions
@@ -471,7 +471,7 @@ impl I32x2 {
471471
pub fn to_f32x2(self) -> F32x2 {
472472
unsafe { F32x2(simd_cast(self.0)) }
473473
}
474-
474+
475475
#[inline]
476476
pub fn to_i32x4(self) -> I32x4 {
477477
self.concat_xy_xy(I32x2::default())

0 commit comments

Comments
 (0)