Skip to content

Commit c25dcdf

Browse files
authored
[SYCL][ESIMD] Update ESIMD tests and add raw send support. (#2482)
* [SYCL][ESIMD] Update ESIMD tests and add raw send support.
1 parent 056f790 commit c25dcdf

File tree

8 files changed

+1439
-3
lines changed

8 files changed

+1439
-3
lines changed

Diff for: sycl/include/CL/sycl/INTEL/esimd/detail/esimd_memory_intrin.hpp

+304
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,151 @@ __esimd_media_block_store(unsigned modififer, TACC handle, unsigned plane,
227227
unsigned width, unsigned x, unsigned y,
228228
sycl::INTEL::gpu::vector_type_t<Ty, M * N> vals);
229229

230+
/// \brief esimd_get_value
231+
///
232+
/// @param sid the SYCL accessor.
233+
///
234+
/// Returns the binding table index value.
235+
///
236+
template <typename SurfIndAliasTy>
237+
SYCL_EXTERNAL uint32_t __esimd_get_value(SurfIndAliasTy sid);
238+
239+
/// \brief Raw sends load.
240+
///
241+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
242+
///
243+
/// @param execSize the execution size, which must be a compile time constant.
244+
///
245+
/// @param pred the predicate to specify enabled channels.
246+
///
247+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
248+
/// constant.
249+
///
250+
/// @param numSrc1 the number of GRFs for source-1, which must be a compile time
251+
/// constant.
252+
///
253+
/// @param numDst the number of GRFs for destination, which must be a compile
254+
/// time constant.
255+
///
256+
/// @param sfid the shared function ID, which must be a compile time constant.
257+
///
258+
/// @param exDesc the extended message descriptor.
259+
///
260+
/// @param msgDesc the message descriptor.
261+
///
262+
/// @param msgSrc0 the first source operand of send message.
263+
///
264+
/// @param msgSrc1 the second source operand of send message.
265+
///
266+
/// @param msgDst the destination operand of send message.
267+
///
268+
/// Returns a simd vector of type Ty1 and size N1.
269+
///
270+
template <typename Ty1, int N1, typename Ty2, int N2, typename Ty3, int N3,
271+
int N = 16>
272+
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<Ty1, N1>
273+
__esimd_raw_sends_load(uint8_t modifier, uint8_t execSize,
274+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
275+
uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst,
276+
uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
277+
sycl::INTEL::gpu::vector_type_t<Ty2, N2> msgSrc0,
278+
sycl::INTEL::gpu::vector_type_t<Ty3, N3> msgSrc1,
279+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgDst);
280+
281+
/// \brief Raw send load.
282+
///
283+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
284+
///
285+
/// @param execSize the execution size, which must be a compile time constant.
286+
///
287+
/// @param pred the predicate to specify enabled channels.
288+
///
289+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
290+
/// constant.
291+
///
292+
/// @param numDst the number of GRFs for destination, which must be a compile
293+
/// time constant.
294+
///
295+
/// @param sfid the shared function ID, which must be a compile time constant.
296+
///
297+
/// @param exDesc the extended message descriptor.
298+
///
299+
/// @param msgDesc the message descriptor.
300+
///
301+
/// @param msgSrc0 the first source operand of send message.
302+
///
303+
/// @param msgDst the destination operand of send message.
304+
///
305+
/// Returns a simd vector of type Ty1 and size N1.
306+
///
307+
template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
308+
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<Ty1, N1>
309+
__esimd_raw_send_load(uint8_t modifier, uint8_t execSize,
310+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
311+
uint8_t numSrc0, uint8_t numDst, uint8_t sfid,
312+
uint32_t exDesc, uint32_t msgDesc,
313+
sycl::INTEL::gpu::vector_type_t<Ty2, N2> msgSrc0,
314+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgDst);
315+
316+
/// \brief Raw sends store.
317+
///
318+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
319+
///
320+
/// @param execSize the execution size, which must be a compile time constant.
321+
///
322+
/// @param pred the predicate to specify enabled channels.
323+
///
324+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
325+
/// constant.
326+
///
327+
/// @param numSrc1 the number of GRFs for source-1, which must be a compile time
328+
/// constant.
329+
///
330+
/// @param sfid the shared function ID, which must be a compile time constant.
331+
///
332+
/// @param exDesc the extended message descriptor.
333+
///
334+
/// @param msgDesc the message descriptor.
335+
///
336+
/// @param msgSrc0 the first source operand of send message.
337+
///
338+
/// @param msgSrc1 the second source operand of send message.
339+
///
340+
template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
341+
SYCL_EXTERNAL void
342+
__esimd_raw_sends_store(uint8_t modifier, uint8_t execSize,
343+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
344+
uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid,
345+
uint32_t exDesc, uint32_t msgDesc,
346+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgSrc0,
347+
sycl::INTEL::gpu::vector_type_t<Ty2, N2> msgSrc1);
348+
349+
/// \brief Raw send store.
350+
///
351+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
352+
///
353+
/// @param execSize the execution size, which must be a compile time constant.
354+
///
355+
/// @param pred the predicate to specify enabled channels.
356+
///
357+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
358+
/// constant.
359+
///
360+
/// @param sfid the shared function ID, which must be a compile time constant.
361+
///
362+
/// @param exDesc the extended message descriptor.
363+
///
364+
/// @param msgDesc the message descriptor.
365+
///
366+
/// @param msgSrc0 the first source operand of send message.
367+
///
368+
template <typename Ty1, int N1, int N = 16>
369+
SYCL_EXTERNAL void
370+
__esimd_raw_send_store(uint8_t modifier, uint8_t execSize,
371+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
372+
uint8_t numSrc0, uint8_t sfid, uint32_t exDesc,
373+
uint32_t msgDesc,
374+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgSrc0);
230375
#ifndef __SYCL_DEVICE_ONLY__
231376

232377
template <typename Ty, int N, int NumBlk, sycl::INTEL::gpu::CacheHint L1H,
@@ -660,4 +805,163 @@ __esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset,
660805
throw cl::sycl::feature_not_supported();
661806
}
662807

808+
/// \brief esimd_get_value
809+
///
810+
/// @param acc the SYCL accessor.
811+
///
812+
/// Returns the binding table index value.
813+
///
814+
template <typename AccessorTy>
815+
SYCL_EXTERNAL uint32_t __esimd_get_value(AccessorTy acc) {
816+
throw cl::sycl::feature_not_supported();
817+
return 0;
818+
}
819+
820+
/// \brief Raw sends load.
821+
///
822+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
823+
///
824+
/// @param execSize the execution size, which must be a compile time constant.
825+
///
826+
/// @param pred the predicate to specify enabled channels.
827+
///
828+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
829+
/// constant.
830+
///
831+
/// @param numSrc1 the number of GRFs for source-1, which must be a compile time
832+
/// constant.
833+
///
834+
/// @param numDst the number of GRFs for destination, which must be a compile
835+
/// time constant.
836+
///
837+
/// @param sfid the shared function ID, which must be a compile time constant.
838+
///
839+
/// @param exDesc the extended message descriptor.
840+
///
841+
/// @param msgDesc the message descriptor.
842+
///
843+
/// @param msgSrc0 the first source operand of send message.
844+
///
845+
/// @param msgSrc1 the second source operand of send message.
846+
///
847+
/// @param msgDst the destination operand of send message.
848+
///
849+
/// Returns a simd vector of type Ty1 and size N1.
850+
///
851+
template <typename Ty1, int N1, typename Ty2, int N2, typename Ty3, int N3,
852+
int N>
853+
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<Ty1, N1>
854+
__esimd_raw_sends_load(uint8_t modifier, uint8_t execSize,
855+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
856+
uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst,
857+
uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
858+
sycl::INTEL::gpu::vector_type_t<Ty2, N2> msgSrc0,
859+
sycl::INTEL::gpu::vector_type_t<Ty3, N3> msgSrc1,
860+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgDst) {
861+
throw cl::sycl::feature_not_supported();
862+
return 0;
863+
}
864+
865+
/// \brief Raw send load.
866+
///
867+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
868+
///
869+
/// @param execSize the execution size, which must be a compile time constant.
870+
///
871+
/// @param pred the predicate to specify enabled channels.
872+
///
873+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
874+
/// constant.
875+
///
876+
/// @param numDst the number of GRFs for destination, which must be a compile
877+
/// time constant.
878+
///
879+
/// @param sfid the shared function ID, which must be a compile time constant.
880+
///
881+
/// @param exDesc the extended message descriptor.
882+
///
883+
/// @param msgDesc the message descriptor.
884+
///
885+
/// @param msgSrc0 the first source operand of send message.
886+
///
887+
/// @param msgDst the destination operand of send message.
888+
///
889+
/// Returns a simd vector of type Ty1 and size N1.
890+
///
891+
template <typename Ty1, int N1, typename Ty2, int N2, int N>
892+
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<Ty1, N1>
893+
__esimd_raw_send_load(uint8_t modifier, uint8_t execSize,
894+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
895+
uint8_t numSrc0, uint8_t numDst, uint8_t sfid,
896+
uint32_t exDesc, uint32_t msgDesc,
897+
sycl::INTEL::gpu::vector_type_t<Ty2, N2> msgSrc0,
898+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgDst) {
899+
throw cl::sycl::feature_not_supported();
900+
return 0;
901+
}
902+
903+
/// \brief Raw sends store.
904+
///
905+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
906+
///
907+
/// @param execSize the execution size, which must be a compile time constant.
908+
///
909+
/// @param pred the predicate to specify enabled channels.
910+
///
911+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
912+
/// constant.
913+
///
914+
/// @param numSrc1 the number of GRFs for source-1, which must be a compile time
915+
/// constant.
916+
///
917+
/// @param sfid the shared function ID, which must be a compile time constant.
918+
///
919+
/// @param exDesc the extended message descriptor.
920+
///
921+
/// @param msgDesc the message descriptor.
922+
///
923+
/// @param msgSrc0 the first source operand of send message.
924+
///
925+
/// @param msgSrc1 the second source operand of send message.
926+
///
927+
template <typename Ty1, int N1, typename Ty2, int N2, int N>
928+
SYCL_EXTERNAL void
929+
__esimd_raw_sends_store(uint8_t modifier, uint8_t execSize,
930+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
931+
uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid,
932+
uint32_t exDesc, uint32_t msgDesc,
933+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgSrc0,
934+
sycl::INTEL::gpu::vector_type_t<Ty2, N2> msgSrc1) {
935+
throw cl::sycl::feature_not_supported();
936+
}
937+
938+
/// \brief Raw send store.
939+
///
940+
/// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
941+
///
942+
/// @param execSize the execution size, which must be a compile time constant.
943+
///
944+
/// @param pred the predicate to specify enabled channels.
945+
///
946+
/// @param numSrc0 the number of GRFs for source-0, which must be a compile time
947+
/// constant.
948+
///
949+
/// @param sfid the shared function ID, which must be a compile time constant.
950+
///
951+
/// @param exDesc the extended message descriptor.
952+
///
953+
/// @param msgDesc the message descriptor.
954+
///
955+
/// @param msgSrc0 the first source operand of send message.
956+
///
957+
template <typename Ty1, int N1, int N>
958+
SYCL_EXTERNAL void
959+
__esimd_raw_send_store(uint8_t modifier, uint8_t execSize,
960+
sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred,
961+
uint8_t numSrc0, uint8_t sfid, uint32_t exDesc,
962+
uint32_t msgDesc,
963+
sycl::INTEL::gpu::vector_type_t<Ty1, N1> msgSrc0) {
964+
throw cl::sycl::feature_not_supported();
965+
}
966+
663967
#endif // __SYCL_DEVICE_ONLY__

Diff for: sycl/include/CL/sycl/INTEL/esimd/esimd_math.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
18261826

18271827
template <typename T0, typename T1, int SZ,
18281828
template <typename RT, typename T, int N> class OpType>
1829-
T1 esimd_reduce_single(simd<T1, SZ> v) {
1829+
T0 esimd_reduce_single(simd<T1, SZ> v) {
18301830
if constexpr (SZ == 1) {
18311831
return v[0];
18321832
} else {
@@ -1842,7 +1842,7 @@ T1 esimd_reduce_single(simd<T1, SZ> v) {
18421842

18431843
template <typename T0, typename T1, int N1, int N2,
18441844
template <typename RT, typename T, int N> class OpType>
1845-
T1 esimd_reduce_pair(simd<T1, N1> v1, simd<T1, N2> v2) {
1845+
T0 esimd_reduce_pair(simd<T1, N1> v1, simd<T1, N2> v2) {
18461846
if constexpr (N1 == N2) {
18471847
simd<T0, N1> tmp = OpType<T0, T1, N1>()(v1, v2);
18481848
return esimd_reduce_single<T0, T0, N1, OpType>(tmp);
@@ -1867,7 +1867,7 @@ T1 esimd_reduce_pair(simd<T1, N1> v1, simd<T1, N2> v2) {
18671867

18681868
template <typename T0, typename T1, int SZ,
18691869
template <typename RT, typename T, int N> class OpType>
1870-
T1 esimd_reduce(simd<T1, SZ> v) {
1870+
T0 esimd_reduce(simd<T1, SZ> v) {
18711871
constexpr bool isPowerOf2 = __esimd::isPowerOf2(SZ);
18721872
if constexpr (isPowerOf2) {
18731873
return esimd_reduce_single<T0, T1, SZ, OpType>(v);

0 commit comments

Comments
 (0)