Skip to content

Commit 50f9bbf

Browse files
committed
update
1 parent 467b301 commit 50f9bbf

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

include/cute/arch/xe_copy_1B.hpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,13 +484,35 @@ struct XE_2D_U8x32x8_LD_T {
484484
#if defined(SYCL_INTEL_TARGET)
485485
static_assert(sizeof(T) == 1, "Expected T to have size 1");
486486
*reinterpret_cast<intel::ushort8 *>(dst) =
487-
__builtin_IB_subgroup_block_read_cacheopts_transpose_u8_m32k8(
487+
__builtin_IB_subgroup_block_read_cacheopts_transpose_u8_m32k8(
488488
(intptr_t)(baseoffset), width - 1, height - 1, pitch - 1, coord);
489489
#else
490490
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-PVC hardware");
491491
#endif
492492
}
493493
};
494+
495+
struct XE_2D_U8x16x32_LD_T {
496+
using BlockShape = Shape<_32, _16>;
497+
using inst_dtype = uint32_t;
498+
static constexpr bool is_transpose = true;
499+
500+
template <class T>
501+
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
502+
int height, int pitch, intel::coord_t coord,
503+
T *dst) {
504+
#if defined(SYCL_INTEL_TARGET)
505+
static_assert(sizeof(T) == 1, "Expected T to have size 2");
506+
*reinterpret_cast<intel::uint8 *>(dst) =
507+
__builtin_IB_subgroup_block_read_flat_transpose_u32_k8(
508+
(intptr_t)(baseoffset), width - 1, height - 1, pitch - 1, coord);
509+
#else
510+
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-PVC hardware");
511+
#endif
512+
}
513+
};
514+
515+
494516
struct XE_2D_U4x16x16_LD_T {
495517
using BlockShape = Shape<_16, _16>;
496518
using inst_dtype = uint32_t;

include/cute/atom/copy_traits_xe.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,7 +1417,7 @@ struct Copy_Traits_<XE_2D_TF32x8x8_LD_T, args_t...>
14171417
};
14181418

14191419
template <class... args_t>
1420-
struct Copy_Traits<XE_2D_U32x1x16_LD_N, args_t...>
1420+
struct Copy_Traits_<XE_2D_U32x1x16_LD_N, args_t...>
14211421
: XE_2D_LD_Unpack<XE_2D_U32x1x16_LD_N, args_t...> {
14221422
using ThrID = Layout<_16>;
14231423
// Map from (src-thr,src-val) to bit
@@ -1707,8 +1707,8 @@ struct Copy_Traits_<XE_2D_U16x16x16_LD_T, args_t...>
17071707
};
17081708

17091709
template <class... args_t>
1710-
struct Copy_Traits_<XE_2D_U8x32x16_LD_T, args_t...>
1711-
: XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t...> {
1710+
struct Copy_Traits_<XE_2D_U8x16x32_LD_T, args_t...>
1711+
: XE_2D_LD_Unpack<XE_2D_U8x16x32_LD_T, args_t...> {
17121712
using ThrID = Layout<_16>;
17131713
// Map from (src-thr,src-val) to bit
17141714
using SrcLayout = Layout<Shape <_16,_16>,
@@ -1721,7 +1721,7 @@ struct Copy_Traits_<XE_2D_U8x32x16_LD_T, args_t...>
17211721

17221722
template <class... ArgT>
17231723
Copy_Traits_(ArgT... args)
1724-
: XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t...>(args...) {}
1724+
: XE_2D_LD_Unpack<XE_2D_U8x16x32_LD_T, args_t...>(args...) {}
17251725
};
17261726

17271727
template <class... args_t>
@@ -1743,7 +1743,7 @@ struct Copy_Traits_<XE_2D_U8x32x8_LD_T, args_t...>
17431743
};
17441744

17451745
template <class... args_t>
1746-
struct Copy_Traits<XE_2D_U8x32x4_LD_T, args_t...>
1746+
struct Copy_Traits_<XE_2D_U8x32x4_LD_T, args_t...>
17471747
: XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t...> {
17481748
using ThrID = Layout<_16>;
17491749
// Map from (src-thr,src-val) to bit
@@ -1756,10 +1756,9 @@ struct Copy_Traits<XE_2D_U8x32x4_LD_T, args_t...>
17561756
using RefLayout = DstLayout;
17571757

17581758
template <class... ArgT>
1759-
Copy_Traits(ArgT... args)
1759+
Copy_Traits_(ArgT... args)
17601760
: XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t...>(args...) {}
17611761
};
1762-
};
17631762

17641763
// template<class... args_t>
17651764
// struct Copy_Traits<XE_2D_U32x16x1_LD_T, args_t...>
@@ -2307,6 +2306,7 @@ COPY_TRAIT_LD_DEF(XE_2D_U8x16x64_LD_N)
23072306
COPY_TRAIT_LD_DEF(XE_2D_U8x32x64_LD_N)
23082307
COPY_TRAIT_LD_DEF(XE_2D_U8x32x8_LD_T)
23092308
COPY_TRAIT_LD_DEF(XE_2D_U8x32x4_LD_T)
2309+
COPY_TRAIT_LD_DEF(XE_2D_U8x16x32_LD_T)
23102310
COPY_TRAIT_LD_DEF(XE_2D_U16x1x16_LD_N)
23112311
COPY_TRAIT_LD_DEF(XE_2D_U16x2x16_LD_N)
23122312
COPY_TRAIT_LD_DEF(XE_2D_U16x4x16_LD_N)

0 commit comments

Comments
 (0)