Skip to content

Commit 9c7ea78

Browse files
committed
update
1 parent a222fa9 commit 9c7ea78

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

include/cute/arch/xe_copy_1B.hpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,13 +484,35 @@ struct XE_2D_U8x32x8_LD_T {
484484
#if defined(SYCL_INTEL_TARGET)
485485
static_assert(sizeof(T) == 1, "Expected T to have size 1");
486486
*reinterpret_cast<intel::ushort8 *>(dst) =
487-
__builtin_IB_subgroup_block_read_cacheopts_transpose_u8_m32k8(
487+
__builtin_IB_subgroup_block_read_cacheopts_transpose_u8_m32k8(
488488
(intptr_t)(baseoffset), width - 1, height - 1, pitch - 1, coord);
489489
#else
490490
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-PVC hardware");
491491
#endif
492492
}
493493
};
494+
495+
struct XE_2D_U8x16x32_LD_T {
496+
using BlockShape = Shape<_32, _16>;
497+
using inst_dtype = uint32_t;
498+
static constexpr bool is_transpose = true;
499+
500+
template <class T>
501+
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
502+
int height, int pitch, intel::coord_t coord,
503+
T *dst) {
504+
#if defined(SYCL_INTEL_TARGET)
505+
static_assert(sizeof(T) == 1, "Expected T to have size 2");
506+
*reinterpret_cast<intel::uint8 *>(dst) =
507+
__builtin_IB_subgroup_block_read_flat_transpose_u32_k8(
508+
(intptr_t)(baseoffset), width - 1, height - 1, pitch - 1, coord);
509+
#else
510+
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-PVC hardware");
511+
#endif
512+
}
513+
};
514+
515+
494516
struct XE_2D_U4x16x16_LD_T {
495517
using BlockShape = Shape<_16, _16>;
496518
using inst_dtype = uint32_t;

include/cute/atom/copy_traits_xe.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,7 +1422,7 @@ struct Copy_Traits_<XE_2D_TF32x8x8_LD_T, args_t...>
14221422
};
14231423

14241424
template <class... args_t>
1425-
struct Copy_Traits<XE_2D_U32x1x16_LD_N, args_t...>
1425+
struct Copy_Traits_<XE_2D_U32x1x16_LD_N, args_t...>
14261426
: XE_2D_LD_Unpack<XE_2D_U32x1x16_LD_N, args_t...> {
14271427
using ThrID = Layout<_16>;
14281428
// Map from (src-thr,src-val) to bit
@@ -1712,8 +1712,8 @@ struct Copy_Traits_<XE_2D_U16x16x16_LD_T, args_t...>
17121712
};
17131713

17141714
template <class... args_t>
1715-
struct Copy_Traits_<XE_2D_U8x32x16_LD_T, args_t...>
1716-
: XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t...> {
1715+
struct Copy_Traits_<XE_2D_U8x16x32_LD_T, args_t...>
1716+
: XE_2D_LD_Unpack<XE_2D_U8x16x32_LD_T, args_t...> {
17171717
using ThrID = Layout<_16>;
17181718
// Map from (src-thr,src-val) to bit
17191719
using SrcLayout = Layout<Shape <_16,_16>,
@@ -1726,7 +1726,7 @@ struct Copy_Traits_<XE_2D_U8x32x16_LD_T, args_t...>
17261726

17271727
template <class... ArgT>
17281728
Copy_Traits_(ArgT... args)
1729-
: XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t...>(args...) {}
1729+
: XE_2D_LD_Unpack<XE_2D_U8x16x32_LD_T, args_t...>(args...) {}
17301730
};
17311731

17321732
template <class... args_t>
@@ -1748,7 +1748,7 @@ struct Copy_Traits_<XE_2D_U8x32x8_LD_T, args_t...>
17481748
};
17491749

17501750
template <class... args_t>
1751-
struct Copy_Traits<XE_2D_U8x32x4_LD_T, args_t...>
1751+
struct Copy_Traits_<XE_2D_U8x32x4_LD_T, args_t...>
17521752
: XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t...> {
17531753
using ThrID = Layout<_16>;
17541754
// Map from (src-thr,src-val) to bit
@@ -1761,10 +1761,9 @@ struct Copy_Traits<XE_2D_U8x32x4_LD_T, args_t...>
17611761
using RefLayout = DstLayout;
17621762

17631763
template <class... ArgT>
1764-
Copy_Traits(ArgT... args)
1764+
Copy_Traits_(ArgT... args)
17651765
: XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t...>(args...) {}
17661766
};
1767-
};
17681767

17691768
// template<class... args_t>
17701769
// struct Copy_Traits<XE_2D_U32x16x1_LD_T, args_t...>
@@ -2312,6 +2311,7 @@ COPY_TRAIT_LD_DEF(XE_2D_U8x16x64_LD_N)
23122311
COPY_TRAIT_LD_DEF(XE_2D_U8x32x64_LD_N)
23132312
COPY_TRAIT_LD_DEF(XE_2D_U8x32x8_LD_T)
23142313
COPY_TRAIT_LD_DEF(XE_2D_U8x32x4_LD_T)
2314+
COPY_TRAIT_LD_DEF(XE_2D_U8x16x32_LD_T)
23152315
COPY_TRAIT_LD_DEF(XE_2D_U16x1x16_LD_N)
23162316
COPY_TRAIT_LD_DEF(XE_2D_U16x2x16_LD_N)
23172317
COPY_TRAIT_LD_DEF(XE_2D_U16x4x16_LD_N)

0 commit comments

Comments
 (0)