@@ -1399,7 +1399,25 @@ struct Copy_Traits_<XE_2D_TF32x32x16_LD_N, args_t...>
1399
1399
};
1400
1400
1401
1401
template <class ... args_t >
1402
- struct Copy_Traits_ <XE_2D_U32x1x16_LD_N, args_t ...>
1402
+ struct Copy_Traits_ <XE_2D_TF32x8x8_LD_T, args_t ...>
1403
+ : XE_2D_LD_Unpack<XE_2D_TF32x8x8_LD_T, args_t ...> {
1404
+ using ThrID = Layout<_16>;
1405
+ // Map from (src-thr,src-val) to bit
1406
+ using SrcLayout = Layout<Shape <_16, Shape <_4, _32>>,
1407
+ Stride< _0, Stride<_32, _1>>>;
1408
+ // Map from (dst-thr,dst-val) to bit
1409
+ using DstLayout = Layout<Shape <_16, Shape <_4, _32>>,
1410
+ Stride< _32, Stride<_32, _1>>>;
1411
+ // Reference map from (thr,val) to bit
1412
+ using RefLayout = DstLayout;
1413
+
1414
+ template <class ... ArgTs>
1415
+ Copy_Traits_ (ArgTs... args)
1416
+ : XE_2D_LD_Unpack<XE_2D_TF32x8x8_LD_T, args_t ...>(args...) {}
1417
+ };
1418
+
1419
+ template <class ... args_t >
1420
+ struct Copy_Traits <XE_2D_U32x1x16_LD_N, args_t ...>
1403
1421
: XE_2D_LD_Unpack<XE_2D_U32x1x16_LD_N, args_t ...> {
1404
1422
using ThrID = Layout<_16>;
1405
1423
// Map from (src-thr,src-val) to bit
@@ -1688,6 +1706,61 @@ struct Copy_Traits_<XE_2D_U16x16x16_LD_T, args_t...>
1688
1706
: XE_2D_LD_Unpack<XE_2D_U16x16x16_LD_T, args_t ...>(args...) {}
1689
1707
};
1690
1708
1709
+ template <class ... args_t >
1710
+ struct Copy_Traits_ <XE_2D_U8x32x16_LD_T, args_t ...>
1711
+ : XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t ...> {
1712
+ using ThrID = Layout<_16>;
1713
+ // Map from (src-thr,src-val) to bit
1714
+ using SrcLayout = Layout<Shape <_16,_16>,
1715
+ Stride< _0, _1>>;
1716
+ // Map from (dst-thr,dst-val) to bit
1717
+ using DstLayout = Layout<Shape < _16,Shape <_16,_16>>,
1718
+ Stride<_256,Stride< _1,_16>>>;
1719
+ // Reference map from (thr,val) to bit
1720
+ using RefLayout = DstLayout;
1721
+
1722
+ template <class ... ArgT>
1723
+ Copy_Traits_ (ArgT... args)
1724
+ : XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t ...>(args...) {}
1725
+ };
1726
+
1727
+ template <class ... args_t >
1728
+ struct Copy_Traits_ <XE_2D_U8x32x8_LD_T, args_t ...>
1729
+ : XE_2D_LD_Unpack<XE_2D_U8x32x8_LD_T, args_t ...> {
1730
+ using ThrID = Layout<_16>;
1731
+ // Map from (src-thr,src-val) to bit
1732
+ using SrcLayout = Layout<Shape <_16,Shape <_8, _2, _8>>,
1733
+ Stride<_0, Stride<_1, _8, _16>>>;
1734
+ // Map from (dst-thr,dst-val) to bit
1735
+ using DstLayout = Layout<Shape < _16,Shape <_8, _2, _8>>,
1736
+ Stride<_256,Stride<_1, _8, _16>>>;
1737
+ // Reference map from (thr,val) to bit
1738
+ using RefLayout = DstLayout;
1739
+
1740
+ template <class ... ArgT>
1741
+ Copy_Traits_ (ArgT... args)
1742
+ : XE_2D_LD_Unpack<XE_2D_U8x32x8_LD_T, args_t ...>(args...) {}
1743
+ };
1744
+
1745
+ template <class ... args_t >
1746
+ struct Copy_Traits <XE_2D_U8x32x4_LD_T, args_t ...>
1747
+ : XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t ...> {
1748
+ using ThrID = Layout<_16>;
1749
+ // Map from (src-thr,src-val) to bit
1750
+ using SrcLayout = Layout<Shape <_16,Shape <_8, _2, _4>>,
1751
+ Stride<_0, Stride<_1, _8, _16>>>;
1752
+ // Map from (dst-thr,dst-val) to bit
1753
+ using DstLayout = Layout<Shape < _16,Shape <_8, _2, _4>>,
1754
+ Stride<_256,Stride<_1, _8, _16>>>;
1755
+ // Reference map from (thr,val) to bit
1756
+ using RefLayout = DstLayout;
1757
+
1758
+ template <class ... ArgT>
1759
+ Copy_Traits (ArgT... args)
1760
+ : XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t ...>(args...) {}
1761
+ };
1762
+ };
1763
+
1691
1764
// template<class... args_t>
1692
1765
// struct Copy_Traits<XE_2D_U32x16x1_LD_T, args_t...>
1693
1766
// : XE_2D_LD_Unpack<XE_2D_U32x16x1_LD_T, args_t...> {
@@ -2232,6 +2305,8 @@ COPY_TRAIT_LD_DEF(XE_2D_U8x16x32_LD_N)
2232
2305
COPY_TRAIT_LD_DEF(XE_2D_U8x32x32_LD_N)
2233
2306
COPY_TRAIT_LD_DEF(XE_2D_U8x16x64_LD_N)
2234
2307
COPY_TRAIT_LD_DEF(XE_2D_U8x32x64_LD_N)
2308
+ COPY_TRAIT_LD_DEF(XE_2D_U8x32x8_LD_T)
2309
+ COPY_TRAIT_LD_DEF(XE_2D_U8x32x4_LD_T)
2235
2310
COPY_TRAIT_LD_DEF(XE_2D_U16x1x16_LD_N)
2236
2311
COPY_TRAIT_LD_DEF(XE_2D_U16x2x16_LD_N)
2237
2312
COPY_TRAIT_LD_DEF(XE_2D_U16x4x16_LD_N)
@@ -2274,6 +2349,7 @@ COPY_TRAIT_LD_DEF(XE_2D_U16x16x32_LD_V)
2274
2349
COPY_TRAIT_LD_DEF(XE_2D_U16x16x16_LD_T)
2275
2350
COPY_TRAIT_LD_DEF(XE_2D_TF32x16x16_LD_N)
2276
2351
COPY_TRAIT_LD_DEF(XE_2D_TF32x32x16_LD_N)
2352
+ COPY_TRAIT_LD_DEF(XE_2D_TF32x8x8_LD_T)
2277
2353
COPY_TRAIT_LD_DEF(XE_2D_U4x32x64_LD_N)
2278
2354
COPY_TRAIT_LD_DEF(XE_2D_U4x16x64_LD_N)
2279
2355
COPY_TRAIT_LD_DEF(XE_2D_U4x32x16_LD_T)
0 commit comments