@@ -1404,7 +1404,25 @@ struct Copy_Traits_<XE_2D_TF32x32x16_LD_N, args_t...>
1404
1404
};
1405
1405
1406
1406
template <class ... args_t >
1407
- struct Copy_Traits_ <XE_2D_U32x1x16_LD_N, args_t ...>
1407
+ struct Copy_Traits_ <XE_2D_TF32x8x8_LD_T, args_t ...>
1408
+ : XE_2D_LD_Unpack<XE_2D_TF32x8x8_LD_T, args_t ...> {
1409
+ using ThrID = Layout<_16>;
1410
+ // Map from (src-thr,src-val) to bit
1411
+ using SrcLayout = Layout<Shape <_16, Shape <_4, _32>>,
1412
+ Stride< _0, Stride<_32, _1>>>;
1413
+ // Map from (dst-thr,dst-val) to bit
1414
+ using DstLayout = Layout<Shape <_16, Shape <_4, _32>>,
1415
+ Stride< _32, Stride<_32, _1>>>;
1416
+ // Reference map from (thr,val) to bit
1417
+ using RefLayout = DstLayout;
1418
+
1419
+ template <class ... ArgTs>
1420
+ Copy_Traits_ (ArgTs... args)
1421
+ : XE_2D_LD_Unpack<XE_2D_TF32x8x8_LD_T, args_t ...>(args...) {}
1422
+ };
1423
+
1424
+ template <class ... args_t >
1425
+ struct Copy_Traits <XE_2D_U32x1x16_LD_N, args_t ...>
1408
1426
: XE_2D_LD_Unpack<XE_2D_U32x1x16_LD_N, args_t ...> {
1409
1427
using ThrID = Layout<_16>;
1410
1428
// Map from (src-thr,src-val) to bit
@@ -1693,6 +1711,61 @@ struct Copy_Traits_<XE_2D_U16x16x16_LD_T, args_t...>
1693
1711
: XE_2D_LD_Unpack<XE_2D_U16x16x16_LD_T, args_t ...>(args...) {}
1694
1712
};
1695
1713
1714
+ template <class ... args_t >
1715
+ struct Copy_Traits_ <XE_2D_U8x32x16_LD_T, args_t ...>
1716
+ : XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t ...> {
1717
+ using ThrID = Layout<_16>;
1718
+ // Map from (src-thr,src-val) to bit
1719
+ using SrcLayout = Layout<Shape <_16,_16>,
1720
+ Stride< _0, _1>>;
1721
+ // Map from (dst-thr,dst-val) to bit
1722
+ using DstLayout = Layout<Shape < _16,Shape <_16,_16>>,
1723
+ Stride<_256,Stride< _1,_16>>>;
1724
+ // Reference map from (thr,val) to bit
1725
+ using RefLayout = DstLayout;
1726
+
1727
+ template <class ... ArgT>
1728
+ Copy_Traits_ (ArgT... args)
1729
+ : XE_2D_LD_Unpack<XE_2D_U8x32x16_LD_T, args_t ...>(args...) {}
1730
+ };
1731
+
1732
+ template <class ... args_t >
1733
+ struct Copy_Traits_ <XE_2D_U8x32x8_LD_T, args_t ...>
1734
+ : XE_2D_LD_Unpack<XE_2D_U8x32x8_LD_T, args_t ...> {
1735
+ using ThrID = Layout<_16>;
1736
+ // Map from (src-thr,src-val) to bit
1737
+ using SrcLayout = Layout<Shape <_16,Shape <_8, _2, _8>>,
1738
+ Stride<_0, Stride<_1, _8, _16>>>;
1739
+ // Map from (dst-thr,dst-val) to bit
1740
+ using DstLayout = Layout<Shape < _16,Shape <_8, _2, _8>>,
1741
+ Stride<_256,Stride<_1, _8, _16>>>;
1742
+ // Reference map from (thr,val) to bit
1743
+ using RefLayout = DstLayout;
1744
+
1745
+ template <class ... ArgT>
1746
+ Copy_Traits_ (ArgT... args)
1747
+ : XE_2D_LD_Unpack<XE_2D_U8x32x8_LD_T, args_t ...>(args...) {}
1748
+ };
1749
+
1750
+ template <class ... args_t >
1751
+ struct Copy_Traits <XE_2D_U8x32x4_LD_T, args_t ...>
1752
+ : XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t ...> {
1753
+ using ThrID = Layout<_16>;
1754
+ // Map from (src-thr,src-val) to bit
1755
+ using SrcLayout = Layout<Shape <_16,Shape <_8, _2, _4>>,
1756
+ Stride<_0, Stride<_1, _8, _16>>>;
1757
+ // Map from (dst-thr,dst-val) to bit
1758
+ using DstLayout = Layout<Shape < _16,Shape <_8, _2, _4>>,
1759
+ Stride<_256,Stride<_1, _8, _16>>>;
1760
+ // Reference map from (thr,val) to bit
1761
+ using RefLayout = DstLayout;
1762
+
1763
+ template <class ... ArgT>
1764
+ Copy_Traits (ArgT... args)
1765
+ : XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t ...>(args...) {}
1766
+ };
1767
+ };
1768
+
1696
1769
// template<class... args_t>
1697
1770
// struct Copy_Traits<XE_2D_U32x16x1_LD_T, args_t...>
1698
1771
// : XE_2D_LD_Unpack<XE_2D_U32x16x1_LD_T, args_t...> {
@@ -2237,6 +2310,8 @@ COPY_TRAIT_LD_DEF(XE_2D_U8x16x32_LD_N)
2237
2310
COPY_TRAIT_LD_DEF(XE_2D_U8x32x32_LD_N)
2238
2311
COPY_TRAIT_LD_DEF(XE_2D_U8x16x64_LD_N)
2239
2312
COPY_TRAIT_LD_DEF(XE_2D_U8x32x64_LD_N)
2313
+ COPY_TRAIT_LD_DEF(XE_2D_U8x32x8_LD_T)
2314
+ COPY_TRAIT_LD_DEF(XE_2D_U8x32x4_LD_T)
2240
2315
COPY_TRAIT_LD_DEF(XE_2D_U16x1x16_LD_N)
2241
2316
COPY_TRAIT_LD_DEF(XE_2D_U16x2x16_LD_N)
2242
2317
COPY_TRAIT_LD_DEF(XE_2D_U16x4x16_LD_N)
@@ -2279,6 +2354,7 @@ COPY_TRAIT_LD_DEF(XE_2D_U16x16x32_LD_V)
2279
2354
COPY_TRAIT_LD_DEF(XE_2D_U16x16x16_LD_T)
2280
2355
COPY_TRAIT_LD_DEF(XE_2D_TF32x16x16_LD_N)
2281
2356
COPY_TRAIT_LD_DEF(XE_2D_TF32x32x16_LD_N)
2357
+ COPY_TRAIT_LD_DEF(XE_2D_TF32x8x8_LD_T)
2282
2358
COPY_TRAIT_LD_DEF(XE_2D_U4x32x64_LD_N)
2283
2359
COPY_TRAIT_LD_DEF(XE_2D_U4x16x64_LD_N)
2284
2360
COPY_TRAIT_LD_DEF(XE_2D_U4x32x16_LD_T)
0 commit comments