Skip to content

Commit bc80618

Browse files
committed
*fix crash on CPU without AVX-512 support.
1 parent 90bd4b6 commit bc80618

File tree

2 files changed, with 8 additions (+8) and 9 deletions (-9).

2 files changed, with 8 additions (+8) and 9 deletions (-9).

src/Simd/SimdTile.h

+6 -8
Original file line number | Diff line number | Diff line change
@@ -130,17 +130,15 @@ namespace Simd
130130
#ifdef SIMD_AMXBF16_ENABLE
131131
namespace AmxBf16
132132
{
133-
const static TileConf ZeroConf = TileConf(true);
134-
const static TileConf FullConf = TileConf(false);
135-
136133
SIMD_INLINE void SetTileConfFull()
137134
{
138-
_tile_loadconfig(&FullConf);
135+
TileConf conf = TileConf(false);
136+
_tile_loadconfig(&conf);
139137
}
140138

141139
SIMD_INLINE void SetTileConf2x2(size_t rows, size_t cols)
142140
{
143-
TileConf conf = FullConf;
141+
TileConf conf = TileConf(false);
144142
uint8_t tailR = uint8_t(rows - 16);
145143
conf.rows[2] = tailR;
146144
conf.rows[3] = tailR;
@@ -154,7 +152,7 @@ namespace Simd
154152

155153
SIMD_INLINE void SetTileConf2x1(size_t rows, size_t cols)
156154
{
157-
TileConf conf = FullConf;
155+
TileConf conf = TileConf(false);
158156
uint8_t tailR = uint8_t(rows - 16);
159157
conf.rows[2] = tailR;
160158
conf.rows[5] = tailR;
@@ -167,7 +165,7 @@ namespace Simd
167165

168166
SIMD_INLINE void SetTileConf1x2(size_t rows, size_t cols)
169167
{
170-
TileConf conf = FullConf;
168+
TileConf conf = TileConf(false);
171169
uint8_t tailR = uint8_t(rows);
172170
conf.rows[0] = tailR;
173171
conf.rows[1] = tailR;
@@ -180,7 +178,7 @@ namespace Simd
180178

181179
SIMD_INLINE void SetTileConf1x1(size_t rows, size_t cols)
182180
{
183-
TileConf conf = FullConf;
181+
TileConf conf = TileConf(false);
184182
uint8_t tailR = uint8_t(rows);
185183
conf.rows[0] = tailR;
186184
conf.rows[4] = tailR;

src/Test/TestSynetDeconvolution16b.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -195,7 +195,8 @@ namespace Test
195195
#endif
196196
#else
197197
#if 1
198-
result = result && SynetDeconvolution16bForwardAutoTest(eps, Param(1, 720, 192, 256, 64, _4, _1, _2, _1, _1, 1, aId, tT, f32, f32), c, f1, f2);
198+
//result = result && SynetDeconvolution16bForwardAutoTest(eps, Param(1, 720, 192, 256, 64, _4, _1, _2, _1, _1, 1, aId, tT, f32, f32), c, f1, f2);
199+
result = result && SynetDeconvolution16bForwardAutoTest(eps, Param(1, 72, 24, 32, 64, _4, _1, _2, _1, _1, 1, aId, tT, f32, f32), c, f1, f2);
199200
//result = result && SynetDeconvolution16bForwardAutoTest(eps, Param(1, 24, 12, 16, 32, _2, _1, _1, _1, _1, 1, aId, tT, f32, f32), c, f1, f2);
200201
#endif
201202
#endif

0 commit comments

Comments
 (0)