Skip to content

Commit 90bd4b6

Browse files
committed
*fix bug: Error in Base implementation of class SynetConvolution16bNchwGemm.
1 parent 639c1aa commit 90bd4b6

File tree

4 files changed

+16
-9
lines changed

4 files changed

+16
-9
lines changed

docs/2025.html

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,11 @@ <h5>Improving</h5>
4949
<li>SSE4.1, AVX2, AVX-512BW optimizations of class ResizerFloatBilinear.</li>
5050
<li>AMX-BF16 optimizations of class SynetConvolution16bNchwGemm.</li>
5151
<li>AMX-BF16 optimizations of class SynetConvolution16bNhwcGemm.</li>
52-
<</ul>
52+
</ul>
53+
<h5>Bug fixing</h5>
54+
<ul>
55+
<li>Error in Base implementation of class SynetConvolution16bNchwGemm.</li>
56+
</ul>
5357

5458
<h4>Test framework</h4>
5559
<h5>New features</h5>

src/Simd/SimdAmxBf16DescrIntCdu.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -254,12 +254,12 @@ namespace Simd
254254
_tile_zero(3);
255255
for (size_t k = 0; k < K; k += 64)
256256
{
257-
_tile_stream_loadd(4, ad00 + k, adStride);
257+
_tile_stream_loadd(4, ad00 + k, (int)adStride);
258258
_tile_loadd(6, bd00 + k * 32, 128);
259259
_tile_dpbuud(0, 4, 6);
260260
_tile_loadd(7, bd64 + k * 32, 128);
261261
_tile_dpbuud(1, 4, 7);
262-
_tile_stream_loadd(5, ad16 + k, adStride);
262+
_tile_stream_loadd(5, ad16 + k, (int)adStride);
263263
_tile_dpbuud(2, 5, 6);
264264
_tile_dpbuud(3, 5, 7);
265265
}
@@ -299,10 +299,10 @@ namespace Simd
299299
_tile_zero(2);
300300
for (size_t k = 0; k < K; k += 64)
301301
{
302-
_tile_stream_loadd(4, ad00 + k, adStride);
302+
_tile_stream_loadd(4, ad00 + k, (int)adStride);
303303
_tile_loadd(6, bd00 + k * 32, 128);
304304
_tile_dpbuud(0, 4, 6);
305-
_tile_stream_loadd(5, ad16 + k, adStride);
305+
_tile_stream_loadd(5, ad16 + k, (int)adStride);
306306
_tile_dpbuud(2, 5, 6);
307307
}
308308
SIMD_ALIGNED(64) int32_t buf[32][16];
@@ -338,7 +338,7 @@ namespace Simd
338338
_tile_zero(1);
339339
for (size_t k = 0; k < K; k += 64)
340340
{
341-
_tile_stream_loadd(4, ad00 + k, adStride);
341+
_tile_stream_loadd(4, ad00 + k, (int)adStride);
342342
_tile_loadd(6, bd00 + k * 32, 128);
343343
_tile_dpbuud(0, 4, 6);
344344
_tile_loadd(7, bd64 + k * 32, 128);
@@ -371,7 +371,7 @@ namespace Simd
371371
_tile_zero(0);
372372
for (size_t k = 0; k < K; k += 64)
373373
{
374-
_tile_stream_loadd(4, ad00 + k, adStride);
374+
_tile_stream_loadd(4, ad00 + k, (int)adStride);
375375
_tile_loadd(6, bd00 + k * 32, 128);
376376
_tile_dpbuud(0, 4, 6);
377377
}

src/Simd/SimdBaseSynetConvolution16bNchwGemm.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,6 @@ namespace Simd
313313
{
314314
const ConvParam& p = _param;
315315
const AlgParam& a = _alg;
316-
const float* bias = _bias.data, * params = _params.data;
317316
for (size_t yBeg = 0; yBeg < p.dstH;)
318317
{
319318
size_t yEnd = Simd::Min(yBeg + a.macroH, p.dstH);
@@ -325,6 +324,7 @@ namespace Simd
325324
if (_is1x1)
326325
_convert(src, p, a, yBeg, yEnd, mak, mak + macroK, buf);
327326
size_t bufOffs = _is1x1 ? 0 : mak * a.F;
327+
const float* bias = _bias.data, * params = _params.data;
328328
for (size_t dc = 0; dc < p.dstC; dc += a.macroD)
329329
{
330330
size_t macroD = Simd::Min(p.dstC, dc + a.macroD) - dc;
@@ -337,6 +337,9 @@ namespace Simd
337337
else
338338
_convolutions[0](weight, p, a, macroD, yEnd - yBeg, macroK, mak == 0 ? 1 : 0,
339339
buf + bufOffs, bias, params, sum + sumOffs, dst + dstOffs);
340+
bias += macroD;
341+
if (p.activation == ::SimdConvolutionActivationPrelu)
342+
params += macroD;
340343
}
341344
}
342345
yBeg = yEnd;

src/Test/TestSynetConvolution16b.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -332,7 +332,7 @@ namespace Test
332332
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 383, 13, 13, 1155, _1, _1, _1, _0, _0, 1, aRe, tF, b16, b16), c, f1, f2);
333333
#endif
334334
#else
335-
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 384, 13, 13, 1152, _1, _1, _1, _0, _0, 1, aRe, tF, b16, b16), c, f1, f2);
335+
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 384, 13, 13, 1152, _1, _1, _1, _0, _0, 1, aPr, tF, b16, b16), c, f1, f2);
336336
#endif
337337

338338
return result;

0 commit comments

Comments
 (0)