@@ -276,6 +276,177 @@ end
276
276
end
277
277
nothing
278
278
end
279
"""
    _ldivu_remainder!(spa, spu, N, Nr, ::StaticInt{W}, ::Val{UNIT}, ::StaticInt{r})

Generated kernel that handles a remainder of static size `r` (with `0 < r < W`) for the
`ldivu` solve, operating in place on `spa` and reading the triangular factor from `spu`.

!!! warning
    The generator currently aborts with `error("not updated")` before it produces any
    code, so every call to this method fails. The code below that guard is kept as the
    starting point for the port and is unreachable until the guard is removed.

# Arguments
- `spa`: in-place operand, accessed through `vload`/`vstore!`.
- `spu`: triangular factor, accessed through `vload`.
- `N`: exclusive upper bound of the zero-based loops over the solve dimension.
- `Nr`: initial offset; when positive it is handled first by the masked small kernel.
- `StaticInt{W}`: static block width.
- `Val{UNIT}`: when `UNIT` is `true`, the division by the diagonal of `spu` is skipped.
- `StaticInt{r}`: static size of the remainder.

# Throws
- `ErrorException("not updated")` unconditionally (see warning above).
- `ArgumentError` if `r <= 0` or `r >= W` (only reachable once the guard is removed).
"""
@generated function _ldivu_remainder!(
    spa,
    spu,
    N,
    Nr,
    ::StaticInt{W},
    ::Val{UNIT},
    ::StaticInt{r},
) where {W,UNIT,r}
    # Deliberate stub: this method has not been ported yet, so fail loudly instead of
    # running code that is known to be stale.
    error("not updated")
    r <= 0 &&
        throw(ArgumentError("Remainder of `<= 0` shouldn't be called, but had $r."))
    r >= W &&
        throw(ArgumentError("Remainder of `>= $W` shouldn't be called, but had $r."))
    if r == 1
        # Single remaining entry: plain scalar substitution at index `(0, j)` of `spa`,
        # using the fast-math arithmetic variants.
        z = static(0)
        sub = Base.FastMath.sub_fast
        mul = Base.FastMath.mul_fast
        div = Base.FastMath.div_fast
        load_xj = :(vload(spa, ($z, j)))
        init_xj = if UNIT
            # Unit diagonal: the loaded value is used as-is.
            :(xj = $load_xj)
        else
            # General diagonal: divide by `spu[j, j]` and write the result back.
            quote
                xj = $div($load_xj, vload(spu, (j, j)))
                vstore!(spa, xj, ($z, j))
            end
        end
        quote
            $(Expr(:meta, :inline))
            for j = 0:N-1
                $init_xj
                # Eliminate `xj` from every later entry.
                for i = (j+1):N-1
                    xi = vload(spa, ($z, i))
                    Uji = vload(spu, (j, i))
                    vstore!(spa, $sub(xi, $mul(xj, Uji)), ($z, i))
                end
            end
        end
    else
        WS = static(W)
        quote
            $(Expr(:meta, :inline))
            n = Nr # non factor of W remainder
            if n > 0
                # NOTE(review): this still calls `BdivU_small_kern!`, whose name suggests
                # the right-division kernel; presumably part of what "not updated"
                # refers to. Confirm before enabling.
                let t = (spa, spu),
                    ft = flatten_to_tup(t),
                    mask = $(getfield(_mask(WS, r), :u) % UInt32)

                    BdivU_small_kern!(n, mask, $WS, $(Val(UNIT)), typeof(t), ft...)
                end
            end
            # Advance in steps of `W`; the loop only terminates when `n` lands exactly
            # on `N`.
            while n != N
                ldivu_solve_W!(spa, spu, n, $WS, $(Val(UNIT)), $(StaticInt(r)))
                n += $W
            end
        end
    end
end
340
"""
    ldivu_remainder!(M, N, m, Nr, ::StaticInt{W}, ::Val{UNIT}, ::Type{Args}, args...)

Generated dispatcher that turns the run-time remainder `M - m` into a static remainder
size and forwards to `_ldivu_remainder!`.

`args` is the flattened form of `(spa, spu)`; it is rebuilt with
`reassemble_tup(Args, args)` inside the generated code.

!!! warning
    The generator currently aborts with `error("not updated")` before it produces any
    code, so every call to this method fails. The code below that guard is unreachable
    until the guard is removed.

# Arguments
- `M`, `m`: total extent and current offset; the remainder size is `M - m`.
- `N`, `Nr`: forwarded unchanged to `_ldivu_remainder!`.
- `StaticInt{W}`: static block width.
- `Val{UNIT}`: forwarded unchanged to `_ldivu_remainder!`.
- `Type{Args}`, `args`: type and flattened contents of the `(spa, spu)` tuple.

# Returns
`nothing`.

# Throws
- `ErrorException("not updated")` unconditionally (see warning above).
"""
@generated function ldivu_remainder!(
    M,
    N,
    m,
    Nr,
    ::StaticInt{W},
    ::Val{UNIT},
    ::Type{Args},
    args::Vararg{Any,K},
) where {W,UNIT,Args,K}
    # Deliberate stub: this method has not been ported yet, so fail loudly instead of
    # running code that is known to be stale.
    error("not updated")
    WS = static(W)
    if W == 2
        # The only possible remainder is 1, so no run-time branching is needed.
        quote
            $(Expr(:meta, :inline))
            spa, spu = reassemble_tup(Args, args)
            _ldivu_remainder!(spa, spu, N, Nr, $WS, $(Val(UNIT)), $(static(1)))
            nothing
        end
    else
        # `@nif` expands to `if m == M - 1 ... elseif m == M - (W - 2) ... else ... end`,
        # with remainder `W - 1` as the unconditional final branch. This covers every
        # width, including `W == 8`, which previously had a hand-unrolled copy of the
        # same chain.
        quote
            spa, spu = reassemble_tup(Args, args)
            Base.Cartesian.@nif $(W - 1) w -> m == M - w w ->
                _ldivu_remainder!(spa, spu, N, Nr, $WS, $(Val(UNIT)), static(w))
            nothing
        end
    end
end
279
450
function _ldivu_L! (
280
451
M,
281
452
N,
@@ -301,11 +472,11 @@ function _ldivu_L!(
301
472
end
302
473
end
303
474
while n < N - (WU - 1 )
304
- ldivl_solve_W_u ! (spa, spl, n, WS, UF, Val (UNIT))
475
+ ldivu_solve_W_u ! (spa, spl, n, WS, UF, Val (UNIT))
305
476
n += WU
306
477
end
307
478
while n != N
308
- ldivl_solve_W ! (spa, spl, n, WS, Val (UNIT))
479
+ ldivu_solve_W ! (spa, spl, n, WS, Val (UNIT))
309
480
n += W
310
481
end
311
482
m += W
@@ -314,7 +485,7 @@ function _ldivu_L!(
314
485
# remainder on `m`
315
486
if m < M
316
487
let tup = (spa, spl), ftup = flatten_to_tup (tup)
317
- ldivl_remainder ! (M, N, m, Nr, WS, Val (UNIT), typeof (tup), ftup... )
488
+ ldivu_remainder ! (M, N, m, Nr, WS, Val (UNIT), typeof (tup), ftup... )
318
489
end
319
490
end
320
491
nothing
0 commit comments