Skip to content

Commit 454737b

Browse files
committed
add unupdated ldivu
1 parent fe2787d commit 454737b

File tree

1 file changed

+174
-3
lines changed

1 file changed

+174
-3
lines changed

src/rdivl.jl

Lines changed: 174 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,177 @@ end
276276
end
277277
nothing
278278
end
279+
"""
    _ldivu_remainder!(spa, spu, N, Nr, ::StaticInt{W}, ::Val{UNIT}, ::StaticInt{r})

Generated kernel for a remainder of size `r` (expected `0 < r < W`) of the `ldivu` solve.

!!! warning
    This method is a placeholder. The generator unconditionally calls
    `error("not updated")` before producing any code, so every call fails. Everything
    after that guard is unreachable until the body has been ported and the guard removed.

# Arguments
- `spa`, `spu`: objects read and written through `vload`/`vstore!` with index tuples.
  `spu` is read at `(j, j)` (only when `UNIT` is `false`) and at `(j, i)` for `i > j`.
- `N`: exclusive upper bound of the `j`/`n` loops.
- `Nr`: starting value of `n` in the `r > 1` path; when positive it is first handed to
  the masked small kernel.
- `W`, `UNIT`, `r`: compile-time width, unit-diagonal flag, and remainder size.

# Throws
- `ErrorException("not updated")`: always, for now.
- `ArgumentError`: if `r <= 0` or `r >= W` (only reachable once the guard is removed).
"""
@generated function _ldivu_remainder!(
    spa,
    spu,
    N,
    Nr,
    ::StaticInt{W},
    ::Val{UNIT},
    ::StaticInt{r}
) where {W,UNIT,r}
    # Deliberate guard: this body has not been adapted for `ldivu` yet.
    error("not updated")
    # Throw real exception objects (not bare strings) so callers get a typed error.
    r <= 0 &&
        throw(ArgumentError("Remainder of `<= 0` shouldn't be called, but had $r."))
    r >= W &&
        throw(ArgumentError("Remainder of `>= $W` shouldn't be called, but had $r."))
    if r == 1
        # Scalar path: a single row (first index fixed at static zero) is processed.
        z = static(0)
        sub = Base.FastMath.sub_fast
        mul = Base.FastMath.mul_fast
        div = Base.FastMath.div_fast
        vlxj = :(vload(spa, ($z, j)))
        if UNIT
            # Unit diagonal: the loaded value is used as-is.
            vlxj = :(xj = $vlxj)
        else
            # Non-unit diagonal: scale by the diagonal entry and write the result back.
            vlxj = quote
                xj = $div($vlxj, vload(spu, (j, j)))
                vstore!(spa, xj, ($z, j))
            end
        end
        quote
            $(Expr(:meta, :inline))
            for j = 0:N-1
                $vlxj
                # Eliminate the contribution of `xj` from every later entry.
                for i = (j+1):N-1
                    xi = vload(spa, ($z, i))
                    Uji = vload(spu, (j, i))
                    vstore!(spa, $sub(xi, $mul(xj, Uji)), ($z, i))
                end
            end
        end
    else
        WS = static(W)
        quote
            $(Expr(:meta, :inline))
            n = Nr # non factor of W remainder
            if n > 0
                # NOTE(review): `BdivU_small_kern!` looks like the right-division
                # kernel; presumably part of what still needs updating -- confirm.
                let t = (spa, spu),
                    ft = flatten_to_tup(t),
                    mask = $(getfield(_mask(WS, r), :u) % UInt32)

                    BdivU_small_kern!(n, mask, $WS, $(Val(UNIT)), typeof(t), ft...)
                end
            end
            # while n < N - $(W * U - 1)
            #   ldivu_solve_W_u!(spa, spa, spu, n, $WS, $US, Val(UNIT), Val(r))
            #   n += $(W * U)
            # end
            # Advance in steps of `W` until `n` reaches `N` exactly.
            while n != N
                ldivu_solve_W!(spa, spu, n, $WS, $(Val(UNIT)), $(StaticInt(r)))
                n += $W
            end
        end
    end
end
340+
"""
    ldivu_remainder!(M, N, m, Nr, ::StaticInt{W}, ::Val{UNIT}, ::Type{Args}, args...)

Dispatch a remainder on `m` to `_ldivu_remainder!` with a compile-time remainder size.

!!! warning
    The generator currently raises `error("not updated")` before emitting any code, so
    every call fails. The description below covers the code that would be generated
    once that guard is removed.

`spa` and `spu` are rebuilt with `reassemble_tup(Args, args)`. The remainder size `w` is
the first value in `1:W-2` for which `m == M - w`, and `W - 1` if none matches.
`_ldivu_remainder!` is then called with that `w` as a static integer. For `W == 2` the
only possible size is `1`, and that case alone carries an inline hint. The generated code
evaluates to `nothing`.
"""
@generated function ldivu_remainder!(
    M,
    N,
    m,
    Nr,
    ::StaticInt{W},
    ::Val{UNIT},
    ::Type{Args},
    args::Vararg{Any,K}
) where {W,UNIT,Args,K}
    error("not updated")
    width = static(W)
    unit = Val(UNIT)

    # Width 2: a single possible remainder, emitted with an inline hint.
    if W == 2
        return quote
            $(Expr(:meta, :inline))
            spa, spu = reassemble_tup(Args, args)
            _ldivu_remainder!(spa, spu, N, Nr, $width, $unit, $(static(1)))
            nothing
        end
    end

    # Width 8: build the explicit `if`/`else` ladder from the innermost case outwards.
    # The final `else` handles remainder 7; each wrap adds the test for a smaller one.
    if W == 8
        ladder = :(_ldivu_remainder!(spa, spu, N, Nr, static(8), $unit, StaticInt(7)))
        for w = 6:-1:1
            ladder = quote
                if m == M - $w
                    _ldivu_remainder!(spa, spu, N, Nr, static(8), $unit, StaticInt($w))
                else
                    $ladder
                end
            end
        end
        return quote
            spa, spu = reassemble_tup(Args, args)
            $ladder
            nothing
        end
    end

    # Any other width: let `@nif` generate the same kind of ladder.
    return quote
        spa, spu = reassemble_tup(Args, args)
        Base.Cartesian.@nif $(W - 1) w -> m == M - w w ->
            _ldivu_remainder!(spa, spu, N, Nr, $width, $unit, static(w))
        nothing
    end
end
279450
function _ldivu_L!(
280451
M,
281452
N,
@@ -301,11 +472,11 @@ function _ldivu_L!(
301472
end
302473
end
303474
while n < N - (WU - 1)
304-
ldivl_solve_W_u!(spa, spl, n, WS, UF, Val(UNIT))
475+
ldivu_solve_W_u!(spa, spl, n, WS, UF, Val(UNIT))
305476
n += WU
306477
end
307478
while n != N
308-
ldivl_solve_W!(spa, spl, n, WS, Val(UNIT))
479+
ldivu_solve_W!(spa, spl, n, WS, Val(UNIT))
309480
n += W
310481
end
311482
m += W
@@ -314,7 +485,7 @@ function _ldivu_L!(
314485
# remainder on `m`
315486
if m < M
316487
let tup = (spa, spl), ftup = flatten_to_tup(tup)
317-
ldivl_remainder!(M, N, m, Nr, WS, Val(UNIT), typeof(tup), ftup...)
488+
ldivu_remainder!(M, N, m, Nr, WS, Val(UNIT), typeof(tup), ftup...)
318489
end
319490
end
320491
nothing

0 commit comments

Comments
 (0)