-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path05-03_counting_zeros.go
89 lines (77 loc) · 2.83 KB
/
05-03_counting_zeros.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package hd
const u = 99
var nlz_goryavsky = [...]uint8{
32, 20, 19, u, u, 18, u, 7, 10, 17, u, u, 14, u, 6, u,
u, 9, u, 16, u, u, 1, 26, u, 13, u, u, 24, 5, u, u,
u, 21, u, 8, 11, u, 15, u, u, u, u, 2, 27, 0, 25, u,
22, u, 12, u, u, 3, 28, u, 23, u, 4, 29, u, u, 30, 31,
}
// LeadingZerosUint32 (nlz) is algorithm from Robert Harley.
// It consists of 14 instructions branch-free.
// It uses Julius Goryavsky variation for smaller lookup table size.
// LeadingZerosUint32 has direct relationship of log2 as well, and can be used to compute it directly.
// Some instruction sets, such as ARM M1 chips, include single assembly instruction for this operation.
func LeadingZerosUint32(x uint32) uint8 {
x |= x >> 1 // Propagate leftmost
x |= x >> 2 // 1-bit to the right
x |= x >> 4
x |= x >> 8
x &= ^(x >> 16) // Goryavsky
x *= 0xFD70_49FF // Multiplier is 7 * 255 ** 3, Gorvsky
return nlz_goryavsky[(x >> 26)]
}
func LeadingZerosUint64(x uint64) uint8 {
var n uint8 = 32
if x > 0xFFFF_FFFF {
x >>= 32
n -= 32
}
return n + LeadingZerosUint32(uint32(x))
}
func LeadingZerosUint32BinarySearch(x uint32) uint8 {
var y uint32 = 0
var n uint8 = 32
for _, i := range [...]uint8{16, 8, 4, 2, 1} {
y = x >> i
if y != 0 {
n -= i
x = y
}
}
return n - uint8(x)
}
func LeadingZerosEqual[T Unsigned](x, y T) bool { return (x ^ y) <= (x & y) }
func LeadingZerosLess[T Unsigned](x, y T) bool { return (x & ^y) > y }
func LeadingZerosLessOrEqual[T Unsigned](x, y T) bool { return (y & ^x) <= x }
// BitSize32 returns minimum number of bits requires to represent number in two's complement signed number.
func BitSize32(x int32) uint8 { return 32 - LeadingZerosUint32((uint32(x ^ (x << 1)))) }
var ntz_reiser = [...]uint8{
32, 0, 1, 26, 2, 23, 27,
u, 3, 16, 24, 30, 28, 11, u, 13, 4,
7, 17, u, 25, 22, 31, 15, 29, 10, 12,
6, u, 21, 14, 9, 5, 20, 8, 19, 18,
}
// TrailingZerosUint32 (ntz) uses John Reiser variant of David Seal method.
// Among applications of TrailingZerosUint32 is R.W.Gosper Loop Detection Algorithm.
func TrailingZerosUint32(x uint32) uint8 { return ntz_reiser[((x & -x) % 37)] }
// LoopDetectionGosper uses R.W.Gosper algorithm to detect start index of a loop and it's period.
// loop is defined on sequence: X_n+1 = f(X_n); X_0, X_1, ..., X_μ-1, X_μ, ... X_μ+λ. [HAK #132].
func LoopDetectionGosper(f func(int) int, x0 int) (μLower, μUpper, λ int) {
var T [33]int
T[0] = x0
Xn := x0
for n := 1; ; n++ {
Xn = f(Xn)
kmax := 31 - LeadingZerosUint32(uint32(n)) // Floor (log2 n)
for k := uint8(0); k <= kmax; k++ {
if Xn == T[k] {
// Compute m = max({i | i < n and ntz(i+1) = k})
m := ((((n >> k) - 1) | 1) << k) - 1
λ = n - m
lgl := 31 - LeadingZerosUint32(uint32(λ-1)) // Ceil(log2 lambda) - 1
return m - max(1, 1<<lgl) + 1, m, λ
}
}
T[TrailingZerosUint32(uint32(n)+1)] = Xn // No match
}
}