32
32
# Eight-entry table of single-bit byte masks: entry i holds 1_u8 << i, so
# BITMASKP[n & 7] selects bit (n & 7) within the byte at index n >> 3.
BITMASKP = Pointer .malloc(8 ) { |i | 1 _u8 << i }
33
33
34
34
macro unroll_setbits (bitarrp , starti , limiti , stepi )
35
- ndx: Int32 = {{starti}} & 7
35
+ ndx : Int32 = {{starti}} & 7
36
36
r0 = {{starti}} >> 3
37
37
r1 = {{starti}} + {{stepi}}
38
38
r2 = r1 + {{stepi}}
@@ -47,8 +47,8 @@ macro unroll_setbits(bitarrp, starti, limiti, stepi)
47
47
r3 = (r3 >> 3 ) - r0
48
48
r2 = (r2 >> 3 ) - r0
49
49
r1 = (r1 >> 3 ) - r0
50
- bytep: Pointer (UInt8 ) = {{bitarrp}} + r0
51
- looplmtp: Pointer (UInt8 ) = {{bitarrp}} + (({{limiti}} >> 3 ) - r7)
50
+ bytep : Pointer (UInt8 ) = {{bitarrp}} + r0
51
+ looplmtp : Pointer (UInt8 ) = {{bitarrp}} + (({{limiti}} >> 3 ) - r7)
52
52
case ((({{stepi}} & 7 ) << 3 ) | ({{starti}} & 7 )).to_u8
53
53
{% for n in (0 _u8 ..0x3F ) % }
54
54
when {{n}}
77
77
# swi = (ndx + ndx) * (ndx + 3) + 3
78
78
# r = ((swi | 63) + 1 - swi) % (ndx + ndx + 3)
79
79
# starti = if r == 0 then 0 else ndx + ndx + 3 - r
80
- STARTIS = [ 2 , 2 , 1 , 2 , 6 , 7 , 13 , 2 , 6 , 5 , 12 , 16 , 6 , 0 , 29 , 0 ,
81
- 6 , 16 , 30 , 25 , 6 , 32 , 45 , 32 , 6 , 48 , 30 , 16 , 6 , 0 , 62 ]
80
+ STARTIS = [2 , 2 , 1 , 2 , 6 , 7 , 13 , 2 , 6 , 5 , 12 , 16 , 6 , 0 , 29 , 0 ,
81
+ 6 , 16 , 30 , 25 , 6 , 32 , 45 , 32 , 6 , 48 , 30 , 16 , 6 , 0 , 62 ]
82
82
83
83
macro dense_setbits (bitarrp , starti , limiti , stepi )
84
84
dndx = {{starti}}
85
85
dndxlmt = {{starti}} | 63
86
86
while dndx <= dndxlmt # cull to an even 64-bit boundary...
87
87
{{bitarrp}}[dndx >> 3 ] |= BITMASKP [dndx & 7 ]; dndx += {{stepi}}
88
88
end
89
- wordp: Pointer (UInt64 ) = ({{bitarrp}} + ((dndx >> 3 ) & (-8 ))).as(Pointer (UInt64 ))
89
+ wordp : Pointer (UInt64 ) = ({{bitarrp}} + ((dndx >> 3 ) & (-8 ))).as(Pointer (UInt64 ))
90
90
keep = wordp
91
- wordlmtp: Pointer (UInt64 ) = ({{bitarrp}} + ((({{limiti}} >> 3 ) & (-8 )) -
91
+ wordlmtp : Pointer (UInt64 ) = ({{bitarrp}} + ((({{limiti}} >> 3 ) & (-8 )) -
92
92
(({{stepi}} << 3 ) - 8 ))).as(Pointer (UInt64 ))
93
93
dndx &= 63
94
94
case {{stepi}}.to_u8
95
95
{% for stpvi in (0 ...STARTIS .size) % } # odd primes STARTIS.size
96
96
when {{stpvi + stpvi + 3 }}.to_u8
97
97
while wordp <= wordlmtp
98
98
# for all modulo pattern 64-bit words
99
- {% for wi in (0 ... (stpvi + stpvi + 3 )) % }
99
+ {% for wi in (0 ...(stpvi + stpvi + 3 )) % }
100
100
# for every bit index bi whose position STARTIS[stpvi] + bi * step falls inside 64-bit word wi
101
- {% for bi in (((wi * 64 - 1 - STARTIS [stpvi]) / (stpvi + stpvi + 3 ) + 1 ) .. ((wi * 64 + 63 - STARTIS [stpvi]) / (stpvi + stpvi + 3 ))) % }
102
- {% if (STARTIS [stpvi] + (bi - 1 ) * (stpvi + stpvi + 3 )) < wi * 64 && (STARTIS [stpvi] + (bi + 1 ) * (stpvi + stpvi + 3 )) >= (wi + 1 ) * 64 % } # only one bit
101
+ {% for bi in (((wi * 64 - 1 - STARTIS [stpvi]) / (stpvi + stpvi + 3 ) + 1 ).. ((wi * 64 + 63 - STARTIS [stpvi]) / (stpvi + stpvi + 3 ))) % }
102
+ {% if (STARTIS [stpvi] + (bi - 1 ) * (stpvi + stpvi + 3 )) < wi * 64 && (STARTIS [stpvi] + (bi + 1 ) * (stpvi + stpvi + 3 )) >= (wi + 1 ) * 64 % } # only one bit
103
103
wordp[{{wi}}] |= {{1 _u64 << ((STARTIS [stpvi] + bi * (stpvi + stpvi + 3 )) & 63 )}}
104
104
{% elsif (STARTIS [stpvi] + (bi - 1 ) * (stpvi + stpvi + 3 )) < wi * 64 % } # first bit of many in word
105
105
v = wordp[{{wi}}] | {{1 _u64 << ((STARTIS [stpvi] + bi * (stpvi + stpvi + 3 )) & 63 )}}
106
106
{% elsif (STARTIS [stpvi] + (bi + 1 ) * (stpvi + stpvi + 3 )) >= (wi + 1 ) * 64 % } # last bit of many in word
107
107
wordp[{{wi}}] = v | {{1 _u64 << ((STARTIS [stpvi] + bi * (stpvi + stpvi + 3 )) & 63 )}}
108
108
{% else % } # not the first nor the last bit in the word
109
109
v |= {{1 _u64 << ((STARTIS [stpvi] + bi * (stpvi + stpvi + 3 )) & 63 )}}
110
- {% end % }
110
+ {% end % }
111
111
{% end % }
112
112
{% end % }
113
113
wordp += {{stpvi + stpvi + 3 }}
@@ -140,7 +140,6 @@ class PrimeSieve
140
140
bap[swi >> 3 ] |= BITMASKP [swi & 7 ]; swi += bp
141
141
end
142
142
end
143
-
144
143
in Techniques ::Stride8
145
144
(0 ..).each do |i |
146
145
swi = (i + i) * (i + 3 ) + 3 # calculate start marking index
@@ -158,7 +157,6 @@ class PrimeSieve
158
157
swi += bp
159
158
end
160
159
end
161
-
162
160
in Techniques ::Stride8Block16K
163
161
strtsp = Pointer .malloc(8 , nil .as Pointer (UInt8 ))
164
162
(0 ..).each do |i |
@@ -176,17 +174,16 @@ class PrimeSieve
176
174
mask = BITMASKP [si]; bytendxp = strtsp[si]
177
175
while bytendxp <= blockstopp
178
176
bytendxp[0 ] |= mask; bytendxp[bp] |= mask
179
- bytendxp[bp2] |= mask; bytendxp[bp3] |= mask ; bytendxp += bp4
177
+ bytendxp[bp2] |= mask; bytendxp[bp3] |= mask; bytendxp += bp4
180
178
end
181
179
while bytendxp <= blocklmtp
182
- bytendxp[0 ] |= mask ; bytendxp += bp
180
+ bytendxp[0 ] |= mask; bytendxp += bp
183
181
end
184
182
strtsp[si] = bytendxp
185
183
end
186
184
pagebytendx += CPUL1CACHE
187
185
end
188
186
end
189
-
190
187
in Techniques ::Extreme
191
188
(0 ..).each do |i |
192
189
swi = (i + i) * (i + 3 ) + 3 # calculate start marking index
@@ -208,7 +205,7 @@ class PrimeSieve
208
205
end
209
206
end
210
207
end
211
- end
208
+ end
212
209
213
210
def count_primes
214
211
if @range < 3
@@ -245,8 +242,8 @@ def bench(tec : Techniques)
245
242
end
246
243
if duration >= FORTO
247
244
prime_count = sieve.count_primes
248
- count = sieve.@range < 2 ? 0 : 1
249
- (0 .. ((sieve.@range - 3 ) >> 1 ).to_i32).each do |i |
245
+ count = sieve.@range < 2 ? 0 : 1
246
+ (0 .. ((sieve.@range - 3 ) >> 1 ).to_i32).each do |i |
250
247
count += 1 if (sieve.@bufp [i >> 3 ] & BITMASKP [i & 7 ]) == 0
251
248
end
252
249
valid = count == EXPECTED && prime_count == EXPECTED
@@ -256,21 +253,23 @@ def bench(tec : Techniques)
256
253
printf(" Invalid result!!!: " )
257
254
end
258
255
STDERR .printf(" Passes: %d Time: %f Avg: %f Limit: %d Count1: %d Count2: %d Valid: %s\n " ,
259
- passes, duration, (duration / passes),
260
- sieve.@range , count, prime_count, valid)
256
+ passes, duration, (duration / passes),
257
+ sieve.@range , count, prime_count, valid)
261
258
break
262
259
end
263
260
end
264
261
end
265
262
266
- {% if flag? :expand_macro % } # only one bit
263
+ {% if flag? :expand_macro % }
264
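# with the expand_macro compile-time flag set, invoke each bit-setting macro once on a small buffer instead of running the benchmarks (presumably so the macro expansions can be inspected)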
267
265
bap = Pointer .malloc(16384 , 0 _u8 )
268
266
bp = 3
269
267
swi = (bp * bp - 3 ) >> 3
270
268
lmti = 131071
271
269
unroll_setbits(bap, swi, lmti, bp)
272
270
dense_setbits(bap, swi, lmti, bp)
273
271
{% else % }
274
- Techniques .each do |t | bench(t) end
272
+ Techniques .each do |t |
273
+ bench(t)
274
+ end
275
275
{% end % }
276
-
0 commit comments