diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ef31f1203..d3fb47d54 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -6,15 +6,14 @@ on: pull_request: branches: ["**"] workflow_dispatch: - branches: ["**"] jobs: collect-solutions: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: "Checkout code" - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: "Find Dockerfiles" id: "solutions" @@ -31,10 +30,10 @@ jobs: solutions: ${{ steps.solutions.outputs.solutions }} build: - env: + env: ENABLE_DOCKER_PUSH: ${{ github.repository_owner == 'PlummersSoftwareLLC' && github.ref == 'refs/heads/drag-race' }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" needs: ["collect-solutions"] @@ -45,7 +44,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: "Normalize solution name" id: solution-name @@ -76,21 +75,21 @@ jobs: config_file: ./config/hadolint.yml - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Setup Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub if: ${{ env.ENABLE_DOCKER_PUSH == 'true' }} - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Build amd64 if: steps.arch-amd64.outputs.build - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v6 with: tags: primeimages/primes:${{ steps.solution-name.outputs.normalized }} context: ${{ matrix.solution }} @@ -98,10 +97,10 @@ jobs: push: ${{ env.ENABLE_DOCKER_PUSH }} cache-from: type=registry,ref=primeimages/primes:${{ steps.solution-name.outputs.normalized }} cache-to: type=inline - + - name: Build arm64 if: steps.arch-arm64.outputs.build - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v6 with: tags: primeimages/primes:${{ 
steps.solution-name.outputs.normalized }} context: ${{ matrix.solution }} diff --git a/BENCHMARK.md b/BENCHMARK.md index dd79da38e..be5bfbbf2 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -22,6 +22,7 @@ Some solutions are not included in the automated benchmark runs, either because - [Running a benchmark of all solutions for a particular language](#running-a-benchmark-of-all-solutions-for-a-particular-language) - [Running in unconfined mode](#running-in-unconfined-mode) - [Output formats](#output-formats) +- [Setting the solution timeout](#setting-the-solution-timeout) ## What operating system to use? @@ -375,3 +376,13 @@ The output format can be controlled via the `FORMATTER` variable like this: make FORMATTER=json make DIRECTORY=PrimeCrystal/solution_1 FORMATTER=csv ``` + +## Setting the solution timeout + +The run of each solution is limited to a certain duration, which is 10 minutes by default. +You can change this setting through the `TIMEOUT` variable like this: + +```shell +make TIMEOUT=15 +make DIRECTORY=PrimeCPP/solution_2 TIMEOUT=15 +``` diff --git a/Makefile b/Makefile index 55fdfb371..548d27050 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ SHELL := /bin/bash DIRECTORY := $(shell pwd) FORMATTER := "table" +TIMEOUT := "10" .PHONY: all all: benchmark @@ -14,6 +15,7 @@ benchmark: check-env ARGS=("-d $${REALPATH}" "-f $(FORMATTER)"); \ [ ! -z $${OUTPUT_FILE} ] && ARGS+=( "-o $${OUTPUT_FILE}" ); \ [ ! -z $${UNCONFINED} ] && ARGS+=( "--unconfined" ); \ + [ ! 
-z $${TIMEOUT} ] && ARGS+=( "-t $${TIMEOUT}" ); \ cd ./tools; npm ci --silent && npm start --silent -- benchmark $${ARGS[@]} .PHONY: check-env diff --git a/PrimeBrainFuck/solution_1/Dockerfile b/PrimeBrainFuck/solution_1/Dockerfile index a95b5d9af..11b276987 100644 --- a/PrimeBrainFuck/solution_1/Dockerfile +++ b/PrimeBrainFuck/solution_1/Dockerfile @@ -1,25 +1,13 @@ # container for building -FROM ubuntu:18.04 AS build +FROM ubuntu:22.04 AS build # install tools RUN apt-get update \ - && apt-get install -y lsb-release wget software-properties-common git - -# install clang-12 for C++ standard 17 -RUN wget https://apt.llvm.org/llvm.sh \ - && chmod +x llvm.sh \ - && ./llvm.sh 12 + && apt-get install -y lsb-release wget software-properties-common git clang cmake # set clang as default compiler for C and C++ -ENV CC=/usr/bin/clang-12 \ - CXX=/usr/bin/clang++-12 - -# install latest version of cmake -SHELL ["/bin/bash", "-o", "pipefail", "-c"] -RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - \ - && apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' \ - && apt-get update \ - && apt-get install -y cmake +ENV CC=/usr/bin/clang \ + CXX=/usr/bin/clang++ # clone custom language interpreter RUN git clone https://github.com/ThatAquarel/BrainF-ck-Interpreter \ @@ -34,10 +22,10 @@ RUN cmake -DCMAKE_BUILD_TYPE=Release .. 
\ # build prime sieve caller WORKDIR /opt/app/ COPY *.cpp *.b ./ -RUN clang++-12 -Ofast -std=c++17 PrimeBrainFuck.cpp -oPrimeBrainFuck +RUN clang++ -Ofast -std=c++17 PrimeBrainFuck.cpp -oPrimeBrainFuck # container for running built binaries -FROM ubuntu:18.04 +FROM ubuntu:22.04 # copy binaries from build container to current COPY --from=build /BrainF-ck-Interpreter/release/brainfuck /usr/local/bin diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index d31567f21..fe79df276 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -24,46 +24,41 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint32_t *array; - size_t arrSize; + uint8_t *array; + size_t logicalSize; - inline static size_t arraySize(size_t size) + static constexpr size_t arraySize(size_t size) { - return (size >> 5) + ((size & 31) > 0); + return (size >> 3) + ((size & 7) > 0); } - inline static size_t index(size_t n) + static constexpr size_t index(size_t n) { - return (n >> 5); - } - - inline static uint32_t getSubindex(size_t n, uint32_t d) - { - return d & uint32_t(uint32_t(0x01) << (n % 32)); - } - - inline void setFalseSubindex(size_t n, uint32_t &d) - { - d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); + return (n >> 3); } public: - explicit BitArray(size_t size) : arrSize(size) + explicit BitArray(size_t size) : logicalSize(size) { - array = new uint32_t[arraySize(size)]; - std::memset(array, 0xFF, (size >> 3) + ((size & 7) > 0)); + auto arrSize = (size + 1) / 2; // Only store bits for odd numbers + array = new uint8_t[arraySize(arrSize)]; + std::memset(array, 0x00, arraySize(arrSize)); } - ~BitArray() {delete [] array;} + ~BitArray() { delete[] array; } - bool get(size_t n) const + constexpr bool get(size_t n) const { - return getSubindex(n, array[index(n)]); + if (n % 2 == 0) + return false; // Even numbers > 2 are not prime + n = n / 2; // Map the 
actual number to the index in the array + return !(array[index(n)] & (uint8_t(1) << (n % 8))); } - static constexpr uint32_t rol(uint32_t x, uint32_t n) + void set(size_t n) { - return (x<>(32-n)); + n = n / 2; // Map the actual number to the index in the array + array[index(n)] |= (uint8_t(1) << (n % 8)); } static constexpr uint32_t buildSkipMask(size_t skip, size_t offset) @@ -75,6 +70,15 @@ class BitArray { return ~mask; } + uint32_t rol(uint32_t value, size_t bits) + { + bits %= 32; + if (bits == 0) + return value; + // Ensure that the number of bits to rotate is within 0-31 + return (value << bits) | (value >> (32 - bits)); + } + void setFlagsFalse(size_t n, size_t skip) { if (skip <= 12) { @@ -83,7 +87,7 @@ class BitArray { size_t bit_pos = n % 32; size_t curr_n = n; - while (curr_n < arrSize) + while (curr_n < size()) { // Build mask for current word starting at bit_pos uint32_t mask = buildSkipMask(skip, bit_pos); @@ -95,7 +99,7 @@ class BitArray { size_t bits_remaining = 32 - bit_pos; curr_n += ((bits_remaining + skip - 1) / skip) * skip; - if (curr_n >= arrSize) break; + if (curr_n >= size()) break; word_idx = index(curr_n); bit_pos = curr_n % 32; @@ -106,7 +110,7 @@ class BitArray { // Original implementation for larger skips auto rolling_mask = ~uint32_t(1 << (n % 32)); auto roll_bits = skip % 32; - while (n < arrSize) { + while (n < size()) { array[index(n)] &= rolling_mask; n += skip; rolling_mask = rol(rolling_mask, roll_bits); @@ -116,25 +120,25 @@ class BitArray { inline size_t size() const { - return arrSize; + return logicalSize; } }; // prime_sieve // -// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) -// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. 
+// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) +// and includes the code needed to eliminate non-primes from its array by calling runSieve. class prime_sieve { private: - BitArray Bits; // Sieve data, where 1==prime, 0==not + BitArray Bits; // Sieve data, where 0==prime, 1==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) + prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default { } @@ -154,15 +158,21 @@ class prime_sieve while (factor <= q) { - for (uint64_t num = factor; num < Bits.size(); num += 2) + // Find the next prime number + for (; factor <= q; factor += 2) { - if (Bits.get(num)) + if (Bits.get(factor)) { - factor = num; break; } } - Bits.setFlagsFalse(factor * factor, factor + factor); + + // Mark multiples of the prime number as not prime + uint64_t start = factor * factor; + for (uint64_t num = start; num <= Bits.size(); num += factor * 2) + { + Bits.set(num); + } factor += 2; } @@ -174,9 +184,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (int i = 3; i < Bits.size(); i+=2) - if (Bits.get(i)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (uint64_t num = 3; num <= Bits.size(); num += 2) + if (Bits.get(num)) count++; return count; } @@ -187,23 +197,24 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n & 1) - return Bits.get(n); - else + if (n == 2) + return true; + if (n < 2 || n % 2 == 0) return false; + return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. 
bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -227,8 +238,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num+=2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num += 2) { if (Bits.get(num)) { @@ -247,7 +258,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid : " << (validateResults() ? "Pass" : "FAIL!") + << "Valid: " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -354,7 +365,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -389,8 +400,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::thread threads[cThreads]; - uint64_t l_passes[cThreads]; + std::vector threads(cThreads); + std::vector l_passes(cThreads); for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe new file mode 100755 index 000000000..c815cbfcf Binary files /dev/null and b/PrimeCPP/solution_2/primes_par.exe differ diff --git a/PrimeCrystal/solution_1/Dockerfile b/PrimeCrystal/solution_1/Dockerfile index 327d22a3e..1c87631fb 100644 --- a/PrimeCrystal/solution_1/Dockerfile +++ b/PrimeCrystal/solution_1/Dockerfile @@ -1,11 +1,11 @@ -FROM crystallang/crystal:1.0.0-alpine AS build +FROM crystallang/crystal:1.14-alpine AS build WORKDIR /opt/app COPY primes.cr . RUN crystal build primes.cr --release --static --no-debug -FROM alpine:3.13 +FROM alpine:3 COPY --from=build /opt/app/primes /usr/local/bin/ -ENTRYPOINT [ "primes" ] \ No newline at end of file +ENTRYPOINT [ "primes" ] diff --git a/PrimeCrystal/solution_2/Dockerfile b/PrimeCrystal/solution_2/Dockerfile index f9c320870..3e6f87fe1 100644 --- a/PrimeCrystal/solution_2/Dockerfile +++ b/PrimeCrystal/solution_2/Dockerfile @@ -1,6 +1,6 @@ FROM debian:11 -ENV CRYSTAL_VER="1.1" +ENV CRYSTAL_VER="1.14" WORKDIR /opt diff --git a/PrimeCrystal/solution_2/primes.cr b/PrimeCrystal/solution_2/primes.cr index 27c79f59d..894610590 100644 --- a/PrimeCrystal/solution_2/primes.cr +++ b/PrimeCrystal/solution_2/primes.cr @@ -32,7 +32,7 @@ end BITMASKP = Pointer.malloc(8) { |i| 1_u8 << i } macro unroll_setbits(bitarrp, starti, limiti, stepi) - ndx: Int32 = {{starti}} & 7 + ndx : Int32 = {{starti}} & 7 r0 = {{starti}} >> 3 r1 = {{starti}} + {{stepi}} r2 = r1 + {{stepi}} @@ -47,8 +47,8 @@ macro unroll_setbits(bitarrp, starti, limiti, 
stepi) r3 = (r3 >> 3) - r0 r2 = (r2 >> 3) - r0 r1 = (r1 >> 3) - r0 - bytep: Pointer(UInt8) = {{bitarrp}} + r0 - looplmtp: Pointer(UInt8) = {{bitarrp}} + (({{limiti}} >> 3) - r7) + bytep : Pointer(UInt8) = {{bitarrp}} + r0 + looplmtp : Pointer(UInt8) = {{bitarrp}} + (({{limiti}} >> 3) - r7) case ((({{stepi}} & 7) << 3) | ({{starti}} & 7)).to_u8 {% for n in (0_u8..0x3F) %} when {{n}} @@ -77,8 +77,8 @@ end # swi = (ndx + ndx) * (ndx + 3) + 3 # r = ((swi | 63) + 1 - swi) % (ndx + ndx + 3) # starti = if r == 0 then 0 else ndx + ndx + 3 - r -STARTIS = [ 2, 2, 1, 2, 6, 7, 13, 2, 6, 5, 12, 16, 6, 0, 29, 0, - 6, 16, 30, 25, 6, 32, 45, 32, 6, 48, 30, 16, 6, 0, 62 ] +STARTIS = [2, 2, 1, 2, 6, 7, 13, 2, 6, 5, 12, 16, 6, 0, 29, 0, + 6, 16, 30, 25, 6, 32, 45, 32, 6, 48, 30, 16, 6, 0, 62] macro dense_setbits(bitarrp, starti, limiti, stepi) dndx = {{starti}} @@ -86,9 +86,9 @@ macro dense_setbits(bitarrp, starti, limiti, stepi) while dndx <= dndxlmt # cull to an even 64-bit boundary... {{bitarrp}}[dndx >> 3] |= BITMASKP[dndx & 7]; dndx += {{stepi}} end - wordp: Pointer(UInt64) = ({{bitarrp}} + ((dndx >> 3) & (-8))).as(Pointer(UInt64)) + wordp : Pointer(UInt64) = ({{bitarrp}} + ((dndx >> 3) & (-8))).as(Pointer(UInt64)) keep = wordp - wordlmtp: Pointer(UInt64) = ({{bitarrp}} + ((({{limiti}} >> 3) & (-8)) - + wordlmtp : Pointer(UInt64) = ({{bitarrp}} + ((({{limiti}} >> 3) & (-8)) - (({{stepi}} << 3) - 8))).as(Pointer(UInt64)) dndx &= 63 case {{stepi}}.to_u8 @@ -96,10 +96,10 @@ macro dense_setbits(bitarrp, starti, limiti, stepi) when {{stpvi + stpvi + 3}}.to_u8 while wordp <= wordlmtp # for all modulo pattern 64-bit words - {% for wi in (0 ... (stpvi + stpvi + 3)) %} + {% for wi in (0...(stpvi + stpvi + 3)) %} # for all modulo pattern 64-bit words - {% for bi in (((wi * 64 - 1 - STARTIS[stpvi]) / (stpvi + stpvi + 3) + 1) .. 
((wi * 64 + 63 - STARTIS[stpvi]) / (stpvi + stpvi + 3))) %} - {% if (STARTIS[stpvi] + (bi - 1) * (stpvi + stpvi + 3)) < wi * 64 && (STARTIS[stpvi] + (bi + 1) * (stpvi + stpvi + 3)) >= (wi + 1) * 64 %} # only one bit + {% for bi in (((wi * 64 - 1 - STARTIS[stpvi]) / (stpvi + stpvi + 3) + 1)..((wi * 64 + 63 - STARTIS[stpvi]) / (stpvi + stpvi + 3))) %} + {% if (STARTIS[stpvi] + (bi - 1) * (stpvi + stpvi + 3)) < wi * 64 && (STARTIS[stpvi] + (bi + 1) * (stpvi + stpvi + 3)) >= (wi + 1) * 64 %} # only one bit wordp[{{wi}}] |= {{1_u64 << ((STARTIS[stpvi] + bi * (stpvi + stpvi + 3)) & 63)}} {% elsif (STARTIS[stpvi] + (bi - 1) * (stpvi + stpvi + 3)) < wi * 64 %} # first bit of many in word v = wordp[{{wi}}] | {{1_u64 << ((STARTIS[stpvi] + bi * (stpvi + stpvi + 3)) & 63)}} @@ -107,7 +107,7 @@ macro dense_setbits(bitarrp, starti, limiti, stepi) wordp[{{wi}}] = v | {{1_u64 << ((STARTIS[stpvi] + bi * (stpvi + stpvi + 3)) & 63)}} {% else %} # not the first nor the last bit in the word v |= {{1_u64 << ((STARTIS[stpvi] + bi * (stpvi + stpvi + 3)) & 63)}} - {% end %} + {% end %} {% end %} {% end %} wordp += {{stpvi + stpvi + 3}} @@ -140,7 +140,6 @@ class PrimeSieve bap[swi >> 3] |= BITMASKP[swi & 7]; swi += bp end end - in Techniques::Stride8 (0..).each do |i| swi = (i + i) * (i + 3) + 3 # calculate start marking index @@ -158,7 +157,6 @@ class PrimeSieve swi += bp end end - in Techniques::Stride8Block16K strtsp = Pointer.malloc(8, nil.as Pointer(UInt8)) (0..).each do |i| @@ -176,17 +174,16 @@ class PrimeSieve mask = BITMASKP[si]; bytendxp = strtsp[si] while bytendxp <= blockstopp bytendxp[0] |= mask; bytendxp[bp] |= mask - bytendxp[bp2] |= mask; bytendxp[bp3] |= mask ; bytendxp += bp4 + bytendxp[bp2] |= mask; bytendxp[bp3] |= mask; bytendxp += bp4 end while bytendxp <= blocklmtp - bytendxp[0] |= mask ; bytendxp += bp + bytendxp[0] |= mask; bytendxp += bp end strtsp[si] = bytendxp end pagebytendx += CPUL1CACHE end end - in Techniques::Extreme (0..).each do |i| swi = (i + i) * (i + 
3) + 3 # calculate start marking index @@ -208,7 +205,7 @@ class PrimeSieve end end end - end + end def count_primes if @range < 3 @@ -245,8 +242,8 @@ def bench(tec : Techniques) end if duration >= FORTO prime_count = sieve.count_primes - count = sieve.@range < 2 ? 0 : 1 - (0 .. ((sieve.@range - 3) >> 1).to_i32).each do |i| + count = sieve.@range < 2 ? 0 : 1 + (0..((sieve.@range - 3) >> 1).to_i32).each do |i| count += 1 if (sieve.@bufp[i >> 3] & BITMASKP[i & 7]) == 0 end valid = count == EXPECTED && prime_count == EXPECTED @@ -256,14 +253,15 @@ def bench(tec : Techniques) printf("Invalid result!!!: ") end STDERR.printf("Passes: %d Time: %f Avg: %f Limit: %d Count1: %d Count2: %d Valid: %s\n", - passes, duration, (duration / passes), - sieve.@range, count, prime_count, valid) + passes, duration, (duration / passes), + sieve.@range, count, prime_count, valid) break end end end -{% if flag? :expand_macro %} # only one bit +{% if flag? :expand_macro %} + # only one bit bap = Pointer.malloc(16384, 0_u8) bp = 3 swi = (bp * bp - 3) >> 3 @@ -271,6 +269,7 @@ end unroll_setbits(bap, swi, lmti, bp) dense_setbits(bap, swi, lmti, bp) {% else %} - Techniques.each do |t| bench(t) end + Techniques.each do |t| + bench(t) + end {% end %} - diff --git a/PrimeJulia/solution_1/Dockerfile b/PrimeJulia/solution_1/Dockerfile index decf14cdf..982f83436 100644 --- a/PrimeJulia/solution_1/Dockerfile +++ b/PrimeJulia/solution_1/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6-alpine3.13 +FROM julia:1-alpine WORKDIR /opt/app diff --git a/PrimeJulia/solution_2/Dockerfile b/PrimeJulia/solution_2/Dockerfile index fbbfbaf6c..e73d5c67d 100644 --- a/PrimeJulia/solution_2/Dockerfile +++ b/PrimeJulia/solution_2/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6.1-alpine3.13 +FROM julia:1-alpine WORKDIR /opt/app diff --git a/PrimeJulia/solution_3/Dockerfile b/PrimeJulia/solution_3/Dockerfile index 1578aafbc..a1cd012cd 100644 --- a/PrimeJulia/solution_3/Dockerfile +++ b/PrimeJulia/solution_3/Dockerfile @@ -1,4 +1,4 
@@ -FROM julia:1.6-buster +FROM julia:1 WORKDIR /opt/app diff --git a/PrimeJulia/solution_3/README.md b/PrimeJulia/solution_3/README.md index aa8f9e71d..8c4e3b935 100644 --- a/PrimeJulia/solution_3/README.md +++ b/PrimeJulia/solution_3/README.md @@ -10,7 +10,7 @@ optimizations. This is a sort-of "low-level" style implementation in Julia to get as much as speed as possible out of the language. It is *not* designed to be idiomatic Julia code. -This solution requires at least **Julia 1.5** to run. Julia 1.6 is +This solution requires at least **Julia 1.5** to run. The latest stable 1.X Julia version is recommended and is used in the Docker image. ## Description @@ -40,7 +40,7 @@ and bits are unset when the number is *prime*. This simplifies the set_bit operation slightly (`arr[i] |= mask vs. arr[i] &= ~mask`). If you see any room for improvement in the code or have any -suggestions, don't hesitate to open an issue, pull request (PR), +suggestions, don't hesitate to open an issue, pull request (PR), Discussion, or the like. Don't forget to tag me at `@louie-github` so I can be notified if my personal input is either wanted or needed. 
I'm open to fixing stylistic issues or discussing cosmetic changes to diff --git a/PrimeJulia/solution_4/Dockerfile b/PrimeJulia/solution_4/Dockerfile index 8c1f2fc9b..fde5f0052 100644 --- a/PrimeJulia/solution_4/Dockerfile +++ b/PrimeJulia/solution_4/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6-buster +FROM julia:1 WORKDIR /opt/app diff --git a/PrimeMACRO11/solution_1/README.md b/PrimeMACRO11/solution_1/README.md new file mode 100644 index 000000000..c2955949f --- /dev/null +++ b/PrimeMACRO11/solution_1/README.md @@ -0,0 +1,37 @@ +# MACRO-11 solution by davepl + +![Algorithm](https://img.shields.io/badge/Algorithm-base-green) +![Faithfulness](https://img.shields.io/badge/Faithful-no-yellowgreen) +![Parallelism](https://img.shields.io/badge/Parallel-no-green) +![Bit count](https://img.shields.io/badge/Bits-1-green) +![Bit count](https://img.shields.io/badge/Bits-8-yellowgreen) +![Deviation](https://img.shields.io/badge/Deviation-sievesize-blue) + +## Description + +This solution provides two implementations in [MACRO-11](https://en.wikipedia.org/wiki/MACRO-11), that being the macro assembly language for the [DEC PDP-11](https://en.wikipedia.org/wiki/PDP-11) range of computers. + +## Implementations and sieve sizes + +This solution includes two implementations: + +- One ([SIEVE.ASM](SIEVE.ASM)) uses one byte per prime candidate. Due to applicable memory constraints, the sieve size for this implementation is 1,000. +- The other ([SIEVE2.ASM](SIEVE2.ASM)) uses one bit per prime candidate. This implementation's sieve size is 10,000. + +## Run instructions + +This solution's implementations can be assembled and executed on an actual PDP-11 computer or a sufficiently complete emulator, provided it's running an operating system that has MACRO-11 installed. A list of available emulators can be found in the [Emulators section on the PDP-11 Wikipedia page](https://en.wikipedia.org/wiki/PDP-11#Emulators). 
+ +Use the following commands to assemble, link and run one of the solution's implementations. These specific commands apply to a PDP-11 running RT-11, and build and run SIEVE2.ASM, which is located on the D1 device; modify the commands to match your specific situation where appropriate: + +```text +macro d1:sieve2.asm +link sieve2 +run sieve2 +``` + +## Results + +This is an image showing the results of an execution of SIEVE2.ASM on an actual PDP-11/34. The "ticks" that are reported are time units equal to 1/60th of a second on PDP-11s that are connected to a 60Hz power grid. On machines that are powered from a 50Hz grid, a tick _may_ be 1/50th of a second instead, but that is not always the case. + +![SIEVE2.ASM results](sieve2_result.jpg) diff --git a/PrimeMACRO11/solution_1/SIEVE.ASM b/PrimeMACRO11/solution_1/SIEVE.ASM new file mode 100644 index 000000000..ff55fc178 --- /dev/null +++ b/PrimeMACRO11/solution_1/SIEVE.ASM @@ -0,0 +1,165 @@ +;--------------------------------------------------------------------- + .TITLE SIEVE ; Program title +;--------------------------------------------------------------------- + + .MCALL .PRINT,.EXIT,.TTYOUT ; System macros + +;-- String Table ----------------------------------------------------- + +HELLOMSG: .ASCIZ /Sieve of Eratosthenes by Davepl 2024/ +DASHESMSG: .ASCIZ /----------------------------------/ +CBITSMSG: .ASCIZ /Clearing byte array. Sieve Size:/ +MSG2: .ASCIZ /Setup complete/ +RUNMSG: .ASCIZ /Running sieve/ +DONEMSG: .ASCIZ /Sieve complete/ +PRIMMSG: .ASCIZ /Prime numbers found:/ +MARKCMPMSG: .ASCIZ /Marking composite: / +CHECKCMPMSG: .ASCIZ /Checking if composite: / +EMPTYMSG: .ASCIZ // + +;-- Constants --------------------------------------------------------- + +LIMIT = 1000. ; Upper limit for primes +BSIZE = 1000. 
; Size of byte array (fixed value) + + .EVEN ; Ensure we're on a word boundary + +;-- Code Entry -------------------------------------------------------- + +START: .PRINT #HELLOMSG + .PRINT #DASHESMSG + .PRINT #CBITSMSG + MOV #LIMIT, R0 + JSR PC, PRNUM + .PRINT #EMPTYMSG + +;-- Clear the byte array ----------------------------------------------- + + MOV #BSIZE, R1 + CLR R2 +INITL: + CLRB BYTEARR(R2) + INC R2 + CMP R2, R1 + BNE INITL + + .PRINT #MSG2 + .PRINT #RUNMSG + +;-- Run the sieve ------------------------------------------------------ + + MOV #3, R1 ; Start with 3 (first odd prime) +SIEVE: + CMP R1, #LIMIT ; Check if we've reached the limit + BGE DONE.SV ; If so, we're done + +; Debug Output +; .PRINT #CHECKCMPMSG ; Print the number we're checking next +; MOV R1, R0 +; JSR PC, PRNUM +; .PRINT #EMPTYMSG + + JSR PC, ISCOMP ; Check if R1 is composite + BNE NXTODD ; If prime, skip to next odd number + + MOV R1, R2 ; R2 = R1 (prime number found) +MARK: + ADD R1, R2 ; R2 += R1 (next multiple) + CMP R2, #LIMIT ; Check if we've exceeded the limit + BGE NXTODD ; If so, move to next odd number + +; Debug Output +; .PRINT #MARKCMPMSG ; Print the multiple we're marking next +; MOV R2, R0 +; JSR PC, PRNUM +; .PRINT #EMPTYMSG + + JSR PC, SETCMP ; Mark R2 as composite + BR MARK ; Continue marking multiples + +NXTODD: + ADD #2, R1 ; Move to next odd number + BR SIEVE ; Continue sieving + +DONE.SV: + .PRINT #DONEMSG + .PRINT #PRIMMSG + +;-- Print prime numbers ------------------------------------------------ + + MOV #1, R3 ; Prime count in R3 + + MOV #2, R0 ; Start with 2 (only even prime) + JSR PC, PRNUM + MOV #',, R0 + .TTYOUT R0 + + MOV #3, R1 ; Start checking odd numbers from 3 +PRNLP: + CMP R1, #LIMIT ; Check if we've reached the limit + BGE DONE ; If so, we're done + + JSR PC, ISCOMP ; Check if R1 is composite + BNE PRNXT ; If prime, skip to next odd number + + MOV R1, R0 + JSR PC, PRNUM ; Print the prime number + INC R3 ; Bump the count + MOV #',, R0 + .TTYOUT R0 
+PRNXT: + ADD #2, R1 ; Move to next odd number + BR PRNLP ; Continue printing primes + +DONE: .PRINT EMPTYMSG + MOV R3, R0 ; Print count of primes + JSR PC, PRNUM + + .EXIT ; Exit program + +;-- Subroutines -------------------------------------------------------- + +ISCOMP: + MOV R1, R0 + MOVB BYTEARR(R0), R2 ; Load byte from byte array + TSTB R2 ; Check if byte is non-zero + BEQ ISPRIME ; If byte is zero, number is prime + MOV #1, R0 ; Return 1 if composite + RTS PC +ISPRIME: + CLR R0 ; Return 0 if prime + RTS PC + +SETCMP: + MOV R2, R0 + MOVB #1, BYTEARR(R0) ; Set the byte in the array + RTS PC + +PRNUM: +BTOA: + MOV R0, -(SP) ; Save R0 on stack + MOV R1, -(SP) ; Save R1 on stack + MOV R2, -(SP) ; Save R2 on stack + + MOV R0, R1 ; Move number to R1 (low part of dividend) + CLR R0 ; Clear R0 (high part of dividend) + DIV #10., R0 ; Divide R0:R1 by 10, quotient in R0, remainder in R1 + + TST R0 ; Check if quotient is 0 + BEQ PRINT ; If quotient is 0, print digit + JSR PC, BTOA ; Recursive call with quotient + +PRINT: ADD #'0, R1 ; Convert remainder to ASCII + MOV R1, R0 ; Move ASCII digit to R0 + .TTYOUT R0 ; Print the digit + + MOV (SP)+, R2 ; Restore R2 + MOV (SP)+, R1 ; Restore R1 + MOV (SP)+, R0 ; Restore R0 + RTS PC ; Return + + +BYTEARR: .BLKB BSIZE ; Byte array for sieve + + .END START + diff --git a/PrimeMACRO11/solution_1/SIEVE2.ASM b/PrimeMACRO11/solution_1/SIEVE2.ASM new file mode 100644 index 000000000..9854abfc2 --- /dev/null +++ b/PrimeMACRO11/solution_1/SIEVE2.ASM @@ -0,0 +1,259 @@ +;----------------------------------------------------------------------- +; SIEVE2.ASM - Sieve of Eratosthenes in PDP-11 assembly language +; by Dave Plummer 2024 +;----------------------------------------------------------------------- +; +; This variant uses a single bit per number to mark composites, rather +; than a byte per number. This reduces memory usage by a factor of 8 +; so we can sieve larger ranges, but requires more complex bit work. 
+
+        .TITLE  SIEVE                           ; Program title
+        .MCALL  .PRINT,.EXIT,.TTYOUT, .GTIM     ; System macros
+        .GLOBL  $DIVTK,$DIV60                   ; Global symbols
+
+
+; Message strings. Strings ending in <200> are printed by .PRINT without a
+; trailing CR/LF so that a number can follow on the same line; .ASCIZ
+; strings get a CR/LF appended.
+HELLOMSG:    .ASCIZ  /Sieve of Eratosthenes by Davepl 2024/
+DASHESMSG:   .ASCIZ  /------------------------------------/
+CBITSMSG:    .ASCII  /Clearing byte array. Sieve Size: /<200>
+RUNMSG:      .ASCIZ  /Running sieve/
+DONEMSG:     .ASCIZ  /Sieve complete/
+PRIMMSG:     .ASCII  /Prime numbers found: /<200>
+TIMRMSG:     .ASCII  /Ticks Elapsed: /<200>
+MARKCMPMSG:  .ASCIZ  /Marking composite: /
+CHECKCMPMSG: .ASCIZ  /Checking if composite: /
+TIMESTMP:    .ASCII  /Timestamp: /<200>
+NEWLINE:     .ASCIZ  //
+
+LIMIT = 10000.                  ; Upper limit for primes
+BSIZE = 625.                    ; Size of byte array (fixed value):
+                                ; 625 bytes * 8 bits = 5000 flags, one per
+                                ; odd number below LIMIT
+
+; Ensure we're on a word boundary in case the strings above are odd length
+
+        .EVEN
+
+START:  .PRINT  #HELLOMSG               ; Welcome banner
+        .PRINT  #DASHESMSG
+        .PRINT  #CBITSMSG               ; Display sieve size
+        MOV     #LIMIT, R0
+        JSR     PC, PRNUM
+        .PRINT  #NEWLINE
+
+;-- Clear the byte array -----------------------------------------------
+
+;       .PRINT  #TIMESTMP               ; Display timestamp
+        JSR     PC, STRTTMR             ; Record the start time (clobbers R0, R1)
+;       JSR     PC, PRNUM
+;       .PRINT  #NEWLINE
+
+        MOV     #BSIZE, R1              ; R1 = number of bytes to clear
+        CLR     R2                      ; R2 = byte index
+INITL:
+        CLRB    BITARR(R2)              ; Clear the byte array
+        INC     R2
+        CMP     R2, R1
+        BNE     INITL
+
+        .PRINT  #RUNMSG                 ; Display progress message "Running sieve";
+
+;-- Run the sieve ------------------------------------------------------
+; A set bit in BITARR means "composite"; a clear bit means "prime".
+
+        MOV     #3, R1                  ; Start with 3 (first odd prime)
+SIEVE:
+        CMP     R1, #LIMIT              ; Check if we've reached the limit
+        BGE     DONESV                  ; If so, we're done
+
+; Debug - print the number we're checking
+;       .PRINT  #CHECKCMPMSG
+;       MOV     R1, R0
+;       JSR     PC, PRNUM
+;       .PRINT  #NEWLINE
+
+        JSR     PC, ISCOMP              ; Check if R1 is composite (Z clear = composite)
+        BNE     NXTODD                  ; If composite, skip to next odd number
+
+        MOV     R1, R2                  ; R2 = R1 (prime number found)
+MARK:
+        ADD     R1, R2                  ; R2 += R1 (next multiple)
+        CMP     R2, #LIMIT              ; Check if we've exceeded the limit
+        BGE     NXTODD                  ; If so, move to next odd number
+
+; Debug - print the number we're marking
+;       .PRINT  #MARKCMPMSG
+;       MOV     R2, R0
+;       JSR     PC, PRNUM
+;       .PRINT  #NEWLINE
+
+        JSR     PC, SETCMP              ; Mark R2 as composite (even R2 is ignored)
+        BR      MARK                    ; Continue marking multiples
+
+NXTODD:
+        ADD     #2, R1                  ; Move to next odd number
+        BR      SIEVE                   ; Continue sieving
+
+DONESV:
+;       .PRINT  #TIMESTMP               ; Display timestamp
+        JSR     PC, STOPTMR             ; Stop the timer
+;       JSR     PC, PRNUM
+;       .PRINT  #NEWLINE
+
+        .PRINT  #DONEMSG
+        .PRINT  #PRIMMSG
+
+;-- Count prime numbers ------------------------------------------------
+
+LISTPRM:
+        MOV     #1, R3                  ; Prime count in R3, starting at 1 to
+                                        ; account for 2 (the only even prime)
+
+; Debug - Print list of primes
+;       MOV     #2, R0                  ; Start with 2 (only even prime)
+;       JSR     PC, PRNUM
+;       MOV     #',, R0
+;       .TTYOUT R0
+
+        MOV     #3, R1                  ; Start checking odd numbers from 3
+PRNLP:
+        CMP     R1, #LIMIT              ; Check if we've reached the limit
+        BGE     DONE                    ; If so, we're done
+
+        JSR     PC, ISCOMP              ; Check if R1 is composite
+        BNE     PRNXT                   ; If bit set, composite, so skip to next odd number
+
+        INC     R3                      ; Bump the count
+
+PRNXT:
+        ADD     #2, R1                  ; Move to next odd number
+        BR      PRNLP                   ; Continue counting primes
+
+DONE:
+        MOV     R3, R0                  ; Print count of primes
+        JSR     PC, PRNUM
+        .PRINT  #NEWLINE                ; '#' required: pass the ADDRESS of the
+                                        ; string, not the word stored there
+
+        .PRINT  #TIMRMSG
+        JSR     PC, ELAPSED
+        JSR     PC, PRNUM               ; Print elapsed time
+
+        .EXIT                           ; Exit program
+
+;-- Subroutines --------------------------------------------------------
+
+;-----------------------------------------------------------------------
+; ISCOMP - Test whether the number in R1 is flagged composite
+;          In:  R1 = number to test (R1 itself is preserved)
+;          Out: condition codes; Z clear (BNE taken) = composite or even,
+;               Z set (BEQ taken) = not flagged, i.e. prime so far
+;          Clobbers R0, R2, R4, R5
+;-----------------------------------------------------------------------
+
+ISCOMP:
+        ; Check if the number is even
+        BIT     #1, R1                  ; Check the low bit of R1
+        BEQ     ISNOTPRM                ; If even, it's not prime (composite)
+
+        ; Adjust index for odd numbers
+        MOV     R1, R4                  ; Copy bit number to R4
+        SUB     #1, R4                  ; Subtract 1 to handle only odd numbers
+        ASR     R4                      ; Divide by 2 to get the index for bit array
+        ASR     R4                      ; Divide by 8 to get byte offset
+        ASR     R4
+        ASR     R4
+        ADD     #BITARR, R4             ; Add base address of BITARR
+        MOV     R4, R0                  ; Store byte address in R0
+
+        MOV     R1, R5                  ; Copy bit number to R5
+        SUB     #1, R5                  ; Subtract 1 to handle only odd numbers
+        ASR     R5                      ; Divide by 2 to get bit position
+        BIC     #^B1111111111111000, R5 ; Keep only lower 3 bits (bit position within byte)
+        MOV     #1., R2                 ; Prepare a single bit
+        ASH     R5, R2                  ; Shift to create bit mask
+
+        BITB    R2, (R0)                ; Test the bit position in the byte
+        RTS     PC                      ; Return with condition codes set
+
+ISNOTPRM:
+        MOV     #1., R0                 ; Return 1 (composite) for even numbers;
+                                        ; the non-zero MOV also clears Z for the caller's BNE
+        RTS     PC                      ; Return
+
+;-----------------------------------------------------------------------
+; SETCMP - Flag the number in R2 as composite
+;          Even values of R2 are ignored (only odd numbers have a bit)
+;          R0, R1 and R2 preserved; clobbers R4, R5
+;-----------------------------------------------------------------------
+
+SETCMP:
+        MOV     R0, -(SP)               ; Preserve R0
+        MOV     R1, -(SP)               ; Preserve R1
+
+        ; Check if the number is even
+        BIT     #1, R2                  ; Check the low bit of R2
+        BEQ     SETRET                  ; If even, just return
+
+        ; Adjust index for odd numbers
+        MOV     R2, R4                  ; Copy bit number to R4
+        SUB     #1, R4                  ; Subtract 1 to handle only odd numbers
+        ASR     R4                      ; Divide by 2 to get the index for bit array
+        ASR     R4                      ; Divide by 8 to get byte offset
+        ASR     R4
+        ASR     R4
+        ADD     #BITARR, R4             ; Add base address of BITARR
+        MOV     R4, R0                  ; Store byte address in R0
+
+        MOV     R2, R5                  ; Copy bit number to R5
+        SUB     #1, R5                  ; Subtract 1 to handle only odd numbers
+        ASR     R5                      ; Divide by 2 to get bit position
+        BIC     #^B1111111111111000, R5 ; Keep only lower 3 bits (bit position within byte)
+        MOV     #1., R1                 ; Prepare a single bit
+        ASH     R5, R1                  ; Shift to create bit mask
+
+        BISB    R1, (R0)                ; Set the bit (byte operation)
+
+SETRET:
+        MOV     (SP)+, R1               ; Restore R1
+        MOV     (SP)+, R0               ; Restore R0
+        RTS     PC                      ; Return
+
+;-----------------------------------------------------------------------
+; PRNUM - Print the number in R0 as unsigned decimal ASCII
+;         Recurses once per digit so the most significant digit is
+;         printed first
+;         All registers preserved
+;-----------------------------------------------------------------------
+
+PRNUM:
+        MOV     R0, -(SP)               ; Save R0 on stack
+        MOV     R1, -(SP)               ; Save R1 on stack
+
+        MOV     R0, R1                  ; Move number to R1 (low part of dividend)
+        CLR     R0                      ; Clear R0 (high part of dividend)
+        DIV     #10., R0                ; Divide R0:R1 by 10, quotient in R0, remainder in R1
+
+        TST     R0                      ; Check if quotient is 0
+        BEQ     PRINT                   ; If quotient is 0, print digit
+        JSR     PC, PRNUM               ; Recursive like I were back in school
+
+PRINT:  ADD     #'0, R1                 ; Convert remainder to ASCII
+        MOV     R1, R0                  ; Move ASCII digit to R0
+        .TTYOUT R0                      ; Print the digit
+
+        MOV     (SP)+, R1               ; Restore R1
+        MOV     (SP)+, R0               ; Restore R0
+        RTS     PC                      ; Return
+
+        .EVEN
+
+;-----------------------------------------------------------------------
+; Timer routines - Used to measure how long the sieve took to execute
+;   STRTTMR / STOPTMR read the clock with .GTIM into TICKS and copy both
+;   words to TICKS1 / TICKS2; both return the second word in R0 and
+;   clobber R1.
+;   ELAPSED returns TICKS2+2 minus TICKS1+2 in R0. Only the second
+;   (low-order) words are subtracted, so the result is only meaningful
+;   for runs shorter than 65536 ticks.
+;-----------------------------------------------------------------------
+
+; Global variables to store start and stop times
+AREA1:  .BLKW   2                       ; Argument block for .GTIM
+TICKS:  .BLKW   2                       ; Most recent .GTIM reading
+TICKS1: .BLKW   2                       ; Reading taken by STRTTMR
+TICKS2: .BLKW   2                       ; Reading taken by STOPTMR
+
+STRTTMR: MOV    #TICKS, R1
+        .GTIM   #AREA1, R1
+        MOV     TICKS, TICKS1           ; Store high order time in TICKS1
+        MOV     TICKS+2, TICKS1+2       ; Store low order time in TICKS1+2
+        MOV     TICKS+2, R0             ; Return low order time in R0
+        RTS     PC
+
+STOPTMR: MOV    #TICKS, R1
+        .GTIM   #AREA1, R1
+        MOV     TICKS, TICKS2           ; Store high order time in TICKS2
+        MOV     TICKS+2, TICKS2+2       ; Store low order time in TICKS2+2
+        MOV     TICKS+2, R0             ; Return low order time in R0
+        RTS     PC
+
+ELAPSED: MOV    TICKS2+2, R0            ; R0 = stop time (low order)
+        SUB     TICKS1+2, R0            ; R0 -= start time (low order)
+        RTS     PC
+
+; Bit array data for the sieve
+BITARR: .BLKB   BSIZE                   ; Byte array for sieve
+
+.END START
diff --git a/PrimeMACRO11/solution_1/sieve2_result.jpg b/PrimeMACRO11/solution_1/sieve2_result.jpg new file mode 100755 index 000000000..0d0c3723c Binary files /dev/null and b/PrimeMACRO11/solution_1/sieve2_result.jpg differ diff --git a/PrimeProlog/solution_1/Dockerfile b/PrimeProlog/solution_1/Dockerfile index bade1e5b4..2216fec95 100644 --- a/PrimeProlog/solution_1/Dockerfile +++ b/PrimeProlog/solution_1/Dockerfile @@ -1,4 +1,4 @@ -FROM library/swipl:8.3.26 +FROM library/swipl:9.2.6 WORKDIR /opt/app COPY primes-*.pl bitvector.c run.sh ./ diff --git a/PrimeV/solution_1/primes.v b/PrimeV/solution_1/primes.v index 5921dd421..ebbe53523 100644 --- a/PrimeV/solution_1/primes.v +++ b/PrimeV/solution_1/primes.v @@ -1,23 +1,21 @@ import time import math -const ( - sieve_size = 1_000_000 - q = math.sqrt(sieve_size) - all_bits_true_array = []bool{len: sieve_size, init: true} - dictionary = { - '10': 4 - '100': 25 - '1000': 168 - '10000': 1229 - '100000': 9592 - '1000000': 78498 - '10000000': 664579 - '100000000': 5761455 - '1000000000': 50847534 - '10000000000': 455052511 - } -) +const sieve_size = 1_000_000 +const q = math.sqrt(sieve_size) +const all_bits_true_array = []bool{len: sieve_size, init: true} +const dictionary = { +
'10': 4 + '100': 25 + '1000': 168 + '10000': 1229 + '100000': 9592 + '1000000': 78498 + '10000000': 664579 + '100000000': 5761455 + '1000000000': 50847534 + '10000000000': 455052511 +} struct Sieve { sieve_size u64 @@ -25,7 +23,7 @@ mut: bits []bool } -[direct_array_access] +@[direct_array_access] fn (mut sieve Sieve) run_sieve() { mut factor := u64(3) @@ -54,7 +52,7 @@ fn (sieve Sieve) print_results(show_results bool, duration time.Duration, passes for num := u64(3); num <= sieve.sieve_size; num += u64(2) { if sieve.bits[num] { if show_results { - print('$num, ') + print('${num}, ') } count++ @@ -68,9 +66,9 @@ fn (sieve Sieve) print_results(show_results bool, duration time.Duration, passes avg := f64(duration / passes) count_primes := sieve.count_primes() valid := (count_primes == u64(dictionary[sieve.sieve_size.str()])) - eprintln('Passes: $passes, Time: $duration, Avg: $avg, Limit: $sieve.sieve_size, Count1: $count, Count2: $count_primes, Valid: $valid') + eprintln('Passes: ${passes}, Time: ${duration}, Avg: ${avg}, Limit: ${sieve.sieve_size}, Count1: ${count}, Count2: ${count_primes}, Valid: ${valid}') - println('marghidanu;$passes;$duration;1;algorithm=base,faithful=yes') + println('marghidanu;${passes};${duration};1;algorithm=base,faithful=yes') } fn (sieve Sieve) count_primes() u64 { @@ -92,7 +90,7 @@ fn main() { for { mut sieve := Sieve{ sieve_size: 1_000_000 - bits: all_bits_true_array + bits: all_bits_true_array } sieve.run_sieve() diff --git a/PrimeV/solution_2/primes.v b/PrimeV/solution_2/primes.v index 441722a68..9cbc94905 100644 --- a/PrimeV/solution_2/primes.v +++ b/PrimeV/solution_2/primes.v @@ -2,915 +2,913 @@ import time type Prime = u64 -const ( - limit = Prime(1_000_000) - cpul1cache = 16384 - results = { - Prime(10): 4 - Prime(100): 25 - Prime(1000): 168 - Prime(10000): 1229 - Prime(100000): 9592 - Prime(1000000): 78498 - Prime(10000000): 664579 - Prime(100000000): 5761455 - Prime(1000000000): 50847534 - Prime(10000000000): 455052511 - } - 
result = results[limit] - bitmask = [ u8(1), u8(2), u8(4), u8(8), u8(16), u8(32), u8(64), u8(128) ] - dense_threshold = 19 - extreme_bitset = [ // only four case used -> base prime value modulo 8 - // for modulo 1 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(128) - bytearrp[bytendx + r1] |= u8(1) - bytearrp[bytendx + r2] |= u8(2) - bytearrp[bytendx + r3] |= u8(4) - bytearrp[bytendx + r4] |= u8(8) - bytearrp[bytendx + r5] |= u8(16) - bytearrp[bytendx + r6] |= u8(32) - bytearrp[bytendx + r7] |= u8(64) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } +const limit = Prime(1_000_000) +const cpul1cache = 16384 +const results = { + Prime(10): 4 + Prime(100): 25 + Prime(1000): 168 + Prime(10000): 1229 + Prime(100000): 9592 + Prime(1000000): 78498 + Prime(10000000): 664579 + Prime(100000000): 5761455 + Prime(1000000000): 50847534 + Prime(10000000000): 455052511 +} +const result = results[limit] +const bitmask = [u8(1), u8(2), u8(4), u8(8), u8(16), u8(32), u8(64), u8(128)] +const dense_threshold = 19 +const extreme_bitset = [// only four case used -> base prime value modulo 8 + // for modulo 1 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - 
bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(128) + bytearrp[bytendx + r1] |= u8(1) + bytearrp[bytendx + r2] |= u8(2) + bytearrp[bytendx + r3] |= u8(4) + bytearrp[bytendx + r4] |= u8(8) + bytearrp[bytendx + r5] |= u8(16) + bytearrp[bytendx + r6] |= u8(32) + bytearrp[bytendx + r7] |= u8(64) } - }, - // for modulo 3 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(8) - bytearrp[bytendx + r1] |= u8(64) - bytearrp[bytendx + r2] |= u8(2) - bytearrp[bytendx + r3] |= u8(16) - bytearrp[bytendx + r4] |= u8(128) - bytearrp[bytendx + r5] |= u8(4) - bytearrp[bytendx + r6] |= u8(32) - bytearrp[bytendx + r7] |= u8(1) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } - }, - // for modulo 5 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; 
bytendx += stepi { - bytearrp[bytendx] |= u8(8) - bytearrp[bytendx + r1] |= u8(1) - bytearrp[bytendx + r2] |= u8(32) - bytearrp[bytendx + r3] |= u8(4) - bytearrp[bytendx + r4] |= u8(128) - bytearrp[bytendx + r5] |= u8(16) - bytearrp[bytendx + r6] |= u8(2) - bytearrp[bytendx + r7] |= u8(64) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } + } + }, + // for modulo 3 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(8) + bytearrp[bytendx + r1] |= u8(64) + bytearrp[bytendx + r2] |= u8(2) + bytearrp[bytendx + r3] |= u8(16) + bytearrp[bytendx + r4] |= u8(128) + bytearrp[bytendx + r5] |= u8(4) + bytearrp[bytendx + r6] |= u8(32) + bytearrp[bytendx + r7] |= u8(1) } - }, - // for modulo 7 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(128) - bytearrp[bytendx + r1] |= u8(64) - bytearrp[bytendx + r2] |= u8(32) - bytearrp[bytendx + r3] |= u8(16) - bytearrp[bytendx + r4] |= u8(8) - bytearrp[bytendx + r5] |= u8(4) - bytearrp[bytendx + r6] |= u8(2) - 
bytearrp[bytendx + r7] |= u8(1) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } } - ] - dense_bitset = [ - // for step of 3 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000000020 - v |= 0x0000000000000100 - v |= 0x0000000000000800 - v |= 0x0000000000004000 - v |= 0x0000000000020000 - v |= 0x0000000000100000 - v |= 0x0000000000800000 - v |= 0x0000000004000000 - v |= 0x0000000020000000 - v |= 0x0000000100000000 - v |= 0x0000000800000000 - v |= 0x0000004000000000 - v |= 0x0000020000000000 - v |= 0x0000100000000000 - v |= 0x0000800000000000 - v |= 0x0004000000000000 - v |= 0x0020000000000000 - v |= 0x0100000000000000 - v |= 0x0800000000000000 - wordp[0] = v | 0x4000000000000000 - v = wordp[1] | 0x0000000000000002 - v |= 0x0000000000000010 - v |= 0x0000000000000080 - v |= 0x0000000000000400 - v |= 0x0000000000002000 - v |= 0x0000000000010000 - v |= 0x0000000000080000 - v |= 0x0000000000400000 - v |= 0x0000000002000000 - v |= 0x0000000010000000 - v |= 0x0000000080000000 - v |= 0x0000000400000000 - v |= 0x0000002000000000 - v |= 0x0000010000000000 - v |= 0x0000080000000000 - v |= 0x0000400000000000 - v |= 0x0002000000000000 - v |= 0x0010000000000000 - v |= 0x0080000000000000 - v |= 0x0400000000000000 - wordp[1] = v | 0x2000000000000000 - v = wordp[2] | 0x0000000000000001 - v |= 0x0000000000000008 - v |= 0x0000000000000040 - v |= 0x0000000000000200 - v |= 0x0000000000001000 - v 
|= 0x0000000000008000 - v |= 0x0000000000040000 - v |= 0x0000000000200000 - v |= 0x0000000001000000 - v |= 0x0000000008000000 - v |= 0x0000000040000000 - v |= 0x0000000200000000 - v |= 0x0000001000000000 - v |= 0x0000008000000000 - v |= 0x0000040000000000 - v |= 0x0000200000000000 - v |= 0x0001000000000000 - v |= 0x0008000000000000 - v |= 0x0040000000000000 - v |= 0x0200000000000000 - v |= 0x1000000000000000 - wordp[2] |= v | 0x8000000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + }, + // for modulo 5 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(8) + bytearrp[bytendx + r1] |= u8(1) + bytearrp[bytendx + r2] |= u8(32) + bytearrp[bytendx + r3] |= u8(4) + bytearrp[bytendx + r4] |= u8(128) + bytearrp[bytendx + r5] |= u8(16) + bytearrp[bytendx + r6] |= u8(2) + bytearrp[bytendx + r7] |= u8(64) } - }, - // for step of 5 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000000080 - v |= 0x0000000000001000 - v |= 0x0000000000020000 - v |= 0x0000000000400000 - v |= 0x0000000008000000 - v |= 0x0000000100000000 - v |= 
0x0000002000000000 - v |= 0x0000040000000000 - v |= 0x0000800000000000 - v |= 0x0010000000000000 - v |= 0x0200000000000000 - wordp[0] = v | 0x4000000000000000 - v = wordp[1] | 0x0000000000000008 - v |= 0x0000000000000100 - v |= 0x0000000000002000 - v |= 0x0000000000040000 - v |= 0x0000000000800000 - v |= 0x0000000010000000 - v |= 0x0000000200000000 - v |= 0x0000004000000000 - v |= 0x0000080000000000 - v |= 0x0001000000000000 - v |= 0x0020000000000000 - v |= 0x0400000000000000 - wordp[1] = v | 0x8000000000000000 - v = wordp[2] | 0x0000000000000010 - v |= 0x0000000000000200 - v |= 0x0000000000004000 - v |= 0x0000000000080000 - v |= 0x0000000001000000 - v |= 0x0000000020000000 - v |= 0x0000000400000000 - v |= 0x0000008000000000 - v |= 0x0000100000000000 - v |= 0x0002000000000000 - v |= 0x0040000000000000 - wordp[2] = v | 0x0800000000000000 - v = wordp[3] | 0x0000000000000001 - v |= 0x0000000000000020 - v |= 0x0000000000000400 - v |= 0x0000000000008000 - v |= 0x0000000000100000 - v |= 0x0000000002000000 - v |= 0x0000000040000000 - v |= 0x0000000800000000 - v |= 0x0000010000000000 - v |= 0x0000200000000000 - v |= 0x0004000000000000 - v |= 0x0080000000000000 - wordp[3] = v | 0x1000000000000000 - v = wordp[4] | 0x0000000000000002 - v |= 0x0000000000000040 - v |= 0x0000000000000800 - v |= 0x0000000000010000 - v |= 0x0000000000200000 - v |= 0x0000000004000000 - v |= 0x0000000080000000 - v |= 0x0000001000000000 - v |= 0x0000020000000000 - v |= 0x0000400000000000 - v |= 0x0008000000000000 - v |= 0x0100000000000000 - wordp[4] = v | 0x2000000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } - }, - // for step of 7 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] 
|= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000002 - v |= 0x0000000000000100 - v |= 0x0000000000008000 - v |= 0x0000000000400000 - v |= 0x0000000020000000 - v |= 0x0000001000000000 - v |= 0x0000080000000000 - v |= 0x0004000000000000 - wordp[0] = v | 0x0200000000000000 - v = wordp[1] | 0x0000000000000001 - v |= 0x0000000000000080 - v |= 0x0000000000004000 - v |= 0x0000000000200000 - v |= 0x0000000010000000 - v |= 0x0000000800000000 - v |= 0x0000040000000000 - v |= 0x0002000000000000 - v |= 0x0100000000000000 - wordp[1] = v | 0x8000000000000000 - v = wordp[2] | 0x0000000000000040 - v |= 0x0000000000002000 - v |= 0x0000000000100000 - v |= 0x0000000008000000 - v |= 0x0000000400000000 - v |= 0x0000020000000000 - v |= 0x0001000000000000 - v |= 0x0080000000000000 - wordp[2] = v | 0x4000000000000000 - v = wordp[3] | 0x0000000000000020 - v |= 0x0000000000001000 - v |= 0x0000000000080000 - v |= 0x0000000004000000 - v |= 0x0000000200000000 - v |= 0x0000010000000000 - v |= 0x0000800000000000 - v |= 0x0040000000000000 - wordp[3] = v | 0x2000000000000000 - v = wordp[4] | 0x0000000000000010 - v |= 0x0000000000000800 - v |= 0x0000000000040000 - v |= 0x0000000002000000 - v |= 0x0000000100000000 - v |= 0x0000008000000000 - v |= 0x0000400000000000 - v |= 0x0020000000000000 - wordp[4] = v | 0x1000000000000000 - v = wordp[5] | 0x0000000000000008 - v |= 0x0000000000000400 - v |= 0x0000000000020000 - v |= 0x0000000001000000 - v |= 0x0000000080000000 - v |= 0x0000004000000000 - v |= 0x0000200000000000 - v |= 0x0010000000000000 - wordp[5] = v | 0x0800000000000000 - v = wordp[6] | 0x0000000000000004 - v |= 0x0000000000000200 - v |= 0x0000000000010000 - v |= 0x0000000000800000 - v |= 0x0000000040000000 - v |= 0x0000002000000000 - v |= 0x0000100000000000 - v |= 0x0008000000000000 - 
wordp[6] = v | 0x0400000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + } + }, + // for modulo 7 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(128) + bytearrp[bytendx + r1] |= u8(64) + bytearrp[bytendx + r2] |= u8(32) + bytearrp[bytendx + r3] |= u8(16) + bytearrp[bytendx + r4] |= u8(8) + bytearrp[bytendx + r5] |= u8(4) + bytearrp[bytendx + r6] |= u8(2) + bytearrp[bytendx + r7] |= u8(1) } - }, - // for step of 9; never used - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000000800 - v |= 0x0000000000100000 - v |= 0x0000000020000000 - v |= 0x0000004000000000 - v |= 0x0000800000000000 - wordp[0] = v | 0x0100000000000000 - v = wordp[1] | 0x0000000000000002 - v |= 0x0000000000000400 - v |= 0x0000000000080000 - v |= 0x0000000010000000 - v |= 0x0000002000000000 - v |= 0x0000400000000000 - wordp[1] = v | 0x0080000000000000 - v = wordp[2] | 0x0000000000000001 - v |= 0x0000000000000200 - v |= 0x0000000000040000 - v |= 0x0000000008000000 - v |= 0x0000001000000000 - v |= 0x0000200000000000 - v |= 0x0040000000000000 - 
wordp[2] = v | 0x8000000000000000 - v = wordp[3] | 0x0000000000000100 - v |= 0x0000000000020000 - v |= 0x0000000004000000 - v |= 0x0000000800000000 - v |= 0x0000100000000000 - v |= 0x0020000000000000 - wordp[3] = v | 0x4000000000000000 - v = wordp[4] | 0x0000000000000080 - v |= 0x0000000000010000 - v |= 0x0000000002000000 - v |= 0x0000000400000000 - v |= 0x0000080000000000 - v |= 0x0010000000000000 - wordp[4] = v | 0x2000000000000000 - v = wordp[5] | 0x0000000000000040 - v |= 0x0000000000008000 - v |= 0x0000000001000000 - v |= 0x0000000200000000 - v |= 0x0000040000000000 - v |= 0x0008000000000000 - wordp[5] = v | 0x1000000000000000 - v = wordp[6] | 0x0000000000000020 - v |= 0x0000000000004000 - v |= 0x0000000000800000 - v |= 0x0000000100000000 - v |= 0x0000020000000000 - v |= 0x0004000000000000 - wordp[6] = v | 0x0800000000000000 - v = wordp[7] | 0x0000000000000010 - v |= 0x0000000000002000 - v |= 0x0000000000400000 - v |= 0x0000000080000000 - v |= 0x0000010000000000 - v |= 0x0002000000000000 - wordp[7] = v | 0x0400000000000000 - v = wordp[8] | 0x0000000000000008 - v |= 0x0000000000001000 - v |= 0x0000000000200000 - v |= 0x0000000040000000 - v |= 0x0000008000000000 - v |= 0x0001000000000000 - wordp[8] = v | 0x0200000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } - }, - // for step of 11 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000040 - v |= 0x0000000000020000 - v |= 0x0000000010000000 - v |= 
0x0000008000000000 - v |= 0x0004000000000000 - wordp[0] = v | 0x2000000000000000 - v = wordp[1] | 0x0000000000000100 - v |= 0x0000000000080000 - v |= 0x0000000040000000 - v |= 0x0000020000000000 - v |= 0x0010000000000000 - wordp[1] = v | 0x8000000000000000 - v = wordp[2] | 0x0000000000000400 - v |= 0x0000000000200000 - v |= 0x0000000100000000 - v |= 0x0000080000000000 - wordp[2] = v | 0x0040000000000000 - v = wordp[3] | 0x0000000000000002 - v |= 0x0000000000001000 - v |= 0x0000000000800000 - v |= 0x0000000400000000 - v |= 0x0000200000000000 - wordp[3] = v | 0x0100000000000000 - v = wordp[4] | 0x0000000000000008 - v |= 0x0000000000004000 - v |= 0x0000000002000000 - v |= 0x0000001000000000 - v |= 0x0000800000000000 - wordp[4] = v | 0x0400000000000000 - v = wordp[5] | 0x0000000000000020 - v |= 0x0000000000010000 - v |= 0x0000000008000000 - v |= 0x0000004000000000 - v |= 0x0002000000000000 - wordp[5] = v | 0x1000000000000000 - v = wordp[6] | 0x0000000000000080 - v |= 0x0000000000040000 - v |= 0x0000000020000000 - v |= 0x0000010000000000 - v |= 0x0008000000000000 - wordp[6] = v | 0x4000000000000000 - v = wordp[7] | 0x0000000000000200 - v |= 0x0000000000100000 - v |= 0x0000000080000000 - v |= 0x0000040000000000 - wordp[7] = v | 0x0020000000000000 - v = wordp[8] | 0x0000000000000001 - v |= 0x0000000000000800 - v |= 0x0000000000400000 - v |= 0x0000000200000000 - v |= 0x0000100000000000 - wordp[8] = v | 0x0080000000000000 - v = wordp[9] | 0x0000000000000004 - v |= 0x0000000000002000 - v |= 0x0000000001000000 - v |= 0x0000000800000000 - v |= 0x0000400000000000 - wordp[9] = v | 0x0200000000000000 - v = wordp[10] | 0x0000000000000010 - v |= 0x0000000000008000 - v |= 0x0000000004000000 - v |= 0x0000002000000000 - v |= 0x0001000000000000 - wordp[10] = v | 0x0800000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + } + }, +] +const dense_bitset = [ + // for step of 3 + fn (bytearr []u8, strti int, lmti int, stepi 
int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } - }, - // for step of 13 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000080 - v |= 0x0000000000100000 - v |= 0x0000000200000000 - v |= 0x0000400000000000 - wordp[0] = v | 0x0800000000000000 - v = wordp[1] | 0x0000000000000100 - v |= 0x0000000000200000 - v |= 0x0000000400000000 - v |= 0x0000800000000000 - wordp[1] = v | 0x1000000000000000 - v = wordp[2] | 0x0000000000000200 - v |= 0x0000000000400000 - v |= 0x0000000800000000 - v |= 0x0001000000000000 - wordp[2] = v | 0x2000000000000000 - v = wordp[3] | 0x0000000000000400 - v |= 0x0000000000800000 - v |= 0x0000001000000000 - v |= 0x0002000000000000 - wordp[3] = v | 0x4000000000000000 - v = wordp[4] | 0x0000000000000800 - v |= 0x0000000001000000 - v |= 0x0000002000000000 - v |= 0x0004000000000000 - wordp[4] = v | 0x8000000000000000 - v = wordp[5] | 0x0000000000001000 - v |= 0x0000000002000000 - v |= 0x0000004000000000 - wordp[5] = v | 0x0008000000000000 - v = wordp[6] | 0x0000000000000001 - v |= 0x0000000000002000 - v |= 0x0000000004000000 - v |= 0x0000008000000000 - wordp[6] = v | 0x0010000000000000 - v = wordp[7] | 0x0000000000000002 - v |= 0x0000000000004000 - v |= 0x0000000008000000 - v |= 0x0000010000000000 - wordp[7] = v | 0x0020000000000000 - v = wordp[8] | 0x0000000000000004 - v |= 0x0000000000008000 - v |= 0x0000000010000000 - v |= 0x0000020000000000 - wordp[8] = v | 0x0040000000000000 - v = wordp[9] | 0x0000000000000008 - v |= 0x0000000000010000 - v |= 
0x0000000020000000 - v |= 0x0000040000000000 - wordp[9] = v | 0x0080000000000000 - v = wordp[10] | 0x0000000000000010 - v |= 0x0000000000020000 - v |= 0x0000000040000000 - v |= 0x0000080000000000 - wordp[10] = v | 0x0100000000000000 - v = wordp[11] | 0x0000000000000020 - v |= 0x0000000000040000 - v |= 0x0000000080000000 - v |= 0x0000100000000000 - wordp[11] = v | 0x0200000000000000 - v = wordp[12] | 0x0000000000000040 - v |= 0x0000000000080000 - v |= 0x0000000100000000 - v |= 0x0000200000000000 - wordp[12] = v | 0x0400000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000000020 + v |= 0x0000000000000100 + v |= 0x0000000000000800 + v |= 0x0000000000004000 + v |= 0x0000000000020000 + v |= 0x0000000000100000 + v |= 0x0000000000800000 + v |= 0x0000000004000000 + v |= 0x0000000020000000 + v |= 0x0000000100000000 + v |= 0x0000000800000000 + v |= 0x0000004000000000 + v |= 0x0000020000000000 + v |= 0x0000100000000000 + v |= 0x0000800000000000 + v |= 0x0004000000000000 + v |= 0x0020000000000000 + v |= 0x0100000000000000 + v |= 0x0800000000000000 + wordp[0] = v | 0x4000000000000000 + v = wordp[1] | 0x0000000000000002 + v |= 0x0000000000000010 + v |= 0x0000000000000080 + v |= 0x0000000000000400 + v |= 0x0000000000002000 + v |= 0x0000000000010000 + v |= 0x0000000000080000 + v |= 0x0000000000400000 + v |= 0x0000000002000000 + v |= 0x0000000010000000 + v |= 0x0000000080000000 + v |= 0x0000000400000000 + v |= 0x0000002000000000 + v |= 0x0000010000000000 + v |= 0x0000080000000000 + v |= 0x0000400000000000 + v |= 0x0002000000000000 + v |= 0x0010000000000000 + v |= 0x0080000000000000 + v |= 0x0400000000000000 + wordp[1] = v | 0x2000000000000000 + v = wordp[2] | 
0x0000000000000001 + v |= 0x0000000000000008 + v |= 0x0000000000000040 + v |= 0x0000000000000200 + v |= 0x0000000000001000 + v |= 0x0000000000008000 + v |= 0x0000000000040000 + v |= 0x0000000000200000 + v |= 0x0000000001000000 + v |= 0x0000000008000000 + v |= 0x0000000040000000 + v |= 0x0000000200000000 + v |= 0x0000001000000000 + v |= 0x0000008000000000 + v |= 0x0000040000000000 + v |= 0x0000200000000000 + v |= 0x0001000000000000 + v |= 0x0008000000000000 + v |= 0x0040000000000000 + v |= 0x0200000000000000 + v |= 0x1000000000000000 + wordp[2] |= v | 0x8000000000000000 } - }, - // for step of 15 - never used - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000002000 - v |= 0x0000000010000000 - v |= 0x0000080000000000 - wordp[0] = v | 0x0400000000000000 - v = wordp[1] | 0x0000000000000200 - v |= 0x0000000001000000 - v |= 0x0000008000000000 - wordp[1] = v | 0x0040000000000000 - v = wordp[2] | 0x0000000000000020 - v |= 0x0000000000100000 - v |= 0x0000000800000000 - wordp[2] = v | 0x0004000000000000 - v = wordp[3] | 0x0000000000000002 - v |= 0x0000000000010000 - v |= 0x0000000080000000 - v |= 0x0000400000000000 - wordp[3] = v | 0x2000000000000000 - v = wordp[4] | 0x0000000000001000 - v |= 0x0000000008000000 - v |= 0x0000040000000000 - wordp[4] = v | 0x0200000000000000 - v = wordp[5] | 0x0000000000000100 - v |= 0x0000000000800000 - v |= 0x0000004000000000 - wordp[5] = v | 0x0020000000000000 - v = wordp[6] | 0x0000000000000010 - v |= 0x0000000000080000 - v |= 0x0000000400000000 - wordp[6] = v | 0x0002000000000000 - v = wordp[7] | 0x0000000000000001 - v |= 0x0000000000008000 - v |= 
0x0000000040000000 - v |= 0x0000200000000000 - wordp[7] = v | 0x1000000000000000 - v = wordp[8] | 0x0000000000000800 - v |= 0x0000000004000000 - v |= 0x0000020000000000 - wordp[8] = v | 0x0100000000000000 - v = wordp[9] | 0x0000000000000080 - v |= 0x0000000000400000 - v |= 0x0000002000000000 - wordp[9] = v | 0x0010000000000000 - v = wordp[10] | 0x0000000000000008 - v |= 0x0000000000040000 - v |= 0x0000000200000000 - v |= 0x0001000000000000 - wordp[10] = v | 0x8000000000000000 - v = wordp[11] | 0x0000000000004000 - v |= 0x0000000020000000 - v |= 0x0000100000000000 - wordp[11] = v | 0x0800000000000000 - v = wordp[12] | 0x0000000000000400 - v |= 0x0000000002000000 - v |= 0x0000010000000000 - wordp[12] = v | 0x0080000000000000 - v = wordp[13] | 0x0000000000000040 - v |= 0x0000000000200000 - v |= 0x0000001000000000 - wordp[13] = v | 0x0008000000000000 - v = wordp[14] | 0x0000000000000004 - v |= 0x0000000000020000 - v |= 0x0000000100000000 - v |= 0x0000800000000000 - wordp[14] = v | 0x4000000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } - }, - // for step of 17 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000080000 - v |= 0x0000001000000000 - wordp[0] = v | 0x0020000000000000 - v = wordp[1] | 0x0000000000000040 - v |= 0x0000000000800000 - v |= 0x0000010000000000 - wordp[1] = v | 0x0200000000000000 - v = wordp[2] | 0x0000000000000400 - v |= 0x0000000008000000 - v |= 0x0000100000000000 - 
wordp[2] = v | 0x2000000000000000 - v = wordp[3] | 0x0000000000004000 - v |= 0x0000000080000000 - wordp[3] = v | 0x0001000000000000 - v = wordp[4] | 0x0000000000000002 - v |= 0x0000000000040000 - v |= 0x0000000800000000 - wordp[4] = v | 0x0010000000000000 - v = wordp[5] | 0x0000000000000020 - v |= 0x0000000000400000 - v |= 0x0000008000000000 - wordp[5] = v | 0x0100000000000000 - v = wordp[6] | 0x0000000000000200 - v |= 0x0000000004000000 - v |= 0x0000080000000000 - wordp[6] = v | 0x1000000000000000 - v = wordp[7] | 0x0000000000002000 - v |= 0x0000000040000000 - wordp[7] = v | 0x0000800000000000 - v = wordp[8] | 0x0000000000000001 - v |= 0x0000000000020000 - v |= 0x0000000400000000 - wordp[8] = v | 0x0008000000000000 - v = wordp[9] | 0x0000000000000010 - v |= 0x0000000000200000 - v |= 0x0000004000000000 - wordp[9] = v | 0x0080000000000000 - v = wordp[10] | 0x0000000000000100 - v |= 0x0000000002000000 - v |= 0x0000040000000000 - wordp[10] = v | 0x0800000000000000 - v = wordp[11] | 0x0000000000001000 - v |= 0x0000000020000000 - v |= 0x0000400000000000 - wordp[11] = v | 0x8000000000000000 - v = wordp[12] | 0x0000000000010000 - v |= 0x0000000200000000 - wordp[12] = v | 0x0004000000000000 - v = wordp[13] | 0x0000000000000008 - v |= 0x0000000000100000 - v |= 0x0000002000000000 - wordp[13] = v | 0x0040000000000000 - v = wordp[14] | 0x0000000000000080 - v |= 0x0000000001000000 - v |= 0x0000020000000000 - wordp[14] = v | 0x0400000000000000 - v = wordp[15] | 0x0000000000000800 - v |= 0x0000000010000000 - v |= 0x0000200000000000 - wordp[15] = v | 0x4000000000000000 - v = wordp[16] | 0x0000000000008000 - v |= 0x0000000100000000 - wordp[16] = v | 0x0002000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + } + }, + // for step of 5 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 
3] |= bitmask[ndx & 7] } - }, - // for step of 19 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000040 - v |= 0x0000000002000000 - v |= 0x0000100000000000 - wordp[0] = v | 0x8000000000000000 - v = wordp[1] | 0x0000000000040000 - v |= 0x0000002000000000 - wordp[1] = v | 0x0100000000000000 - v = wordp[2] | 0x0000000000000800 - v |= 0x0000000040000000 - wordp[2] = v | 0x0002000000000000 - v = wordp[3] | 0x0000000000000010 - v |= 0x0000000000800000 - v |= 0x0000040000000000 - wordp[3] = v | 0x2000000000000000 - v = wordp[4] | 0x0000000000010000 - v |= 0x0000000800000000 - wordp[4] = v | 0x0040000000000000 - v = wordp[5] | 0x0000000000000200 - v |= 0x0000000010000000 - wordp[5] = v | 0x0000800000000000 - v = wordp[6] | 0x0000000000000004 - v |= 0x0000000000200000 - v |= 0x0000010000000000 - wordp[6] = v | 0x0800000000000000 - v = wordp[7] | 0x0000000000004000 - v |= 0x0000000200000000 - wordp[7] = v | 0x0010000000000000 - v = wordp[8] | 0x0000000000000080 - v |= 0x0000000004000000 - wordp[8] = v | 0x0000200000000000 - v = wordp[9] | 0x0000000000000001 - v |= 0x0000000000080000 - v |= 0x0000004000000000 - wordp[9] = v | 0x0200000000000000 - v = wordp[10] | 0x0000000000001000 - v |= 0x0000000080000000 - wordp[10] = v | 0x0004000000000000 - v = wordp[11] | 0x0000000000000020 - v |= 0x0000000001000000 - v |= 0x0000080000000000 - wordp[11] = v | 0x4000000000000000 - v = wordp[12] | 0x0000000000020000 - v |= 0x0000001000000000 - wordp[12] = v | 0x0080000000000000 - v = wordp[13] | 0x0000000000000400 - v |= 0x0000000020000000 - wordp[13] = v | 0x0001000000000000 - v = wordp[14] | 
0x0000000000000008 - v |= 0x0000000000400000 - v |= 0x0000020000000000 - wordp[14] = v | 0x1000000000000000 - v = wordp[15] | 0x0000000000008000 - v |= 0x0000000400000000 - wordp[15] = v | 0x0020000000000000 - v = wordp[16] | 0x0000000000000100 - v |= 0x0000000008000000 - wordp[16] = v | 0x0000400000000000 - v = wordp[17] | 0x0000000000000002 - v |= 0x0000000000100000 - v |= 0x0000008000000000 - wordp[17] = v | 0x0400000000000000 - v = wordp[18] | 0x0000000000002000 - v |= 0x0000000100000000 - wordp[18] = v | 0x0008000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000000080 + v |= 0x0000000000001000 + v |= 0x0000000000020000 + v |= 0x0000000000400000 + v |= 0x0000000008000000 + v |= 0x0000000100000000 + v |= 0x0000002000000000 + v |= 0x0000040000000000 + v |= 0x0000800000000000 + v |= 0x0010000000000000 + v |= 0x0200000000000000 + wordp[0] = v | 0x4000000000000000 + v = wordp[1] | 0x0000000000000008 + v |= 0x0000000000000100 + v |= 0x0000000000002000 + v |= 0x0000000000040000 + v |= 0x0000000000800000 + v |= 0x0000000010000000 + v |= 0x0000000200000000 + v |= 0x0000004000000000 + v |= 0x0000080000000000 + v |= 0x0001000000000000 + v |= 0x0020000000000000 + v |= 0x0400000000000000 + wordp[1] = v | 0x8000000000000000 + v = wordp[2] | 0x0000000000000010 + v |= 0x0000000000000200 + v |= 0x0000000000004000 + v |= 0x0000000000080000 + v |= 0x0000000001000000 + v |= 0x0000000020000000 + v |= 0x0000000400000000 + v |= 0x0000008000000000 + v |= 0x0000100000000000 + v |= 0x0002000000000000 + v |= 0x0040000000000000 + wordp[2] = v | 0x0800000000000000 + v = wordp[3] | 0x0000000000000001 + v |= 0x0000000000000020 + v |= 0x0000000000000400 + v |= 
0x0000000000008000 + v |= 0x0000000000100000 + v |= 0x0000000002000000 + v |= 0x0000000040000000 + v |= 0x0000000800000000 + v |= 0x0000010000000000 + v |= 0x0000200000000000 + v |= 0x0004000000000000 + v |= 0x0080000000000000 + wordp[3] = v | 0x1000000000000000 + v = wordp[4] | 0x0000000000000002 + v |= 0x0000000000000040 + v |= 0x0000000000000800 + v |= 0x0000000000010000 + v |= 0x0000000000200000 + v |= 0x0000000004000000 + v |= 0x0000000080000000 + v |= 0x0000001000000000 + v |= 0x0000020000000000 + v |= 0x0000400000000000 + v |= 0x0008000000000000 + v |= 0x0100000000000000 + wordp[4] = v | 0x2000000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 7 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000002 + v |= 0x0000000000000100 + v |= 0x0000000000008000 + v |= 0x0000000000400000 + v |= 0x0000000020000000 + v |= 0x0000001000000000 + v |= 0x0000080000000000 + v |= 0x0004000000000000 + wordp[0] = v | 0x0200000000000000 + v = wordp[1] | 0x0000000000000001 + v |= 0x0000000000000080 + v |= 0x0000000000004000 + v |= 0x0000000000200000 + v |= 0x0000000010000000 + v |= 0x0000000800000000 + v |= 0x0000040000000000 + v |= 0x0002000000000000 + v |= 0x0100000000000000 + wordp[1] = v | 0x8000000000000000 + v = wordp[2] | 0x0000000000000040 + v |= 0x0000000000002000 + v |= 0x0000000000100000 + v |= 0x0000000008000000 + v |= 0x0000000400000000 + v |= 0x0000020000000000 + v |= 0x0001000000000000 + v |= 0x0080000000000000 + wordp[2] = v | 0x4000000000000000 + v = wordp[3] | 0x0000000000000020 + v |= 
0x0000000000001000 + v |= 0x0000000000080000 + v |= 0x0000000004000000 + v |= 0x0000000200000000 + v |= 0x0000010000000000 + v |= 0x0000800000000000 + v |= 0x0040000000000000 + wordp[3] = v | 0x2000000000000000 + v = wordp[4] | 0x0000000000000010 + v |= 0x0000000000000800 + v |= 0x0000000000040000 + v |= 0x0000000002000000 + v |= 0x0000000100000000 + v |= 0x0000008000000000 + v |= 0x0000400000000000 + v |= 0x0020000000000000 + wordp[4] = v | 0x1000000000000000 + v = wordp[5] | 0x0000000000000008 + v |= 0x0000000000000400 + v |= 0x0000000000020000 + v |= 0x0000000001000000 + v |= 0x0000000080000000 + v |= 0x0000004000000000 + v |= 0x0000200000000000 + v |= 0x0010000000000000 + wordp[5] = v | 0x0800000000000000 + v = wordp[6] | 0x0000000000000004 + v |= 0x0000000000000200 + v |= 0x0000000000010000 + v |= 0x0000000000800000 + v |= 0x0000000040000000 + v |= 0x0000002000000000 + v |= 0x0000100000000000 + v |= 0x0008000000000000 + wordp[6] = v | 0x0400000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 9; never used + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000000800 + v |= 0x0000000000100000 + v |= 0x0000000020000000 + v |= 0x0000004000000000 + v |= 0x0000800000000000 + wordp[0] = v | 0x0100000000000000 + v = wordp[1] | 0x0000000000000002 + v |= 0x0000000000000400 + v |= 0x0000000000080000 + v |= 0x0000000010000000 + v |= 0x0000002000000000 + v |= 0x0000400000000000 + wordp[1] = v | 0x0080000000000000 + v = wordp[2] | 0x0000000000000001 + v |= 0x0000000000000200 + 
v |= 0x0000000000040000 + v |= 0x0000000008000000 + v |= 0x0000001000000000 + v |= 0x0000200000000000 + v |= 0x0040000000000000 + wordp[2] = v | 0x8000000000000000 + v = wordp[3] | 0x0000000000000100 + v |= 0x0000000000020000 + v |= 0x0000000004000000 + v |= 0x0000000800000000 + v |= 0x0000100000000000 + v |= 0x0020000000000000 + wordp[3] = v | 0x4000000000000000 + v = wordp[4] | 0x0000000000000080 + v |= 0x0000000000010000 + v |= 0x0000000002000000 + v |= 0x0000000400000000 + v |= 0x0000080000000000 + v |= 0x0010000000000000 + wordp[4] = v | 0x2000000000000000 + v = wordp[5] | 0x0000000000000040 + v |= 0x0000000000008000 + v |= 0x0000000001000000 + v |= 0x0000000200000000 + v |= 0x0000040000000000 + v |= 0x0008000000000000 + wordp[5] = v | 0x1000000000000000 + v = wordp[6] | 0x0000000000000020 + v |= 0x0000000000004000 + v |= 0x0000000000800000 + v |= 0x0000000100000000 + v |= 0x0000020000000000 + v |= 0x0004000000000000 + wordp[6] = v | 0x0800000000000000 + v = wordp[7] | 0x0000000000000010 + v |= 0x0000000000002000 + v |= 0x0000000000400000 + v |= 0x0000000080000000 + v |= 0x0000010000000000 + v |= 0x0002000000000000 + wordp[7] = v | 0x0400000000000000 + v = wordp[8] | 0x0000000000000008 + v |= 0x0000000000001000 + v |= 0x0000000000200000 + v |= 0x0000000040000000 + v |= 0x0000008000000000 + v |= 0x0001000000000000 + wordp[8] = v | 0x0200000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 11 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000040 + v |= 0x0000000000020000 + v |= 
0x0000000010000000 + v |= 0x0000008000000000 + v |= 0x0004000000000000 + wordp[0] = v | 0x2000000000000000 + v = wordp[1] | 0x0000000000000100 + v |= 0x0000000000080000 + v |= 0x0000000040000000 + v |= 0x0000020000000000 + v |= 0x0010000000000000 + wordp[1] = v | 0x8000000000000000 + v = wordp[2] | 0x0000000000000400 + v |= 0x0000000000200000 + v |= 0x0000000100000000 + v |= 0x0000080000000000 + wordp[2] = v | 0x0040000000000000 + v = wordp[3] | 0x0000000000000002 + v |= 0x0000000000001000 + v |= 0x0000000000800000 + v |= 0x0000000400000000 + v |= 0x0000200000000000 + wordp[3] = v | 0x0100000000000000 + v = wordp[4] | 0x0000000000000008 + v |= 0x0000000000004000 + v |= 0x0000000002000000 + v |= 0x0000001000000000 + v |= 0x0000800000000000 + wordp[4] = v | 0x0400000000000000 + v = wordp[5] | 0x0000000000000020 + v |= 0x0000000000010000 + v |= 0x0000000008000000 + v |= 0x0000004000000000 + v |= 0x0002000000000000 + wordp[5] = v | 0x1000000000000000 + v = wordp[6] | 0x0000000000000080 + v |= 0x0000000000040000 + v |= 0x0000000020000000 + v |= 0x0000010000000000 + v |= 0x0008000000000000 + wordp[6] = v | 0x4000000000000000 + v = wordp[7] | 0x0000000000000200 + v |= 0x0000000000100000 + v |= 0x0000000080000000 + v |= 0x0000040000000000 + wordp[7] = v | 0x0020000000000000 + v = wordp[8] | 0x0000000000000001 + v |= 0x0000000000000800 + v |= 0x0000000000400000 + v |= 0x0000000200000000 + v |= 0x0000100000000000 + wordp[8] = v | 0x0080000000000000 + v = wordp[9] | 0x0000000000000004 + v |= 0x0000000000002000 + v |= 0x0000000001000000 + v |= 0x0000000800000000 + v |= 0x0000400000000000 + wordp[9] = v | 0x0200000000000000 + v = wordp[10] | 0x0000000000000010 + v |= 0x0000000000008000 + v |= 0x0000000004000000 + v |= 0x0000002000000000 + v |= 0x0001000000000000 + wordp[10] = v | 0x0800000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 13 + fn (bytearr []u8, strti int, lmti int, stepi 
int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000080 + v |= 0x0000000000100000 + v |= 0x0000000200000000 + v |= 0x0000400000000000 + wordp[0] = v | 0x0800000000000000 + v = wordp[1] | 0x0000000000000100 + v |= 0x0000000000200000 + v |= 0x0000000400000000 + v |= 0x0000800000000000 + wordp[1] = v | 0x1000000000000000 + v = wordp[2] | 0x0000000000000200 + v |= 0x0000000000400000 + v |= 0x0000000800000000 + v |= 0x0001000000000000 + wordp[2] = v | 0x2000000000000000 + v = wordp[3] | 0x0000000000000400 + v |= 0x0000000000800000 + v |= 0x0000001000000000 + v |= 0x0002000000000000 + wordp[3] = v | 0x4000000000000000 + v = wordp[4] | 0x0000000000000800 + v |= 0x0000000001000000 + v |= 0x0000002000000000 + v |= 0x0004000000000000 + wordp[4] = v | 0x8000000000000000 + v = wordp[5] | 0x0000000000001000 + v |= 0x0000000002000000 + v |= 0x0000004000000000 + wordp[5] = v | 0x0008000000000000 + v = wordp[6] | 0x0000000000000001 + v |= 0x0000000000002000 + v |= 0x0000000004000000 + v |= 0x0000008000000000 + wordp[6] = v | 0x0010000000000000 + v = wordp[7] | 0x0000000000000002 + v |= 0x0000000000004000 + v |= 0x0000000008000000 + v |= 0x0000010000000000 + wordp[7] = v | 0x0020000000000000 + v = wordp[8] | 0x0000000000000004 + v |= 0x0000000000008000 + v |= 0x0000000010000000 + v |= 0x0000020000000000 + wordp[8] = v | 0x0040000000000000 + v = wordp[9] | 0x0000000000000008 + v |= 0x0000000000010000 + v |= 0x0000000020000000 + v |= 0x0000040000000000 + wordp[9] = v | 0x0080000000000000 + v = wordp[10] | 0x0000000000000010 + v |= 0x0000000000020000 + v |= 0x0000000040000000 + v |= 0x0000080000000000 + wordp[10] = v | 0x0100000000000000 + v = 
wordp[11] | 0x0000000000000020 + v |= 0x0000000000040000 + v |= 0x0000000080000000 + v |= 0x0000100000000000 + wordp[11] = v | 0x0200000000000000 + v = wordp[12] | 0x0000000000000040 + v |= 0x0000000000080000 + v |= 0x0000000100000000 + v |= 0x0000200000000000 + wordp[12] = v | 0x0400000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 15 - never used + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000002000 + v |= 0x0000000010000000 + v |= 0x0000080000000000 + wordp[0] = v | 0x0400000000000000 + v = wordp[1] | 0x0000000000000200 + v |= 0x0000000001000000 + v |= 0x0000008000000000 + wordp[1] = v | 0x0040000000000000 + v = wordp[2] | 0x0000000000000020 + v |= 0x0000000000100000 + v |= 0x0000000800000000 + wordp[2] = v | 0x0004000000000000 + v = wordp[3] | 0x0000000000000002 + v |= 0x0000000000010000 + v |= 0x0000000080000000 + v |= 0x0000400000000000 + wordp[3] = v | 0x2000000000000000 + v = wordp[4] | 0x0000000000001000 + v |= 0x0000000008000000 + v |= 0x0000040000000000 + wordp[4] = v | 0x0200000000000000 + v = wordp[5] | 0x0000000000000100 + v |= 0x0000000000800000 + v |= 0x0000004000000000 + wordp[5] = v | 0x0020000000000000 + v = wordp[6] | 0x0000000000000010 + v |= 0x0000000000080000 + v |= 0x0000000400000000 + wordp[6] = v | 0x0002000000000000 + v = wordp[7] | 0x0000000000000001 + v |= 0x0000000000008000 + v |= 0x0000000040000000 + v |= 0x0000200000000000 + wordp[7] = v | 0x1000000000000000 + v = wordp[8] | 0x0000000000000800 + v |= 0x0000000004000000 + v |= 0x0000020000000000 + 
wordp[8] = v | 0x0100000000000000 + v = wordp[9] | 0x0000000000000080 + v |= 0x0000000000400000 + v |= 0x0000002000000000 + wordp[9] = v | 0x0010000000000000 + v = wordp[10] | 0x0000000000000008 + v |= 0x0000000000040000 + v |= 0x0000000200000000 + v |= 0x0001000000000000 + wordp[10] = v | 0x8000000000000000 + v = wordp[11] | 0x0000000000004000 + v |= 0x0000000020000000 + v |= 0x0000100000000000 + wordp[11] = v | 0x0800000000000000 + v = wordp[12] | 0x0000000000000400 + v |= 0x0000000002000000 + v |= 0x0000010000000000 + wordp[12] = v | 0x0080000000000000 + v = wordp[13] | 0x0000000000000040 + v |= 0x0000000000200000 + v |= 0x0000001000000000 + wordp[13] = v | 0x0008000000000000 + v = wordp[14] | 0x0000000000000004 + v |= 0x0000000000020000 + v |= 0x0000000100000000 + v |= 0x0000800000000000 + wordp[14] = v | 0x4000000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 17 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000080000 + v |= 0x0000001000000000 + wordp[0] = v | 0x0020000000000000 + v = wordp[1] | 0x0000000000000040 + v |= 0x0000000000800000 + v |= 0x0000010000000000 + wordp[1] = v | 0x0200000000000000 + v = wordp[2] | 0x0000000000000400 + v |= 0x0000000008000000 + v |= 0x0000100000000000 + wordp[2] = v | 0x2000000000000000 + v = wordp[3] | 0x0000000000004000 + v |= 0x0000000080000000 + wordp[3] = v | 0x0001000000000000 + v = wordp[4] | 0x0000000000000002 + v |= 0x0000000000040000 + v |= 0x0000000800000000 + wordp[4] = v | 0x0010000000000000 + v = wordp[5] | 
0x0000000000000020 + v |= 0x0000000000400000 + v |= 0x0000008000000000 + wordp[5] = v | 0x0100000000000000 + v = wordp[6] | 0x0000000000000200 + v |= 0x0000000004000000 + v |= 0x0000080000000000 + wordp[6] = v | 0x1000000000000000 + v = wordp[7] | 0x0000000000002000 + v |= 0x0000000040000000 + wordp[7] = v | 0x0000800000000000 + v = wordp[8] | 0x0000000000000001 + v |= 0x0000000000020000 + v |= 0x0000000400000000 + wordp[8] = v | 0x0008000000000000 + v = wordp[9] | 0x0000000000000010 + v |= 0x0000000000200000 + v |= 0x0000004000000000 + wordp[9] = v | 0x0080000000000000 + v = wordp[10] | 0x0000000000000100 + v |= 0x0000000002000000 + v |= 0x0000040000000000 + wordp[10] = v | 0x0800000000000000 + v = wordp[11] | 0x0000000000001000 + v |= 0x0000000020000000 + v |= 0x0000400000000000 + wordp[11] = v | 0x8000000000000000 + v = wordp[12] | 0x0000000000010000 + v |= 0x0000000200000000 + wordp[12] = v | 0x0004000000000000 + v = wordp[13] | 0x0000000000000008 + v |= 0x0000000000100000 + v |= 0x0000002000000000 + wordp[13] = v | 0x0040000000000000 + v = wordp[14] | 0x0000000000000080 + v |= 0x0000000001000000 + v |= 0x0000020000000000 + wordp[14] = v | 0x0400000000000000 + v = wordp[15] | 0x0000000000000800 + v |= 0x0000000010000000 + v |= 0x0000200000000000 + wordp[15] = v | 0x4000000000000000 + v = wordp[16] | 0x0000000000008000 + v |= 0x0000000100000000 + wordp[16] = v | 0x0002000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 19 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000040 + v |= 
0x0000000002000000 + v |= 0x0000100000000000 + wordp[0] = v | 0x8000000000000000 + v = wordp[1] | 0x0000000000040000 + v |= 0x0000002000000000 + wordp[1] = v | 0x0100000000000000 + v = wordp[2] | 0x0000000000000800 + v |= 0x0000000040000000 + wordp[2] = v | 0x0002000000000000 + v = wordp[3] | 0x0000000000000010 + v |= 0x0000000000800000 + v |= 0x0000040000000000 + wordp[3] = v | 0x2000000000000000 + v = wordp[4] | 0x0000000000010000 + v |= 0x0000000800000000 + wordp[4] = v | 0x0040000000000000 + v = wordp[5] | 0x0000000000000200 + v |= 0x0000000010000000 + wordp[5] = v | 0x0000800000000000 + v = wordp[6] | 0x0000000000000004 + v |= 0x0000000000200000 + v |= 0x0000010000000000 + wordp[6] = v | 0x0800000000000000 + v = wordp[7] | 0x0000000000004000 + v |= 0x0000000200000000 + wordp[7] = v | 0x0010000000000000 + v = wordp[8] | 0x0000000000000080 + v |= 0x0000000004000000 + wordp[8] = v | 0x0000200000000000 + v = wordp[9] | 0x0000000000000001 + v |= 0x0000000000080000 + v |= 0x0000004000000000 + wordp[9] = v | 0x0200000000000000 + v = wordp[10] | 0x0000000000001000 + v |= 0x0000000080000000 + wordp[10] = v | 0x0004000000000000 + v = wordp[11] | 0x0000000000000020 + v |= 0x0000000001000000 + v |= 0x0000080000000000 + wordp[11] = v | 0x4000000000000000 + v = wordp[12] | 0x0000000000020000 + v |= 0x0000001000000000 + wordp[12] = v | 0x0080000000000000 + v = wordp[13] | 0x0000000000000400 + v |= 0x0000000020000000 + wordp[13] = v | 0x0001000000000000 + v = wordp[14] | 0x0000000000000008 + v |= 0x0000000000400000 + v |= 0x0000020000000000 + wordp[14] = v | 0x1000000000000000 + v = wordp[15] | 0x0000000000008000 + v |= 0x0000000400000000 + wordp[15] = v | 0x0020000000000000 + v = wordp[16] | 0x0000000000000100 + v |= 0x0000000008000000 + wordp[16] = v | 0x0000400000000000 + v = wordp[17] | 0x0000000000000002 + v |= 0x0000000000100000 + v |= 0x0000008000000000 + wordp[17] = v | 0x0400000000000000 + v = wordp[18] | 0x0000000000002000 + v |= 0x0000000100000000 + wordp[18] = v | 
0x0008000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } } - ] -) + }, +] enum Technique { bit_twiddle @@ -921,8 +919,8 @@ enum Technique { } struct PrimeSieve { - sieve_size Prime - sieve_buffer []u8 + sieve_size Prime + sieve_buffer []u8 } fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { @@ -934,10 +932,14 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { .bit_twiddle { unsafe { sievep := &sieve[0] - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 for ; swi <= bitlmt; swi += bp { sievep[swi >> 3] |= bitmask[swi & 7] @@ -948,10 +950,14 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { .stride8 { unsafe { sievep := &sieve[0] - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 tstlmt := swi + (bp << 3) - 1 slmt := if tstlmt > bitlmt { bitlmt } else { tstlmt } @@ -969,10 +975,14 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { sievep := &sieve[0] strts := []int{len: 8, cap: 8} strtsp := &strts[0] - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 bp2 := bp + bp bp3 := bp + bp2 @@ -983,8 +993,11 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { swi += bp } for pgndx := pgndx0; 
pgndx < size; pgndx += cpul1cache { - pglmt := if pgndx + cpul1cache > size { size - 1 } - else { pgndx + cpul1cache - 1 } + pglmt := if pgndx + cpul1cache > size { + size - 1 + } else { + pgndx + cpul1cache - 1 + } pgstp := pglmt - bp3 for si := 0; si < 8; si++ { mut bytendx := strtsp[si] @@ -1005,16 +1018,19 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { } } .extreme, .extreme_hybrid { - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sieve[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sieve[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 if tec == Technique.extreme_hybrid && bp <= dense_threshold { // only from 3 to 19; cases 9 and 15 actually not used dense_bitset[(bp - 3) >> 1](sieve, swi, bitlmt, bp) - } - else { + } else { // only four cases are actually used! extreme_bitset[(bp >> 1) & 3](sieve, swi, bitlmt, bp) } @@ -1022,15 +1038,17 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { } } - return PrimeSieve { - sieve_size: lmt + return PrimeSieve{ + sieve_size: lmt sieve_buffer: sieve } } fn (sieve PrimeSieve) count_primes() int { if sieve.sieve_size < 3 { - if sieve.sieve_size < 2 { return 0 } + if sieve.sieve_size < 2 { + return 0 + } return 1 } @@ -1053,21 +1071,24 @@ fn bench(tec Technique) { passes++ duration := (time.now() - start_time).seconds() if duration >= 5.0 { - mut rsltstr := "2 " + mut rsltstr := '2 ' mut count := 1 bitlmt := int((sieve.sieve_size - 3) >> 1) for ndx := 0; ndx <= bitlmt; ndx++ { if (sieve.sieve_buffer[ndx >> 3] & bitmask[ndx & 7]) == u8(0) { - if count < 25 { rsltstr += (ndx + ndx + 3).str() + " " } + if count < 25 { + rsltstr += (ndx + ndx + 3).str() + ' ' + } count++ } } avg := duration / f64(passes) count_primes := sieve.count_primes() - valid := rsltstr == "2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 " - && count_primes 
== result && count == result - eprintln('Passes: $passes, Time: $duration, Avg: $avg, Limit: $sieve.sieve_size, Count1: $count, Count2: $count_primes, Valid: $valid') + valid := + rsltstr == '2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 ' + && count_primes == result && count == result + eprintln('Passes: ${passes}, Time: ${duration}, Avg: ${avg}, Limit: ${sieve.sieve_size}, Count1: ${count}, Count2: ${count_primes}, Valid: ${valid}') label := 'GordonBGood_' + match tec { .bit_twiddle { 'bittwiddle' } .stride8 { 'stride8' } @@ -1075,7 +1096,7 @@ fn bench(tec Technique) { .extreme { 'extreme' } .extreme_hybrid { 'extreme-hybrid' } } - println('$label;$passes;$duration;1;algorithm=base,faithful=yes,bits=1') + println('${label};${passes};${duration};1;algorithm=base,faithful=yes,bits=1') break } } @@ -1088,4 +1109,3 @@ fn main() { bench(Technique.extreme) bench(Technique.extreme_hybrid) } - diff --git a/tools/src/commands/benchmark.ts b/tools/src/commands/benchmark.ts index 0f34acb74..5dc2a8331 100644 --- a/tools/src/commands/benchmark.ts +++ b/tools/src/commands/benchmark.ts @@ -30,9 +30,11 @@ export const command = new Command('benchmark') .option('-f, --formatter ', 'Output formatter', 'table') .option('-o, --output-file ', 'Write output to given file') .option('-u, --unconfined', 'Run with seccomp:unconfined (native performance for interpreted languages)') + .option('-t, --timeout ', 'Timeout for each benchmark in minutes', '10') .action(async (args) => { const directory = path.resolve(args.directory as string); const unconfined = args.unconfined === true; + const timeout = parseInt(args.timeout as string); logger.info(`Unconfined mode: ${unconfined}`); @@ -106,11 +108,18 @@ export const command = new Command('benchmark') let output = ''; try { logger.info(`[${implementation}][${solution}] Running...`); - output = dockerService.runContainer(imageName, options); + output = dockerService.runContainer(imageName, timeout, options); } catch 
(err) { - logger.warn( - `[${implementation}][${solution}] Exited with abnormal code: ${err.status}. Results might be partial...` - ); + if (err.signal) { + logger.warn( + `[${implementation}][${solution}] Killed after ${timeout} minutes with signal: ${err.signal}. Results are likely partial...` + ); + } + else { + logger.warn( + `[${implementation}][${solution}] Exited with abnormal code: ${err.status}. Results might be partial...` + ); + } output = err.output .filter((block: Buffer | null) => block !== null) .map((block: Buffer) => block.toString('utf8')) diff --git a/tools/src/services/docker.ts b/tools/src/services/docker.ts index cf891c158..04aafc5fb 100644 --- a/tools/src/services/docker.ts +++ b/tools/src/services/docker.ts @@ -7,9 +7,11 @@ export default class DockerService { }); } - public runContainer(imageName: string, options: Array): string { + public runContainer(imageName: string, duration: number, options: Array): string { const output = child_process.execSync(`docker run --rm ${options.join(' ')} ${imageName}`, { - stdio: 'pipe' + stdio: 'pipe', + timeout: duration ? duration * 60000 : undefined, + killSignal: 'SIGKILL' }); return output.toString('utf8'); }