Skip to content

Commit

Permalink
Properly flush memory in benchmark and after each block.
Browse files Browse the repository at this point in the history
  • Loading branch information
michal-zurkowski committed May 15, 2021
1 parent 154528a commit a2c56b0
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 95 deletions.
10 changes: 0 additions & 10 deletions algo/gr/cryptonote/cryptonight.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ static void do_skein_hash(const void *input, size_t len, void *output) {
static void (*const extra_hashes[4])(const void *, size_t, void *) = {
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};

static __attribute__((always_inline)) uint64_t
__umul128(const uint64_t *a, const uint64_t *b, uint64_t *hi) {
unsigned __int128 r = (unsigned __int128)(*a) * (unsigned __int128)(*b);
*hi = r >> 64;
return (uint64_t)r;
}

// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1) {
Expand Down Expand Up @@ -393,7 +386,6 @@ cryptonight_hash(const void *input, void *output, const uint32_t memory,
cl = ((uint64_t *)(&l0[idx0]))[0];
ch = ((uint64_t *)(&l0[idx0]))[1];

// lo = __umul128(&cxl0, &cl, &hi);
__asm("mulq %3\n\t" : "=d"(hi), "=a"(lo) : "1"(cxl0), "rm"(cl) : "cc");

al0 += hi;
Expand Down Expand Up @@ -535,7 +527,6 @@ cryptonight_2way_hash(const void *input0, const void *input1, void *output0,
cl = ((uint64_t *)(&l0[idx0]))[0];
ch = ((uint64_t *)(&l0[idx0]))[1];

// lo = __umul128(&cxl0, &cl, &hi);
__asm("mulq %3\n\t" : "=d"(hi), "=a"(lo) : "1"(cxl0), "rm"(cl) : "cc");

al0 += hi;
Expand All @@ -553,7 +544,6 @@ cryptonight_2way_hash(const void *input0, const void *input1, void *output0,
cl = ((uint64_t *)(&l1[idx1]))[0];
ch = ((uint64_t *)(&l1[idx1]))[1];

// lo = __umul128(&cxl1, &cl, &hi);
__asm("mulq %3\n\t" : "=d"(hi), "=a"(lo) : "1"(cxl1), "rm"(cl) : "cc");

al1 += hi;
Expand Down
21 changes: 4 additions & 17 deletions algo/gr/gr-4way.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "gr-gate.h"
#include "virtual_memory.h"

#if defined(GR_4WAY)

Expand Down Expand Up @@ -397,22 +396,6 @@ int scanhash_gr_4way(struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i *)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);

if (hp_state == NULL) {
// Allocate 4MiB instead of 2MiB in case 2way Fast is used.
if (opt_tune || opt_benchmark_config || cn_config[Fast] == 1) {
hp_state = (uint8_t *)AllocateMemory(1 << 22);
} else if (opt_tuned) {
for (int i = 0; i < 20; i++) {
if (cn_tune[i][5] == 1) {
hp_state = (uint8_t *)AllocateMemory(1 << 22);
}
}
}
if (hp_state == NULL) {
hp_state = (uint8_t *)AllocateMemory(1 << 21);
}
}

if (opt_tune) {
tune(pdata, thr_id);
opt_tuned = true; // Tuned.
Expand Down Expand Up @@ -453,6 +436,10 @@ int scanhash_gr_4way(struct work *work, uint32_t max_nonce,
}
}

// Allocates hp_state for Cryptonight algorithms.
// Needs to be run AFTER gr_hash_order is set!
AllocateNeededMemory();

*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32(n + 3, 0, n + 2, 0, n + 1, 0, n, 0), *noncev);

Expand Down
83 changes: 58 additions & 25 deletions algo/gr/gr-gate.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "gr-gate.h"
#include <unistd.h> // usleep
#include "virtual_memory.h" // Memory allocation.
#include <unistd.h> // usleep

// Only 3 CN algos are selected from available 6.
__thread uint8_t gr_hash_order[GR_HASH_FUNC_COUNT - 3 + 1];
Expand Down Expand Up @@ -109,6 +110,40 @@ void gr_getAlgoString(const uint8_t *block, uint8_t *selectedAlgoOutput) {
}
}

// Mapping of gr_harh_order CN to cn-config - lightest to heaviest order.
// Config: Turtlelite, Turtle, Darklite, Dark, Lite, Fast.
// Gr_Hash: Dark, Darklite, Fast, Lite, Turtle, Turtlelite
static uint8_t cn_map[6] = {3, 2, 5, 4, 1, 0};

static size_t GetMaxCnSize() {
// Memory requirements for each CN variant
size_t cn_req[6] = {262144, 262144, 524288, 524288, 1048576, 2097152};
// Check tune/config if given variant uses 2way that requires 2x memory.
// cn_config should contain only 0 values in non GR_4WAY.
for (int i = 0; i < 6; i++) {
cn_req[i] *= (cn_config[i] + 1);
}

size_t order[3] = {cn_map[gr_hash_order[5] - 15],
cn_map[gr_hash_order[11] - 15],
cn_map[gr_hash_order[17] - 15]};
size_t max =
cn_req[order[0]] > cn_req[order[1]] ? cn_req[order[0]] : cn_req[order[1]];
max = max > cn_req[order[2]] ? max : cn_req[order[2]];

return max;
}

void AllocateNeededMemory() {
size_t size = GetMaxCnSize();
if (opt_debug) {
applog(LOG_DEBUG, "Current Cryptonight variants require: %lu memory", size);
}

// Purges previous memory allocation and creates new one.
PrepareMemory((void **)&hp_state, size);
}

void select_tuned_config() {
for (size_t i = 0; i < 20; i++) {
if (cn[i][0] + 15 == gr_hash_order[5] ||
Expand Down Expand Up @@ -223,9 +258,7 @@ void *statistic_thread(void *arg) {
}
}

#ifdef __AVX2__

static uint8_t cn_map[6] = {3, 2, 5, 4, 1, 0};
#if defined(GR_4WAY)

static void tune_config(void *input, int thr_id, int rot) {
srand(time(NULL) + thr_id);
Expand All @@ -251,6 +284,10 @@ static void tune_config(void *input, int thr_id, int rot) {
gr_hash_order[5] = cn[rot][0] + 15;
gr_hash_order[11] = cn[rot][1] + 15;
gr_hash_order[17] = cn[rot][2] + 15;

// Purge memory for test.
AllocateNeededMemory();

// Set desired CN config.
sync_bench();
sync_bench();
Expand Down Expand Up @@ -309,28 +346,21 @@ void tune(void *input, int thr_id) {
tune_config(input, thr_id, i);
sync_conf();
if (thr_id == 0) {
// TODO
// Do not set the improvement if Fast variant is included.
// Possible bug/inaccuracy in benchmarking with it set as 1.
// Can be reproduced with 5000 series Ryzens.
if (cn_map[cn[i][0]] != 5 && cn_map[cn[i][1]] != 5 &&
cn_map[cn[i][2]] != 5) {
if (best_hashrate < bench_hashrate) {
if (opt_debug) {
applog(LOG_DEBUG, "%d -> %d | %d -> %d | %d -> %d", cn[i][0],
(config & 1) >> 0, cn[i][1], (config & 2) >> 1, cn[i][2],
(config & 4) >> 2);
}
cn_tune[i][cn_map[cn[i][0]]] = (config & 1) >> 0;
cn_tune[i][cn_map[cn[i][1]]] = (config & 2) >> 1;
cn_tune[i][cn_map[cn[i][2]]] = (config & 4) >> 2;
if (opt_debug) {
applog(LOG_DEBUG, "Config for rotation %d: %d %d %d %d %d %d", i,
cn_tune[i][0], cn_tune[i][1], cn_tune[i][2], cn_tune[i][3],
cn_tune[i][4], cn_tune[i][5]);
}
best_hashrate = bench_hashrate;
if (best_hashrate < bench_hashrate) {
if (opt_debug) {
applog(LOG_DEBUG, "%d -> %d | %d -> %d | %d -> %d", cn[i][0],
(config & 1) >> 0, cn[i][1], (config & 2) >> 1, cn[i][2],
(config & 4) >> 2);
}
cn_tune[i][cn_map[cn[i][0]]] = (config & 1) >> 0;
cn_tune[i][cn_map[cn[i][1]]] = (config & 2) >> 1;
cn_tune[i][cn_map[cn[i][2]]] = (config & 4) >> 2;
if (opt_debug) {
applog(LOG_DEBUG, "Config for rotation %d: %d %d %d %d %d %d", i,
cn_tune[i][0], cn_tune[i][1], cn_tune[i][2], cn_tune[i][3],
cn_tune[i][4], cn_tune[i][5]);
}
best_hashrate = bench_hashrate;
}
bench_hashrate = 0;
bench_time = 0;
Expand Down Expand Up @@ -403,6 +433,9 @@ void benchmark(void *input, int thr_id, long sleep_time) {
select_tuned_config();
}

// Purge memory for test.
AllocateNeededMemory();

sync_bench(); // Rotation change sync.
if (rotation == 0) {
if (likely(stop_benchmark)) {
Expand Down
5 changes: 4 additions & 1 deletion algo/gr/gr-gate.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,14 @@ int scanhash_gr(struct work *work, uint32_t max_nonce, uint64_t *hashes_done,
extern __thread uint8_t *hp_state;

// Values for 20 CN rotations.
const static uint8_t cn[20][3] = {
static const uint8_t cn[20][3] = {
{0, 1, 2}, {0, 1, 3}, {0, 1, 4}, {0, 1, 5}, {0, 2, 3}, {0, 2, 4}, {0, 2, 5},
{0, 3, 4}, {0, 3, 5}, {0, 4, 5}, {1, 2, 3}, {1, 2, 4}, {1, 2, 5}, {1, 3, 4},
{1, 3, 5}, {1, 4, 5}, {2, 3, 4}, {2, 3, 5}, {2, 4, 5}, {3, 4, 5}};

// Uses hp_state as memory.
void AllocateNeededMemory();

void select_tuned_config();
void tune(void *input, int thr_id);

Expand Down
9 changes: 4 additions & 5 deletions algo/gr/gr.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "gr-gate.h"
#include "virtual_memory.h"

int gr_hash(void *output, const void *input, int thrid) {
uint64_t hash[8] __attribute__((aligned(64)));
Expand Down Expand Up @@ -135,10 +134,6 @@ int scanhash_gr(struct work *work, uint32_t max_nonce, uint64_t *hashes_done,
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);

if (hp_state == NULL) {
hp_state = (uint8_t *)AllocateMemory(1 << 21);
}

if (opt_benchmark_config) {
benchmark_configs(pdata, thr_id);
}
Expand All @@ -165,6 +160,10 @@ int scanhash_gr(struct work *work, uint32_t max_nonce, uint64_t *hashes_done,
}
}

// Allocates hp_state for Cryptonight algorithms.
// Needs to be run AFTER gr_hash_order is set!
AllocateNeededMemory();

do {
edata[19] = nonce;
if (gr_hash(hash32, edata, thr_id)) {
Expand Down
20 changes: 10 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt-gr 1.1.4g.
# Generated by GNU Autoconf 2.69 for cpuminer-opt-gr 1.1.5.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt-gr'
PACKAGE_TARNAME='cpuminer-opt-gr'
PACKAGE_VERSION='1.1.4g'
PACKAGE_STRING='cpuminer-opt-gr 1.1.4g'
PACKAGE_VERSION='1.1.5'
PACKAGE_STRING='cpuminer-opt-gr 1.1.5'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt-gr 1.1.4g to adapt to many kinds of systems.
\`configure' configures cpuminer-opt-gr 1.1.5 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1404,7 +1404,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt-gr 1.1.4g:";;
short | recursive ) echo "Configuration of cpuminer-opt-gr 1.1.5:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt-gr configure 1.1.4g
cpuminer-opt-gr configure 1.1.5
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt-gr $as_me 1.1.4g, which was
It was created by cpuminer-opt-gr $as_me 1.1.5, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
Expand Down Expand Up @@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt-gr'
VERSION='1.1.4g'
VERSION='1.1.5'
cat >>confdefs.h <<_ACEOF
Expand Down Expand Up @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt-gr $as_me 1.1.4g, which was
This file was extended by cpuminer-opt-gr $as_me 1.1.5, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt-gr config.status 1.1.4g
cpuminer-opt-gr config.status 1.1.5
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt-gr], [1.1.4g])
AC_INIT([cpuminer-opt-gr], [1.1.5])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
22 changes: 7 additions & 15 deletions cpu-miner.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,17 +148,8 @@ bool opt_verify = false;
// Default config for CN variants.
// 0 - Use default 1way/SSE
// 1 - Use 2way algorithm.
// Use defines to keep compatibility with v1.1.1
#if defined(GR_4WAY_HEAVY)
__thread uint8_t cn_config[6] = {1, 1, 1, 1, 1, 1};
uint8_t cn_config_global[6] = {1, 1, 1, 1, 1, 1};
#elif defined(GR_4WAY_MEDIUM)
__thread uint8_t cn_config[6] = {0, 1, 1, 1, 0, 0};
uint8_t cn_config_global[6] = {0, 1, 1, 1, 0, 0};
#else
__thread uint8_t cn_config[6] = {0, 0, 0, 0, 0, 0};
uint8_t cn_config_global[6] = {0, 0, 0, 0, 0, 0};
#endif

bool opt_tune = false;
bool opt_tuned = false;
Expand Down Expand Up @@ -3200,10 +3191,12 @@ static bool load_tune_config(char *config_name) {
return false;
}
for (int i = 0; i < 20; i++) {
fscanf(fd, "%hhd %hhd %hhd %hhd %hhd %hhd\n", &cn_tune[i][0],
&cn_tune[i][1], &cn_tune[i][2], &cn_tune[i][3], &cn_tune[i][4],
&cn_tune[i][5]);
if (ferror(fd) != 0) {
size_t read = fscanf(fd,
"%" SCNu8 " %" SCNu8 " %" SCNu8 " %" SCNu8 " %" SCNu8
" %" SCNu8 "\n",
&cn_tune[i][0], &cn_tune[i][1], &cn_tune[i][2],
&cn_tune[i][3], &cn_tune[i][4], &cn_tune[i][5]);
if (ferror(fd) != 0 || read != 6) {
applog(LOG_ERR, "Could not read from %s file", config_name);
return false;
}
Expand Down Expand Up @@ -4002,8 +3995,7 @@ int main(int argc, char *argv[]) {
#endif

// Prepare and check Large Pages. At least 4MiB per thread.
huge_pages = InitHugePages(opt_n_threads * 2);
if (!huge_pages) {
if (!InitHugePages(opt_n_threads * 4)) {
applog(LOG_ERR, "Could not prepare Huge Pages.");
} else {
applog(LOG_BLUE, "Huge Pages set up successfuly.");
Expand Down
Loading

0 comments on commit a2c56b0

Please sign in to comment.