From 0f3ad73d1229fc81367a79eb046cf9d54e01b6e2 Mon Sep 17 00:00:00 2001 From: Kikyou Akino Date: Mon, 6 Mar 2023 12:00:51 +0900 Subject: [PATCH] Remove unnecessary algo sources --- algo/argon2/argon2a/ar2/ar2-scrypt-jane.c | 249 ------- algo/argon2/argon2a/ar2/ar2-scrypt-jane.h | 35 - algo/argon2/argon2a/ar2/argon2.c | 284 -------- algo/argon2/argon2a/ar2/argon2.h | 292 -------- algo/argon2/argon2a/ar2/bench.c | 114 ---- algo/argon2/argon2a/ar2/blake2/blake2-impl.h | 143 ---- algo/argon2/argon2a/ar2/blake2/blake2.h | 76 --- .../argon2a/ar2/blake2/blamka-round-opt.h | 162 ----- .../argon2a/ar2/blake2/blamka-round-ref.h | 39 -- algo/argon2/argon2a/ar2/blake2b.c | 316 --------- algo/argon2/argon2a/ar2/cores.c | 349 ---------- algo/argon2/argon2a/ar2/cores.h | 216 ------ algo/argon2/argon2a/ar2/genkat.c.hide | 186 ----- algo/argon2/argon2a/ar2/genkat.h.hide | 45 -- algo/argon2/argon2a/ar2/opt.c | 185 ----- algo/argon2/argon2a/ar2/opt.h | 49 -- algo/argon2/argon2a/ar2/ref.c.hide | 174 ----- algo/argon2/argon2a/ar2/ref.h.hide | 49 -- algo/argon2/argon2a/ar2/run.c.hide | 223 ------ algo/argon2/argon2a/ar2/sj/scrypt-jane-hash.h | 38 -- .../ar2/sj/scrypt-jane-hash_skein512.h | 188 ------ .../ar2/sj/scrypt-jane-mix_salsa64-avx.h | 367 ---------- .../ar2/sj/scrypt-jane-mix_salsa64-avx2.h | 221 ------ .../ar2/sj/scrypt-jane-mix_salsa64-sse2.h | 449 ------------ .../ar2/sj/scrypt-jane-mix_salsa64-ssse3.h | 399 ----------- .../ar2/sj/scrypt-jane-mix_salsa64-xop.h | 335 --------- .../argon2a/ar2/sj/scrypt-jane-mix_salsa64.h | 41 -- .../argon2a/ar2/sj/scrypt-jane-pbkdf2.h | 112 --- .../argon2a/ar2/sj/scrypt-jane-portable-x86.h | 463 ------------- .../argon2a/ar2/sj/scrypt-jane-portable.h | 310 --------- .../argon2a/ar2/sj/scrypt-jane-romix-basic.h | 75 -- .../ar2/sj/scrypt-jane-romix-template.h | 122 ---- .../argon2/argon2a/ar2/sj/scrypt-jane-romix.h | 23 - .../argon2a/ar2/sj/scrypt-jane-salsa64.h | 183 ----- .../argon2a/ar2/sj/scrypt-jane-test-vectors.h | 28 - algo/argon2/argon2a/argon2a.c | 85 --- algo/argon2/argon2d/argon2d-gate.c | 189 ------ algo/argon2/argon2d/argon2d-gate.h | 31 - algo/argon2/argon2d/argon2d/argon2.c | 458 ------------- algo/argon2/argon2d/argon2d/argon2.h | 440 ------------ algo/argon2/argon2d/argon2d/argon2d_thread.c | 57 -- algo/argon2/argon2d/argon2d/argon2d_thread.h | 67 -- algo/argon2/argon2d/argon2d/core.c | 638 ------------------ algo/argon2/argon2d/argon2d/core.h | 228 ------- algo/argon2/argon2d/argon2d/encoding.c | 463 ------------- algo/argon2/argon2d/argon2d/encoding.h | 57 -- algo/argon2/argon2d/argon2d/opt.c | 332 --------- algo/argon2/argon2d/blake2/blake2-impl.h | 156 ----- algo/argon2/argon2d/blake2/blake2.h | 91 --- algo/argon2/argon2d/blake2/blake2b.c | 390 ----------- algo/argon2/argon2d/blake2/blamka-round-opt.h | 465 ------------- algo/argon2/argon2d/blake2/blamka-round-ref.h | 56 -- algo/hodl/aes.c | 182 ----- algo/hodl/hodl-endian.h | 75 -- algo/hodl/hodl-gate.c | 184 ----- algo/hodl/hodl-gate.h | 6 - algo/hodl/hodl-wolf.c | 225 ------ algo/hodl/hodl-wolf.h | 27 - algo/hodl/hodlminer.1 | 208 ------ algo/hodl/sha512-avx.h | 50 -- algo/hodl/sha512_avx.c | 235 ------- algo/hodl/sha512_avx2.c | 241 ------- algo/hodl/wolf-aes.h | 25 - algo/m7m/m7m.c | 312 --------- algo/m7m/magimath.cpp | 75 -- algo/m7m/magimath.h | 54 -- 66 files changed, 12642 deletions(-) delete mode 100644 algo/argon2/argon2a/ar2/ar2-scrypt-jane.c delete mode 100644 algo/argon2/argon2a/ar2/ar2-scrypt-jane.h delete mode 100644 algo/argon2/argon2a/ar2/argon2.c delete mode 100644 algo/argon2/argon2a/ar2/argon2.h delete mode 100644 algo/argon2/argon2a/ar2/bench.c delete mode 100644 algo/argon2/argon2a/ar2/blake2/blake2-impl.h delete mode 100644 algo/argon2/argon2a/ar2/blake2/blake2.h delete mode 100644 algo/argon2/argon2a/ar2/blake2/blamka-round-opt.h delete mode 100644 algo/argon2/argon2a/ar2/blake2/blamka-round-ref.h delete mode 100644 algo/argon2/argon2a/ar2/blake2b.c delete mode 100644 algo/argon2/argon2a/ar2/cores.c delete mode 100644 algo/argon2/argon2a/ar2/cores.h delete mode 100644 algo/argon2/argon2a/ar2/genkat.c.hide delete mode 100644 algo/argon2/argon2a/ar2/genkat.h.hide delete mode 100644 algo/argon2/argon2a/ar2/opt.c delete mode 100644 algo/argon2/argon2a/ar2/opt.h delete mode 100644 algo/argon2/argon2a/ar2/ref.c.hide delete mode 100644 algo/argon2/argon2a/ar2/ref.h.hide delete mode 100644 algo/argon2/argon2a/ar2/run.c.hide delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-hash.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-hash_skein512.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx2.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-sse2.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-xop.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-pbkdf2.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-portable-x86.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-portable.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-basic.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-template.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-romix.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-salsa64.h delete mode 100644 algo/argon2/argon2a/ar2/sj/scrypt-jane-test-vectors.h delete mode 100644 algo/argon2/argon2a/argon2a.c delete mode 100644 algo/argon2/argon2d/argon2d-gate.c delete mode 100644 algo/argon2/argon2d/argon2d-gate.h delete mode 100644 algo/argon2/argon2d/argon2d/argon2.c delete mode 100644 algo/argon2/argon2d/argon2d/argon2.h delete mode 100644 algo/argon2/argon2d/argon2d/argon2d_thread.c delete mode 100644 algo/argon2/argon2d/argon2d/argon2d_thread.h delete mode 100644 algo/argon2/argon2d/argon2d/core.c delete mode 100644 algo/argon2/argon2d/argon2d/core.h delete mode 100644 algo/argon2/argon2d/argon2d/encoding.c delete mode 100644 algo/argon2/argon2d/argon2d/encoding.h delete mode 100644 algo/argon2/argon2d/argon2d/opt.c delete mode 100644 algo/argon2/argon2d/blake2/blake2-impl.h delete mode 100644 algo/argon2/argon2d/blake2/blake2.h delete mode 100644 algo/argon2/argon2d/blake2/blake2b.c delete mode 100644 algo/argon2/argon2d/blake2/blamka-round-opt.h delete mode 100644 algo/argon2/argon2d/blake2/blamka-round-ref.h delete mode 100644 algo/hodl/aes.c delete mode 100644 algo/hodl/hodl-endian.h delete mode 100644 algo/hodl/hodl-gate.c delete mode 100644 algo/hodl/hodl-gate.h delete mode 100644 algo/hodl/hodl-wolf.c delete mode 100644 algo/hodl/hodl-wolf.h delete mode 100644 algo/hodl/hodlminer.1 delete mode 100644 algo/hodl/sha512-avx.h delete mode 100644 algo/hodl/sha512_avx.c delete mode 100644 algo/hodl/sha512_avx2.c delete mode 100644 algo/hodl/wolf-aes.h delete mode 100644 algo/m7m/m7m.c delete mode 100644 algo/m7m/magimath.cpp delete mode 100644 algo/m7m/magimath.h diff --git a/algo/argon2/argon2a/ar2/ar2-scrypt-jane.c b/algo/argon2/argon2a/ar2/ar2-scrypt-jane.c deleted file mode 100644 index e75b73b0..00000000 --- a/algo/argon2/argon2a/ar2/ar2-scrypt-jane.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane - - Public Domain or MIT License, whichever is easier -*/ - -#include - -#if defined( _WINDOWS ) -#if !defined( QT_GUI ) -extern "C" { -#endif -#endif - -#include "ar2-scrypt-jane.h" - -#include "sj/scrypt-jane-portable.h" -#include "sj/scrypt-jane-hash.h" -#include "sj/scrypt-jane-romix.h" -#include "sj/scrypt-jane-test-vectors.h" - -#define scrypt_maxNfactor 30 /* (1 << (30 + 1)) = ~2 billion */ -#if (SCRYPT_BLOCK_BYTES == 64) -#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */ -#elif (SCRYPT_BLOCK_BYTES == 128) -#define scrypt_r_32kb 7 /* (1 << 7) = 128 * 2 blocks in a chunk * 128 bytes = Max of 32kb in a chunk */ -#elif (SCRYPT_BLOCK_BYTES == 256) -#define scrypt_r_32kb 6 /* (1 << 6) = 64 * 2 blocks in a chunk * 256 bytes = Max of 32kb in a chunk */ -#elif (SCRYPT_BLOCK_BYTES == 512) -#define scrypt_r_32kb 5 /* (1 << 5) = 32 * 2 blocks in a chunk * 512 bytes = Max of 32kb in a chunk */ -#endif -#define scrypt_maxrfactor scrypt_r_32kb /* 32kb */ -#define scrypt_maxpfactor 25 /* (1 << 25) = ~33 million */ - -#include -//#include - -static void NORETURN -scrypt_fatal_error_default(const char *msg) { - fprintf(stderr, "%s\n", msg); - exit(1); -} - -static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default; - -void scrypt_set_fatal_error(scrypt_fatal_errorfn fn) { - scrypt_fatal_error = fn; -} - -static int scrypt_power_on_self_test(void) -{ - const scrypt_test_setting *t; - uint8_t test_digest[64]; - uint32_t i; - int res = 7, scrypt_valid; - - if (!scrypt_test_mix()) { -#if !defined(SCRYPT_TEST) - scrypt_fatal_error("scrypt: mix function power-on-self-test failed"); -#endif - res &= ~1; - } - - if (!scrypt_test_hash()) { -#if !defined(SCRYPT_TEST) - scrypt_fatal_error("scrypt: hash function power-on-self-test failed"); -#endif - res &= ~2; - } - - for (i = 0, scrypt_valid = 1; post_settings[i].pw; i++) { - t = post_settings + i; - scrypt((uint8_t *)t->pw, strlen(t->pw), (uint8_t *)t->salt, strlen(t->salt), t->Nfactor, t->rfactor, t->pfactor, test_digest, sizeof(test_digest)); - scrypt_valid &= scrypt_verify(post_vectors[i], test_digest, sizeof(test_digest)); - } - - if (!scrypt_valid) { -#if !defined(SCRYPT_TEST) - scrypt_fatal_error("scrypt: scrypt power-on-self-test failed"); -#endif - res &= ~4; - } - - return res; -} - -typedef struct scrypt_aligned_alloc_t { - uint8_t *mem, *ptr; -} scrypt_aligned_alloc; - -#ifdef SCRYPT_TEST_SPEED - -static uint8_t *mem_base = (uint8_t *)0; -static size_t mem_bump = 0; - -/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */ -static scrypt_aligned_alloc scrypt_alloc(uint64_t size) -{ - scrypt_aligned_alloc aa; - if (!mem_base) { - mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1)); - if (!mem_base) - scrypt_fatal_error("scrypt: out of memory"); - mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1)); - } - aa.mem = mem_base + mem_bump; - aa.ptr = aa.mem; - mem_bump += (size_t)size; - return aa; -} - -static void scrypt_free(scrypt_aligned_alloc *aa) { - mem_bump = 0; -} - -#else - -static scrypt_aligned_alloc scrypt_alloc(uint64_t size) -{ - static const size_t max_alloc = (size_t)-1; - scrypt_aligned_alloc aa; - size += (SCRYPT_BLOCK_BYTES - 1); - if (size > max_alloc) - scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory"); - aa.mem = (uint8_t *)malloc((size_t)size); - aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1)); - if (!aa.mem) - scrypt_fatal_error("scrypt: out of memory"); - return aa; -} - -static void scrypt_free(scrypt_aligned_alloc *aa) -{ - free(aa->mem); -} - -#endif /* SCRYPT_TEST_SPEED */ - - -void scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, - uint8_t Nfactor, uint8_t rfactor, uint8_t pfactor, uint8_t *out, size_t bytes) -{ - scrypt_aligned_alloc YX, V; - uint8_t *X, *Y; - uint32_t N, r, p, chunk_bytes, i; - -#if !defined(SCRYPT_CHOOSE_COMPILETIME) - scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix(); -#endif - -#if !defined(SCRYPT_TEST) - static int power_on_self_test = 0; - if (!power_on_self_test) { - power_on_self_test = 1; - if (!scrypt_power_on_self_test()) - scrypt_fatal_error("scrypt: power on self test failed"); - } -#endif - - if (Nfactor > scrypt_maxNfactor) - scrypt_fatal_error("scrypt: N out of range"); - if (rfactor > scrypt_maxrfactor) - scrypt_fatal_error("scrypt: r out of range"); - if (pfactor > scrypt_maxpfactor) - scrypt_fatal_error("scrypt: p out of range"); - - N = (1 << (Nfactor + 1)); - r = (1 << rfactor); - p = (1 << pfactor); - - chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2; - V = scrypt_alloc((uint64_t)N * chunk_bytes); - YX = scrypt_alloc((p + 1) * chunk_bytes); - - /* 1: X = PBKDF2(password, salt) */ - Y = YX.ptr; - X = Y + chunk_bytes; - scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes * p); - - /* 2: X = ROMix(X) */ - for (i = 0; i < p; i++) - scrypt_ROMix((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, N, r); - - /* 3: Out = PBKDF2(password, X) */ - scrypt_pbkdf2(password, password_len, X, chunk_bytes * p, 1, out, bytes); - - scrypt_ensure_zero(YX.ptr, (p + 1) * chunk_bytes); - - scrypt_free(&V); - scrypt_free(&YX); -} - -#define Nfactor 8 -#define rfactor 0 -#define pfactor 0 -#if (SCRYPT_BLOCK_BYTES == 64) -#define chunk_bytes 128 -#elif (SCRYPT_BLOCK_BYTES == 128) -#define chunk_bytes 256 -#elif (SCRYPT_BLOCK_BYTES == 256) -#define chunk_bytes 512 -#elif (SCRYPT_BLOCK_BYTES == 512) -#define chunk_bytes 1024 -#endif - -void my_scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out) -{ - scrypt_aligned_alloc YX, V; - uint8_t *X, *Y; - -#if !defined(SCRYPT_CHOOSE_COMPILETIME) - scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix(); -#endif - -/* -#if !defined(SCRYPT_TEST) - static int power_on_self_test = 0; - if (!power_on_self_test) { - power_on_self_test = 1; - if (!scrypt_power_on_self_test()) - scrypt_fatal_error("scrypt: power on self test failed"); - } -#endif -*/ - V = scrypt_alloc((uint64_t)512 * chunk_bytes); - YX = scrypt_alloc(2 * chunk_bytes); - - /* 1: X = PBKDF2(password, salt) */ - Y = YX.ptr; - X = Y + chunk_bytes; - scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes); - - /* 2: X = ROMix(X) */ - scrypt_ROMix((scrypt_mix_word_t *)X, (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, 512, 1); - - /* 3: Out = PBKDF2(password, X) */ - scrypt_pbkdf2(password, password_len, X, chunk_bytes, 1, out, 32); - - scrypt_ensure_zero(YX.ptr, 2 * chunk_bytes); - - scrypt_free(&V); - scrypt_free(&YX); -} - -#if defined( _WINDOWS ) -#if !defined( QT_GUI ) -} /* extern "C" */ -#endif -#endif diff --git a/algo/argon2/argon2a/ar2/ar2-scrypt-jane.h b/algo/argon2/argon2a/ar2/ar2-scrypt-jane.h deleted file mode 100644 index 78006e52..00000000 --- a/algo/argon2/argon2a/ar2/ar2-scrypt-jane.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef AR2_SCRYPT_JANE_H -#define AR2_SCRYPT_JANE_H - -#ifdef _MSC_VER -#undef SCRYPT_CHOOSE_COMPILETIME -#endif -//#define SCRYPT_TEST -#define SCRYPT_SKEIN512 -#define SCRYPT_SALSA64 - -/* - Nfactor: Increases CPU & Memory Hardness - N = (1 << (Nfactor + 1)): How many times to mix a chunk and how many temporary chunks are used - - rfactor: Increases Memory Hardness - r = (1 << rfactor): How large a chunk is - - pfactor: Increases CPU Hardness - p = (1 << pfactor): Number of times to mix the main chunk - - A block is the basic mixing unit (salsa/chacha block = 64 bytes) - A chunk is (2 * r) blocks - - ~Memory used = (N + 2) * ((2 * r) * block size) -*/ - -#include -#include - -typedef void (*scrypt_fatal_errorfn)(const char *msg); -void scrypt_set_fatal_error(scrypt_fatal_errorfn fn); - -void scrypt(const unsigned char *password, size_t password_len, const unsigned char *salt, size_t salt_len, unsigned char Nfactor, unsigned char rfactor, unsigned char pfactor, unsigned char *out, size_t bytes); -void my_scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out); -#endif /* AR2_SCRYPT_JANE_H */ diff --git a/algo/argon2/argon2a/ar2/argon2.c b/algo/argon2/argon2a/ar2/argon2.c deleted file mode 100644 index c2385986..00000000 --- a/algo/argon2/argon2a/ar2/argon2.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" - -/* Error messages */ -static const char *Argon2_ErrorMessage[] = { - /*{ARGON2_OK, */ "OK", - /*}, - - {ARGON2_OUTPUT_PTR_NULL, */ "Output pointer is NULL", - /*}, - -{ARGON2_OUTPUT_TOO_SHORT, */ "Output is too short", - /*}, -{ARGON2_OUTPUT_TOO_LONG, */ "Output is too long", - /*}, - -{ARGON2_PWD_TOO_SHORT, */ "Password is too short", - /*}, -{ARGON2_PWD_TOO_LONG, */ "Password is too long", - /*}, - -{ARGON2_SALT_TOO_SHORT, */ "Salt is too short", - /*}, -{ARGON2_SALT_TOO_LONG, */ "Salt is too long", - /*}, - -{ARGON2_AD_TOO_SHORT, */ "Associated data is too short", - /*}, -{ARGON2_AD_TOO_LONG, */ "Associated date is too long", - /*}, - -{ARGON2_SECRET_TOO_SHORT, */ "Secret is too short", - /*}, -{ARGON2_SECRET_TOO_LONG, */ "Secret is too long", - /*}, - -{ARGON2_TIME_TOO_SMALL, */ "Time cost is too small", - /*}, -{ARGON2_TIME_TOO_LARGE, */ "Time cost is too large", - /*}, - -{ARGON2_MEMORY_TOO_LITTLE, */ "Memory cost is too small", - /*}, -{ARGON2_MEMORY_TOO_MUCH, */ "Memory cost is too large", - /*}, - -{ARGON2_LANES_TOO_FEW, */ "Too few lanes", - /*}, -{ARGON2_LANES_TOO_MANY, */ "Too many lanes", - /*}, - -{ARGON2_PWD_PTR_MISMATCH, */ "Password pointer is NULL, but password length is not 0", - /*}, -{ARGON2_SALT_PTR_MISMATCH, */ "Salt pointer is NULL, but salt length is not 0", - /*}, -{ARGON2_SECRET_PTR_MISMATCH, */ "Secret pointer is NULL, but secret length is not 0", - /*}, -{ARGON2_AD_PTR_MISMATCH, */ "Associated data pointer is NULL, but ad length is not 0", - /*}, - -{ARGON2_MEMORY_ALLOCATION_ERROR, */ "Memory allocation error", - /*}, - -{ARGON2_FREE_MEMORY_CBK_NULL, */ "The free memory callback is NULL", - /*}, -{ARGON2_ALLOCATE_MEMORY_CBK_NULL, */ "The allocate memory callback is NULL", - /*}, - -{ARGON2_INCORRECT_PARAMETER, */ "Argon2_Context context is NULL", - /*}, -{ARGON2_INCORRECT_TYPE, */ "There is no such version of Argon2", - /*}, - -{ARGON2_OUT_PTR_MISMATCH, */ "Output pointer mismatch", - /*}, - -{ARGON2_THREADS_TOO_FEW, */ "Not enough threads", - /*}, -{ARGON2_THREADS_TOO_MANY, */ "Too many threads", - /*}, -{ARGON2_MISSING_ARGS, */ "Missing arguments", /*},*/ -}; - -int argon2d(argon2_context *context) { return ar2_argon2_core(context, Argon2_d); } - -int argon2i(argon2_context *context) { return ar2_argon2_core(context, Argon2_i); } - -int ar2_verify_d(argon2_context *context, const char *hash) -{ - int result; - /*if (0 == context->outlen || NULL == hash) { - return ARGON2_OUT_PTR_MISMATCH; - }*/ - - result = ar2_argon2_core(context, Argon2_d); - - if (ARGON2_OK != result) { - return result; - } - - return 0 == memcmp(hash, context->out, 32); -} - -const char *error_message(int error_code) -{ - enum { - /* Make sure---at compile time---that the enum size matches the array - size */ - ERROR_STRING_CHECK = - 1 / - !!((sizeof(Argon2_ErrorMessage) / sizeof(Argon2_ErrorMessage[0])) == - ARGON2_ERROR_CODES_LENGTH) - }; - if (error_code < ARGON2_ERROR_CODES_LENGTH) { - return Argon2_ErrorMessage[(argon2_error_codes)error_code]; - } - return "Unknown error code."; -} - -/* encoding/decoding helpers */ - -/* - * Some macros for constant-time comparisons. These work over values in - * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". - */ -#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) -#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) -#define GE(x, y) (GT(y, x) ^ 0xFF) -#define LT(x, y) GT(y, x) -#define LE(x, y) GE(y, x) - -/* - * Convert value x (0..63) to corresponding Base64 character. - */ -static int b64_byte_to_char(unsigned x) { -//static inline int b64_byte_to_char(unsigned x) { - return (LT(x, 26) & (x + 'A')) | - (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | - (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | - (EQ(x, 63) & '/'); -} - -/* - * Convert some bytes to Base64. 'dst_len' is the length (in characters) - * of the output buffer 'dst'; if that buffer is not large enough to - * receive the result (including the terminating 0), then (size_t)-1 - * is returned. Otherwise, the zero-terminated Base64 string is written - * in the buffer, and the output length (counted WITHOUT the terminating - * zero) is returned. - */ -static size_t to_base64(char *dst, size_t dst_len, const void *src) -{ - size_t olen; - const unsigned char *buf; - unsigned acc, acc_len; - - olen = 43; - /*switch (32 % 3) { - case 2: - olen++;*/ - /* fall through */ - /*case 1: - olen += 2; - break; - }*/ - if (dst_len <= olen) { - return (size_t)-1; - } - acc = 0; - acc_len = 0; - buf = (const unsigned char *)src; - size_t src_len = 32; - while (src_len-- > 0) { - acc = (acc << 8) + (*buf++); - acc_len += 8; - while (acc_len >= 6) { - acc_len -= 6; - *dst++ = b64_byte_to_char((acc >> acc_len) & 0x3F); - } - } - if (acc_len > 0) { - *dst++ = b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); - } - *dst++ = 0; - return olen; -} - -/* ==================================================================== */ -/* - * Code specific to Argon2i. - * - * The code below applies the following format: - * - * $argon2i$m=,t=,p=[,keyid=][,data=][$[$]] - * - * where is a decimal integer (positive, fits in an 'unsigned long') - * and is Base64-encoded data (no '=' padding characters, no newline - * or whitespace). The "keyid" is a binary identifier for a key (up to 8 - * bytes); "data" is associated data (up to 32 bytes). When the 'keyid' - * (resp. the 'data') is empty, then it is ommitted from the output. - * - * The last two binary chunks (encoded in Base64) are, in that order, - * the salt and the output. Both are optional, but you cannot have an - * output without a salt. The binary salt length is between 8 and 48 bytes. - * The output length is always exactly 32 bytes. - */ - -int ar2_encode_string(char *dst, size_t dst_len, argon2_context *ctx) -{ -#define SS(str) \ - do { \ - size_t pp_len = strlen(str); \ - if (pp_len >= dst_len) { \ - return 0; \ - } \ - memcpy(dst, str, pp_len + 1); \ - dst += pp_len; \ - dst_len -= pp_len; \ - } while (0) - -#define SX(x) \ - do { \ - char tmp[30]; \ - sprintf(tmp, "%lu", (unsigned long)(x)); \ - SS(tmp); \ - } while (0); - -#define SB(buf) \ - do { \ - size_t sb_len = to_base64(dst, dst_len, buf); \ - if (sb_len == (size_t)-1) { \ - return 0; \ - } \ - dst += sb_len; \ - dst_len -= sb_len; \ - } while (0); - - SS("$argon2i$m="); - SX(16); - SS(",t="); - SX(2); - SS(",p="); - SX(1); - - /*if (ctx->adlen > 0) { - SS(",data="); - SB(ctx->ad, ctx->adlen); - }*/ - - /*if (ctx->saltlen == 0) - return 1;*/ - - SS("$"); - SB(ctx->salt); - - /*if (ctx->outlen32 == 0) - return 1;*/ - - SS("$"); - SB(ctx->out); - return 1; - -#undef SS -#undef SX -#undef SB -} diff --git a/algo/argon2/argon2a/ar2/argon2.h b/algo/argon2/argon2a/ar2/argon2.h deleted file mode 100644 index 09fa983e..00000000 --- a/algo/argon2/argon2a/ar2/argon2.h +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ -#ifndef ARGON2_H -#define ARGON2_H - -#include -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -/*************************Argon2 input parameter - * restrictions**************************************************/ - -/* Minimum and maximum number of lanes (degree of parallelism) */ -#define ARGON2_MIN_LANES UINT32_C(1) -#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) - -/* Minimum and maximum number of threads */ -#define ARGON2_MIN_THREADS UINT32_C(1) -#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) - -/* Number of synchronization points between lanes per pass */ -#define ARGON2_SYNC_POINTS UINT32_C(4) - -/* Minimum and maximum digest size in bytes */ -#define ARGON2_MIN_OUTLEN UINT32_C(4) -#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ -#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ - -#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) -/* Max memory size is half the addressing space, topping at 2^32 blocks (4 TB) - */ -#define ARGON2_MAX_MEMORY_BITS \ - ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) -#define ARGON2_MAX_MEMORY \ - ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) - -/* Minimum and maximum number of passes */ -#define ARGON2_MIN_TIME UINT32_C(1) -#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum password length in bytes */ -#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) -#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum associated data length in bytes */ -#define ARGON2_MIN_AD_LENGTH UINT32_C(0) -#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum salt length in bytes */ -#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) -#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum key length in bytes */ -#define ARGON2_MIN_SECRET UINT32_C(0) -#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) - -#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) -#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) -#define ARGON2_FLAG_CLEAR_MEMORY (UINT32_C(1) << 2) -#define ARGON2_DEFAULT_FLAGS \ - (ARGON2_FLAG_CLEAR_PASSWORD | ARGON2_FLAG_CLEAR_MEMORY) - -/* Error codes */ -typedef enum Argon2_ErrorCodes { - ARGON2_OK = 0, - - ARGON2_OUTPUT_PTR_NULL = 1, - - ARGON2_OUTPUT_TOO_SHORT = 2, - ARGON2_OUTPUT_TOO_LONG = 3, - - ARGON2_PWD_TOO_SHORT = 4, - ARGON2_PWD_TOO_LONG = 5, - - ARGON2_SALT_TOO_SHORT = 6, - ARGON2_SALT_TOO_LONG = 7, - - ARGON2_AD_TOO_SHORT = 8, - ARGON2_AD_TOO_LONG = 9, - - ARGON2_SECRET_TOO_SHORT = 10, - ARGON2_SECRET_TOO_LONG = 11, - - ARGON2_TIME_TOO_SMALL = 12, - ARGON2_TIME_TOO_LARGE = 13, - - ARGON2_MEMORY_TOO_LITTLE = 14, - ARGON2_MEMORY_TOO_MUCH = 15, - - ARGON2_LANES_TOO_FEW = 16, - ARGON2_LANES_TOO_MANY = 17, - - ARGON2_PWD_PTR_MISMATCH = 18, /* NULL ptr with non-zero length */ - ARGON2_SALT_PTR_MISMATCH = 19, /* NULL ptr with non-zero length */ - ARGON2_SECRET_PTR_MISMATCH = 20, /* NULL ptr with non-zero length */ - ARGON2_AD_PTR_MISMATCH = 21, /* NULL ptr with non-zero length */ - - ARGON2_MEMORY_ALLOCATION_ERROR = 22, - - ARGON2_FREE_MEMORY_CBK_NULL = 23, - ARGON2_ALLOCATE_MEMORY_CBK_NULL = 24, - - ARGON2_INCORRECT_PARAMETER = 25, - ARGON2_INCORRECT_TYPE = 26, - - ARGON2_OUT_PTR_MISMATCH = 27, - - ARGON2_THREADS_TOO_FEW = 28, - ARGON2_THREADS_TOO_MANY = 29, - - ARGON2_MISSING_ARGS = 30, - - ARGON2_ERROR_CODES_LENGTH /* Do NOT remove; Do NOT add error codes after - this - error code */ -} argon2_error_codes; - -/* Memory allocator types --- for external allocation */ -typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); -typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); - -/* Argon2 external data structures */ - -/* - *****Context: structure to hold Argon2 inputs: - * output array and its length, - * password and its length, - * salt and its length, - * secret and its length, - * associated data and its length, - * number of passes, amount of used memory (in KBytes, can be rounded up a bit) - * number of parallel threads that will be run. - * All the parameters above affect the output hash value. - * Additionally, two function pointers can be provided to allocate and - deallocate the memory (if NULL, memory will be allocated internally). - * Also, three flags indicate whether to erase password, secret as soon as they - are pre-hashed (and thus not needed anymore), and the entire memory - **************************** - Simplest situation: you have output array out[8], password is stored in - pwd[32], salt is stored in salt[16], you do not have keys nor associated data. - You need to spend 1 GB of RAM and you run 5 passes of Argon2d with 4 parallel - lanes. - You want to erase the password, but you're OK with last pass not being erased. - You want to use the default memory allocator. - */ -typedef struct Argon2_Context { - uint8_t *out; /* output array */ - uint8_t *pwd; /* password array */ - uint8_t *salt; /* salt array */ - /*uint8_t *secret;*/ /* key array */ - /*uint8_t *ad;*/ /* associated data array */ - - allocate_fptr allocate_cbk; /* pointer to memory allocator */ - deallocate_fptr free_cbk; /* pointer to memory deallocator */ - - /*uint32_t outlen;*/ /* digest length */ - uint32_t pwdlen; /* password length */ - /*uint32_t saltlen;*/ /* salt length */ - /*uint32_t secretlen;*/ /* key length */ - /*uint32_t adlen;*/ /* associated data length */ - /*uint32_t t_cost;*/ /* number of passes */ - /*uint32_t m_cost;*/ /* amount of memory requested (KB) */ - /*uint32_t lanes;*/ /* number of lanes */ - /*uint32_t threads;*/ /* maximum number of threads */ - /*uint32_t flags;*/ /* array of bool options */ - -} argon2_context; - -/** - * Function to hash the inputs in the memory-hard fashion (uses Argon2i) - * @param out Pointer to the memory where the hash digest will be written - * @param outlen Digest length in bytes - * @param in Pointer to the input (password) - * @param inlen Input length in bytes - * @param salt Pointer to the salt - * @param saltlen Salt length in bytes - * @pre @a out must have at least @a outlen bytes allocated - * @pre @a in must be at least @inlen bytes long - * @pre @a saltlen must be at least @saltlen bytes long - * @return Zero if successful, 1 otherwise. - */ -/*int hash_argon2i(void *out, size_t outlen, const void *in, size_t inlen, - const void *salt, size_t saltlen, unsigned int t_cost, - unsigned int m_cost);*/ - -/* same for argon2d */ -/*int hash_argon2d(void *out, size_t outlen, const void *in, size_t inlen, - const void *salt, size_t saltlen, unsigned int t_cost, - unsigned int m_cost);*/ - -/* - * **************Argon2d: Version of Argon2 that picks memory blocks depending - * on the password and salt. Only for side-channel-free - * environment!!*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -int argon2d(argon2_context *context); - -/* - * * **************Argon2i: Version of Argon2 that picks memory blocks - *independent on the password and salt. Good for side-channels, - ******************* but worse w.r.t. tradeoff attacks if - *******************only one pass is used*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -int argon2i(argon2_context *context); - -/* - * * **************Argon2di: Reserved name*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -int argon2di(argon2_context *context); - -/* - * * **************Argon2ds: Argon2d hardened against GPU attacks, 20% - * slower*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -int argon2ds(argon2_context *context); - -/* - * * **************Argon2id: First half-pass over memory is - *password-independent, the rest are password-dependent - ********************OK against side channels: they reduce to 1/2-pass - *Argon2i*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -int argon2id(argon2_context *context); - -/* - * Verify if a given password is correct for Argon2d hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -int ar2_verify_d(argon2_context *context, const char *hash); - -/* - * Get the associated error message for given error code - * @return The error message associated with the given error code - */ -const char *error_message(int error_code); - -/* ==================================================================== */ -/* - * Code specific to Argon2i. - * - * The code below applies the following format: - * - * $argon2i$m=,t=,p=[,keyid=][,data=][$[$]] - * - * where is a decimal integer (positive, fits in an 'unsigned long') - * and is Base64-encoded data (no '=' padding characters, no newline - * or whitespace). The "keyid" is a binary identifier for a key (up to 8 - * bytes); "data" is associated data (up to 32 bytes). When the 'keyid' - * (resp. the 'data') is empty, then it is ommitted from the output. - * - * The last two binary chunks (encoded in Base64) are, in that order, - * the salt and the output. Both are optional, but you cannot have an - * output without a salt. The binary salt length is between 8 and 48 bytes. - * The output length is always exactly 32 bytes. - */ - -int ar2_encode_string(char *dst, size_t dst_len, argon2_context *ctx); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/algo/argon2/argon2a/ar2/bench.c b/algo/argon2/argon2a/ar2/bench.c deleted file mode 100644 index be61c440..00000000 --- a/algo/argon2/argon2a/ar2/bench.c +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include -#include -#include -#ifdef _MSC_VER -#include -#endif - -#include "argon2.h" - -static uint64_t rdtsc(void) -{ -#ifdef _MSC_VER - return __rdtsc(); -#else -#if defined(__amd64__) || defined(__x86_64__) - uint64_t rax, rdx; - __asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx) : :); - return (rdx << 32) | rax; -#elif defined(__i386__) || defined(__i386) || defined(__X86__) - uint64_t rax; - __asm__ __volatile__("rdtsc" : "=A"(rax) : :); - return rax; -#else -#error "Not implemented!" -#endif -#endif -} - -/* - * Benchmarks Argon2 with salt length 16, password length 16, t_cost 1, - and different m_cost and threads - */ -static void benchmark() -{ -#define BENCH_OUTLEN 16 -#define BENCH_INLEN 16 - const uint32_t inlen = BENCH_INLEN; - const unsigned outlen = BENCH_OUTLEN; - unsigned char out[BENCH_OUTLEN]; - unsigned char pwd_array[BENCH_INLEN]; - unsigned char salt_array[BENCH_INLEN]; -#undef BENCH_INLEN -#undef BENCH_OUTLEN - - uint32_t t_cost = 1; - uint32_t m_cost; - uint32_t thread_test[6] = {1, 2, 4, 6, 8, 16}; - - memset(pwd_array, 0, inlen); - memset(salt_array, 1, inlen); - - for (m_cost = (uint32_t)1 << 10; m_cost <= (uint32_t)1 << 22; m_cost *= 2) { - unsigned i; - for (i = 0; i < 6; ++i) { - argon2_context context; - uint32_t thread_n = thread_test[i]; - uint64_t stop_cycles, stop_cycles_i; - clock_t stop_time; - uint64_t delta_d, delta_i; - double mcycles_d, mcycles_i, run_time; - - clock_t start_time = clock(); - uint64_t start_cycles = rdtsc(); - - context.out = out; - context.outlen = outlen; - context.pwd = pwd_array; - context.pwdlen = inlen; - context.salt = salt_array; - context.saltlen = inlen; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = thread_n; - context.threads = thread_n; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = 0; - - argon2d(&context); - stop_cycles = rdtsc(); - argon2i(&context); - stop_cycles_i = rdtsc(); - stop_time = clock(); - - delta_d = (stop_cycles - start_cycles) / (m_cost); - delta_i = (stop_cycles_i - stop_cycles) / (m_cost); - mcycles_d = (double)(stop_cycles - start_cycles) / (1UL << 20); - mcycles_i = (double)(stop_cycles_i - stop_cycles) / (1UL << 20); - printf("Argon2d %d iterations %d MiB %d threads: %2.2f cpb %2.2f " - "Mcycles \n", - t_cost, m_cost >> 10, thread_n, (float)delta_d / 1024, - mcycles_d); - printf("Argon2i %d iterations %d MiB %d threads: %2.2f cpb %2.2f " - "Mcycles \n", - t_cost, m_cost >> 10, thread_n, (float)delta_i / 1024, - mcycles_i); - - run_time = ((double)stop_time - start_time) / (CLOCKS_PER_SEC); - printf("%2.4f seconds\n\n", run_time); - } - } -} - -int main() -{ - benchmark(); - return ARGON2_OK; -} diff --git a/algo/argon2/argon2a/ar2/blake2/blake2-impl.h b/algo/argon2/argon2a/ar2/blake2/blake2-impl.h deleted file mode 100644 index 9bab8e26..00000000 --- a/algo/argon2/argon2a/ar2/blake2/blake2-impl.h +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef PORTABLE_BLAKE2_IMPL_H -#define PORTABLE_BLAKE2_IMPL_H - -#include -#include - -#if defined(_MSC_VER) -#define BLAKE2_INLINE __inline -#elif defined(__GNUC__) || defined(__clang__) -#define BLAKE2_INLINE __inline__ -#else -#define BLAKE2_INLINE -#endif - -/* Argon2 Team - Begin Code */ -/* - Not an exhaustive list, but should cover the majority of modern platforms - Additionally, the code will always be correct---this is only a performance - tweak. -*/ -#if (defined(__BYTE_ORDER__) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ - defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ - defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ - defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ - defined(_M_ARM) -#define NATIVE_LITTLE_ENDIAN -#endif -/* Argon2 Team - End Code */ - -static BLAKE2_INLINE uint32_t load32(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - uint32_t w; - memcpy(&w, src, sizeof w); - return w; -#else - const uint8_t *p = (const uint8_t *)src; - uint32_t w = *p++; - w |= (uint32_t)(*p++) << 8; - w |= (uint32_t)(*p++) << 16; - w |= (uint32_t)(*p++) << 24; - return w; -#endif -} - -static BLAKE2_INLINE uint64_t load64(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - uint64_t w; - memcpy(&w, src, sizeof w); - return w; -#else - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - w |= (uint64_t)(*p++) << 48; - w |= (uint64_t)(*p++) << 56; - return w; -#endif -} - -static BLAKE2_INLINE void store32(void *dst, uint32_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy(dst, &w, sizeof w); -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -static BLAKE2_INLINE void store64(void *dst, uint64_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy(dst, &w, sizeof w); -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -static BLAKE2_INLINE uint64_t load48(const void *src) { - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - return w; -} - -static BLAKE2_INLINE void store48(void *dst, uint64_t w) { - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -} - -static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { - return (w >> c) | (w << (32 - c)); -} - -static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { - return (w >> c) | (w << (64 - c)); -} - -/* prevents compiler optimizing out memset() */ -static BLAKE2_INLINE void burn(void *v, size_t n) { - static void *(*const volatile memset_v)(void *, int, size_t) = &memset; - memset_v(v, 0, n); -} - -#endif diff --git a/algo/argon2/argon2a/ar2/blake2/blake2.h b/algo/argon2/argon2a/ar2/blake2/blake2.h deleted file mode 100644 index 90f2a50d..00000000 --- a/algo/argon2/argon2a/ar2/blake2/blake2.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef PORTABLE_BLAKE2_H -#define PORTABLE_BLAKE2_H - -#include -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -enum blake2b_constant { - BLAKE2B_BLOCKBYTES = 128, - BLAKE2B_OUTBYTES = 64, - BLAKE2B_KEYBYTES = 64, - BLAKE2B_SALTBYTES = 16, - BLAKE2B_PERSONALBYTES = 16 -}; - -#pragma pack(push, 1) -typedef struct __blake2b_param { - uint8_t digest_length; /* 1 */ - uint8_t key_length; /* 2 */ - uint8_t fanout; /* 3 */ - uint8_t depth; /* 4 */ - uint32_t leaf_length; /* 8 */ - uint64_t node_offset; /* 16 */ - uint8_t node_depth; /* 17 */ - uint8_t inner_length; /* 18 */ - uint8_t reserved[14]; /* 32 */ - uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ - uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ -} blake2b_param; -#pragma pack(pop) - -typedef struct __blake2b_state { - uint64_t h[8]; - uint64_t t[2]; - uint64_t f[2]; - unsigned buflen; - unsigned outlen; - uint8_t last_node; - uint8_t buf[BLAKE2B_BLOCKBYTES]; -} blake2b_state; - -/* Ensure param structs have not been wrongly padded */ -/* Poor man's static_assert */ -enum { - blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), - blake2_size_check_2 = - 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) -}; - -/* Streaming API */ -int ar2_blake2b_init(blake2b_state *S, size_t outlen); -int ar2_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, - size_t keylen); -int ar2_blake2b_init_param(blake2b_state *S, const blake2b_param *P); -int ar2_blake2b_update(blake2b_state *S, const void *in, size_t inlen); -void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen); -int ar2_blake2b_final(blake2b_state *S, void *out, size_t outlen); - -/* Simple API */ -int ar2_blake2b(void *out, const void *in, const void *key, size_t keylen); - -/* Argon2 Team - Begin Code */ -int ar2_blake2b_long(void *out, const void *in); -/* Argon2 Team - End Code */ -/* Miouyouyou */ -void ar2_blake2b_too(void *out, const void *in); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/algo/argon2/argon2a/ar2/blake2/blamka-round-opt.h b/algo/argon2/argon2a/ar2/blake2/blamka-round-opt.h deleted file mode 100644 index 690686d9..00000000 --- a/algo/argon2/argon2a/ar2/blake2/blamka-round-opt.h +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef BLAKE_ROUND_MKA_OPT_H -#define BLAKE_ROUND_MKA_OPT_H - -#include "blake2-impl.h" - -#if defined(_MSC_VER) -#include -#endif - -#include -#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__)) -#include -#endif - -#if !defined(__XOP__) -#if defined(__SSSE3__) -#define r16 \ - (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) -#define r24 \ - (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) \ - ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ - : (-(c) == 24) \ - ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) \ - ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) \ - ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_slli_epi64((x), 64 - (-(c)))) -#else /* defined(__SSE2__) */ -#define _mm_roti_epi64(r, c) \ - _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) -#endif -#else -#endif - -static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { - const __m128i z = _mm_mul_epu32(x, y); - return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -32); \ - D1 = _mm_roti_epi64(D1, -32); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -24); \ - B1 = _mm_roti_epi64(B1, -24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -16); \ - D1 = _mm_roti_epi64(D1, -16); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -63); \ - B1 = _mm_roti_epi64(B1, -63); \ - } while ((void)0, 0) - -#if defined(__SSSE3__) -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ - __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D1, D0, 8); \ - t1 = _mm_alignr_epi8(D0, D1, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ - __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D0, D1, 8); \ - t1 = _mm_alignr_epi8(D1, D0, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) -#else /* SSE2 */ -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = D0; \ - __m128i t1 = B0; \ - D0 = C0; \ - C0 = C1; \ - C1 = D0; \ - D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ - D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ - B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ - B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - t0 = B0; \ - __m128i t1 = D0; \ - B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ - B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ - D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ - D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) -#endif - -#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#endif diff --git a/algo/argon2/argon2a/ar2/blake2/blamka-round-ref.h b/algo/argon2/argon2a/ar2/blake2/blamka-round-ref.h deleted file mode 100644 index f497e10c..00000000 --- a/algo/argon2/argon2a/ar2/blake2/blamka-round-ref.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef BLAKE_ROUND_MKA_H -#define BLAKE_ROUND_MKA_H - -#include "blake2.h" -#include "blake2-impl.h" - -/*designed by the Lyra PHC team */ -static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { - const uint64_t m = UINT64_C(0xFFFFFFFF); - const uint64_t xy = (x & m) * (y & m); - return x + y + 2 * xy; -} - -#define G(a, b, c, d) \ - do { \ - a = fBlaMka(a, b); \ - d = rotr64(d ^ a, 32); \ - c = fBlaMka(c, d); \ - b = rotr64(b ^ c, 24); \ - a = fBlaMka(a, b); \ - d = rotr64(d ^ a, 16); \ - c = fBlaMka(c, d); \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \ - v12, v13, v14, v15) \ - do { \ - G(v0, v4, v8, v12); \ - G(v1, v5, v9, v13); \ - G(v2, v6, v10, v14); \ - G(v3, v7, v11, v15); \ - G(v0, v5, v10, v15); \ - G(v1, v6, v11, v12); \ - G(v2, v7, v8, v13); \ - G(v3, v4, v9, v14); \ - } while ((void)0, 0) - -#endif diff --git a/algo/argon2/argon2a/ar2/blake2b.c b/algo/argon2/argon2a/ar2/blake2b.c deleted file mode 100644 index 90f2e0ec..00000000 --- a/algo/argon2/argon2a/ar2/blake2b.c +++ /dev/null @@ -1,316 +0,0 @@ -#include -#include -#include -#include - -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -#if defined(_MSC_VER) -// i know there is a trick but nvm :p -#define PRIu64 "%llu" -#define PRIx64 "%llx" -#endif - -static const uint64_t blake2b_IV[8] = { - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) -}; - -static const unsigned int blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, -}; - -static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { - S->f[1] = (uint64_t)-1; -} - -static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { - if (S->last_node) { - blake2b_set_lastnode(S); - } - S->f[0] = (uint64_t)-1; -} - -static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, uint64_t inc) { - S->t[0] += inc; - S->t[1] += (S->t[0] < inc); -} - -static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { - burn(S, sizeof(*S)); /* wipe */ - blake2b_set_lastblock(S); /* invalidate for further use */ -} - -static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { - memset(S, 0, sizeof(*S)); - memcpy(S->h, blake2b_IV, sizeof(S->h)); -} - -/* -void print_state(blake2b_state BlakeHash) -{ - printf(".h = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n" - "UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n" - "UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n" - "UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")},\n" - ".t = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")},\n" - ".f = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")}\n", - BlakeHash.h[0], BlakeHash.h[1], BlakeHash.h[2], BlakeHash.h[3], - BlakeHash.h[4], BlakeHash.h[5], BlakeHash.h[6], BlakeHash.h[7], - BlakeHash.t[0], BlakeHash.t[1], - BlakeHash.f[0], BlakeHash.f[1]); - printf(".buf = {"); - for (register uint8_t i = 0; i < BLAKE2B_BLOCKBYTES; i++) - printf("%" PRIu8 ", ", BlakeHash.buf[i]); - puts("\n"); - printf("}\n.buflen = %d\n.outlen = %d\n", - BlakeHash.buflen, BlakeHash.outlen); - printf(".last_node = %" PRIu8 "\n", BlakeHash.last_node); - fflush(stdout); -} -*/ - -static const blake2b_state miou = { - .h = { - UINT64_C(7640891576939301128), UINT64_C(13503953896175478587), - UINT64_C(4354685564936845355), UINT64_C(11912009170470909681), - UINT64_C(5840696475078001361), UINT64_C(11170449401992604703), - UINT64_C(2270897969802886507), UINT64_C(6620516959819538809) - }, - .t = {UINT64_C(0), UINT64_C(0)}, - .f = {UINT64_C(0), UINT64_C(0)}, - .buf = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }, - .buflen = 0, - .outlen = 64, - .last_node = 0 -}; - - -int ar2_blake2b_init_param(blake2b_state *S, const blake2b_param *P) -{ - const unsigned char *p = (const unsigned char *)P; - unsigned int i; - - if (NULL == P || NULL == S) { - return -1; - } - - blake2b_init0(S); - /* IV XOR Parameter Block */ - for (i = 0; i < 8; ++i) { - S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); - } - S->outlen = P->digest_length; - return 0; -} - -void compare_buffs(uint64_t *h, size_t outlen) -{ - // printf("CMP : %d", memcmp(h, miou.h, 8*(sizeof(uint64_t)))); - printf("miou : %" PRIu64 " - h : %" PRIu64 " - outlen : %ld\n", miou.h[0], h[0], outlen); - fflush(stdout); -} - -/* Sequential blake2b initialization */ -int ar2_blake2b_init(blake2b_state *S, size_t outlen) -{ - memcpy(S, &miou, sizeof(*S)); - S->h[0] += outlen; - return 0; -} - -void print64(const char *name, const uint64_t *array, uint16_t size) -{ - printf("%s = {", name); - for (uint8_t i = 0; i < size; i++) printf("UINT64_C(%" PRIu64 "), ", array[i]); - printf("};\n"); -} - -int ar2_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) -{ - return 0; -} - -static void blake2b_compress(blake2b_state *S, const uint8_t *block) -{ - uint64_t m[16]; - uint64_t v[16]; - unsigned int i, r; - - for (i = 0; i < 16; ++i) { - m[i] = load64(block + i * 8); - } - - for (i = 0; i < 8; ++i) { - v[i] = S->h[i]; - } - - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ S->t[0]; - v[13] = blake2b_IV[5]/* ^ S->t[1]*/; - v[14] = blake2b_IV[6] ^ S->f[0]; - v[15] = blake2b_IV[7]/* ^ S->f[1]*/; - -#define G(r, i, a, b, c, d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define ROUND(r) \ - do { \ - G(r, 0, v[0], v[4], v[8], v[12]); \ - G(r, 1, v[1], v[5], v[9], v[13]); \ - G(r, 2, v[2], v[6], v[10], v[14]); \ - G(r, 3, v[3], v[7], v[11], v[15]); \ - G(r, 4, v[0], v[5], v[10], v[15]); \ - G(r, 5, v[1], v[6], v[11], v[12]); \ - G(r, 6, v[2], v[7], v[8], v[13]); \ - G(r, 7, v[3], v[4], v[9], v[14]); \ - } while ((void)0, 0) - - for (r = 0; r < 12; ++r) ROUND(r); - - for (i = 0; i < 8; ++i) S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - -#undef G -#undef ROUND -} - -int ar2_blake2b_update(blake2b_state *S, const void *in, size_t inlen) -{ - const uint8_t *pin = (const uint8_t *)in; - /* Complete current block */ - memcpy(&S->buf[4], pin, 124); - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, S->buf); - S->buflen = 0; - pin += 124; - - register int8_t i = 7; - /* Avoid buffer copies when possible */ - while (i--) { - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, pin); - pin += BLAKE2B_BLOCKBYTES; - } - memcpy(&S->buf[S->buflen], pin, 4); - S->buflen += 4; - return 0; -} - -void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen) -{ - memcpy(&S->buf[S->buflen], in, inlen); - S->buflen += (unsigned int)inlen; -} - -int ar2_blake2b_final(blake2b_state *S, void *out, size_t outlen) -{ - uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; - unsigned int i; - - blake2b_increment_counter(S, S->buflen); - blake2b_set_lastblock(S); - memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ - blake2b_compress(S, S->buf); - - for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ - store64(buffer + sizeof(S->h[i]) * i, S->h[i]); - } - - memcpy(out, buffer, S->outlen); - - burn(buffer, sizeof(buffer)); - burn(S->buf, sizeof(S->buf)); - burn(S->h, sizeof(S->h)); - return 0; -} - -int ar2_blake2b(void *out, const void *in, const void *key, size_t keylen) -{ - blake2b_state S; - - ar2_blake2b_init(&S, 64); - my_blake2b_update(&S, in, 64); - ar2_blake2b_final(&S, out, 64); - burn(&S, sizeof(S)); - return 0; -} - -void ar2_blake2b_too(void *pout, const void *in) -{ - uint8_t *out = (uint8_t *)pout; - uint8_t out_buffer[64]; - uint8_t in_buffer[64]; - - blake2b_state blake_state; - ar2_blake2b_init(&blake_state, 64); - blake_state.buflen = blake_state.buf[1] = 4; - my_blake2b_update(&blake_state, in, 72); - ar2_blake2b_final(&blake_state, out_buffer, 64); - memcpy(out, out_buffer, 32); - out += 32; - - register uint8_t i = 29; - while (i--) { - memcpy(in_buffer, out_buffer, 64); - ar2_blake2b(out_buffer, in_buffer, NULL, 0); - memcpy(out, out_buffer, 32); - out += 32; - } - - memcpy(in_buffer, out_buffer, 64); - ar2_blake2b(out_buffer, in_buffer, NULL, 0); - memcpy(out, out_buffer, 64); - - burn(&blake_state, sizeof(blake_state)); -} - -/* Argon2 Team - Begin Code */ -int ar2_blake2b_long(void *pout, const void *in) -{ - uint8_t *out = (uint8_t *)pout; - blake2b_state blake_state; - uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; - - store32(outlen_bytes, 32); - - ar2_blake2b_init(&blake_state, 32); - my_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); - ar2_blake2b_update(&blake_state, in, 1024); - ar2_blake2b_final(&blake_state, out, 32); - burn(&blake_state, sizeof(blake_state)); - return 0; -} -/* Argon2 Team - End Code */ diff --git a/algo/argon2/argon2a/ar2/cores.c b/algo/argon2/argon2a/ar2/cores.c deleted file mode 100644 index 152a0ba6..00000000 --- a/algo/argon2/argon2a/ar2/cores.c +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -/*For memory wiping*/ -#ifdef _MSC_VER -#include -#include /* For SecureZeroMemory */ -#endif -#if defined __STDC_LIB_EXT1__ -#define __STDC_WANT_LIB_EXT1__ 1 -#endif -#define VC_GE_2005(version) (version >= 1400) - -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -#ifdef GENKAT -#include "genkat.h" -#endif - -#if defined(__clang__) -#if __has_attribute(optnone) -#define NOT_OPTIMIZED __attribute__((optnone)) -#endif -#elif defined(__GNUC__) -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= 40400 -#define NOT_OPTIMIZED __attribute__((optimize("O0"))) -#endif -#endif -#ifndef NOT_OPTIMIZED -#define NOT_OPTIMIZED -#endif - -/***************Instance and Position constructors**********/ -void ar2_init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } -//inline void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } - -void ar2_copy_block(block *dst, const block *src) { -//inline void copy_block(block *dst, const block *src) { - memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_WORDS_IN_BLOCK); -} - -void ar2_xor_block(block *dst, const block *src) { -//inline void xor_block(block *dst, const block *src) { - int i; - for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) { - dst->v[i] ^= src->v[i]; - } -} - -static void ar2_load_block(block *dst, const void *input) { -//static inline void load_block(block *dst, const void *input) { - unsigned i; - for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) { - dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); - } -} - -static void ar2_store_block(void *output, const block *src) { -//static inline void store_block(void *output, const block *src) { - unsigned i; - for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) { - store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); - } -} - -/***************Memory allocators*****************/ -int ar2_allocate_memory(block **memory, uint32_t m_cost) { - if (memory != NULL) { - size_t memory_size = sizeof(block) * m_cost; - if (m_cost != 0 && - memory_size / m_cost != - sizeof(block)) { /*1. Check for multiplication overflow*/ - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - *memory = (block *)malloc(memory_size); /*2. Try to allocate*/ - - if (!*memory) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - return ARGON2_OK; - } else { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } -} - -void ar2_secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); } -//inline void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); } - -/*********Memory functions*/ - -void ar2_clear_memory(argon2_instance_t *instance, int clear) { -//inline void clear_memory(argon2_instance_t *instance, int clear) { - if (instance->memory != NULL && clear) { - ar2_secure_wipe_memory(instance->memory, - sizeof(block) * /*instance->memory_blocks*/16); - } -} - -void ar2_free_memory(block *memory) { free(memory); } -//inline void free_memory(block *memory) { free(memory); } - -void ar2_finalize(const argon2_context *context, argon2_instance_t *instance) { - if (context != NULL && instance != NULL) { - block blockhash; - ar2_copy_block(&blockhash, instance->memory + 15); - - /* Hash the result */ - { - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - ar2_store_block(blockhash_bytes, &blockhash); - ar2_blake2b_long(context->out, blockhash_bytes); - ar2_secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE); - ar2_secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */ - } - -#ifdef GENKAT - print_tag(context->out, context->outlen); -#endif - - /* Clear memory */ - // clear_memory(instance, 1); - - ar2_free_memory(instance->memory); - } -} - -uint32_t ar2_index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane) { - /* - * Pass 0: - * This lane : all already finished segments plus already constructed - * blocks in this segment - * Other lanes : all already finished segments - * Pass 1+: - * This lane : (SYNC_POINTS - 1) last segments plus already constructed - * blocks in this segment - * Other lanes : (SYNC_POINTS - 1) last segments - */ - uint32_t reference_area_size; - uint64_t relative_position; - uint32_t start_position, absolute_position; - - if (0 == position->pass) { - /* First pass */ - if (0 == position->slice) { - /* First slice */ - reference_area_size = - position->index - 1; /* all but the previous */ - } else { - if (same_lane) { - /* The same lane => add current segment */ - reference_area_size = - position->slice * 4 + - position->index - 1; - } else { - reference_area_size = - position->slice * 4 + - ((position->index == 0) ? (-1) : 0); - } - } - } else { - /* Second pass */ - if (same_lane) {reference_area_size = 11 + position->index;} - else {reference_area_size = 12 - (position->index == 0);} - } - - /* 1.2.4. Mapping pseudo_rand to 0.. and produce - * relative position */ - relative_position = pseudo_rand; - relative_position = relative_position * relative_position >> 32; - relative_position = reference_area_size - 1 - - (reference_area_size * relative_position >> 32); - - /* 1.2.5 Computing starting position */ - start_position = 0; - - if (0 != position->pass) { - start_position = (position->slice == ARGON2_SYNC_POINTS - 1) - ? 0 : (position->slice + 1) * 4; - } - - /* 1.2.6. Computing absolute position */ - absolute_position = (start_position + relative_position) % 16; - return absolute_position; -} - -void ar2_fill_memory_blocks(argon2_instance_t *instance) { - uint32_t r, s; - - for (r = 0; r < 2; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - - argon2_position_t position; - position.pass = r; - position.lane = 0; - position.slice = (uint8_t)s; - position.index = 0; - ar2_fill_segment(instance, position); - } - -#ifdef GENKAT - internal_kat(instance, r); /* Print all memory blocks */ -#endif - } -} - -void ar2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { - /* Make the first and second block in each lane as G(H0||i||0) or - G(H0||i||1) */ - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, 0); - ar2_blake2b_too(blockhash_bytes, blockhash); - ar2_load_block(&instance->memory[0], blockhash_bytes); - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); - ar2_blake2b_too(blockhash_bytes, blockhash); - ar2_load_block(&instance->memory[1], blockhash_bytes); - ar2_secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); -} - - -static const blake2b_state base_hash = { - .h = { - UINT64_C(7640891576939301192), UINT64_C(13503953896175478587), - UINT64_C(4354685564936845355), UINT64_C(11912009170470909681), - UINT64_C(5840696475078001361), UINT64_C(11170449401992604703), - UINT64_C(2270897969802886507), UINT64_C(6620516959819538809) - }, - .t = {UINT64_C(0),UINT64_C(0)}, - .f = {UINT64_C(0),UINT64_C(0)}, - .buf = { - 1, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 2, 0, 0, 0, 16, 0, 0, 0, 1, 0, - 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - .buflen = 28, - .outlen = 64, - .last_node = 0 -}; - -#define PWDLEN 32 -#define SALTLEN 32 -#define SECRETLEN 0 -#define ADLEN 0 -void ar2_initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type) { - - uint8_t value[sizeof(uint32_t)]; - - /* Is it generating cache invalidation between cores ? */ - blake2b_state BlakeHash = base_hash; - BlakeHash.buf[20] = (uint8_t) type; - my_blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, - PWDLEN); - - - ar2_secure_wipe_memory(context->pwd, PWDLEN); - context->pwdlen = 0; - - store32(&value, SALTLEN); - my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - my_blake2b_update(&BlakeHash, (const uint8_t *)context->salt, - SALTLEN); - - store32(&value, SECRETLEN); - my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, ADLEN); - my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - ar2_blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); -} - -int ar2_initialize(argon2_instance_t *instance, argon2_context *context) { - /* 1. Memory allocation */ - - - ar2_allocate_memory(&(instance->memory), 16); - - /* 2. Initial hashing */ - /* H_0 + 8 extra bytes to produce the first blocks */ - /* Hashing all inputs */ - uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; - ar2_initial_hash(blockhash, context, instance->type); - /* Zeroing 8 extra bytes */ - ar2_secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, - ARGON2_PREHASH_SEED_LENGTH - - ARGON2_PREHASH_DIGEST_LENGTH); - -#ifdef GENKAT - initial_kat(blockhash, context, instance->type); -#endif - - /* 3. Creating first blocks, we always have at least two blocks in a slice - */ - ar2_fill_first_blocks(blockhash, instance); - /* Clearing the hash */ - ar2_secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); - - return ARGON2_OK; -} - -int ar2_argon2_core(argon2_context *context, argon2_type type) { - argon2_instance_t instance; - instance.memory = NULL; - instance.type = type; - - /* 3. Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - - int result = ar2_initialize(&instance, context); - if (ARGON2_OK != result) return result; - - /* 4. Filling memory */ - ar2_fill_memory_blocks(&instance); - - /* 5. Finalization */ - ar2_finalize(context, &instance); - - return ARGON2_OK; -} diff --git a/algo/argon2/argon2a/ar2/cores.h b/algo/argon2/argon2a/ar2/cores.h deleted file mode 100644 index e3f183fa..00000000 --- a/algo/argon2/argon2a/ar2/cores.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_CORES_H -#define ARGON2_CORES_H - -#if defined(_MSC_VER) -#include -#include -#define ALIGN(n) __declspec(align(n)) -#elif defined(__GNUC__) || defined(__clang) -#define ALIGN(x) __attribute__((__aligned__(x))) -#else -#define ALIGN(x) -#endif - -/*************************Argon2 internal - * constants**************************************************/ - -enum argon2_core_constants { - /* Version of the algorithm */ - ARGON2_VERSION_NUMBER = 0x10, - - /* Memory block size in bytes */ - ARGON2_BLOCK_SIZE = 1024, - ARGON2_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, - ARGON2_QWORDS_IN_BLOCK = 64, - - /* Number of pseudo-random values generated by one call to Blake in Argon2i - to - generate reference block positions */ - ARGON2_ADDRESSES_IN_BLOCK = 128, - - /* Pre-hashing digest length and its extension*/ - ARGON2_PREHASH_DIGEST_LENGTH = 64, - ARGON2_PREHASH_SEED_LENGTH = 72 -}; - -/* Argon2 primitive type */ -typedef enum Argon2_type { Argon2_d = 0, Argon2_i = 1 } argon2_type; - -/*************************Argon2 internal data - * types**************************************************/ - -/* - * Structure for the (1KB) memory block implemented as 128 64-bit words. - * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no - * bounds checking). - */ -typedef struct _block { uint64_t v[ARGON2_WORDS_IN_BLOCK]; } ALIGN(16) block; - -/*****************Functions that work with the block******************/ - -/* Initialize each byte of the block with @in */ -void ar2_init_block_value(block *b, uint8_t in); - -/* Copy block @src to block @dst */ -void ar2_copy_block(block *dst, const block *src); - -/* XOR @src onto @dst bytewise */ -void ar2_xor_block(block *dst, const block *src); - -/* - * Argon2 instance: memory pointer, number of passes, amount of memory, type, - * and derived values. - * Used to evaluate the number and location of blocks to construct in each - * thread - */ -typedef struct Argon2_instance_t { - block *memory; /* Memory pointer */ - argon2_type type; - int print_internals; /* whether to print the memory blocks */ -} argon2_instance_t; - -/* - * Argon2 position: where we construct the block right now. Used to distribute - * work between threads. - */ -typedef struct Argon2_position_t { - uint32_t pass; - uint32_t lane; - uint8_t slice; - uint32_t index; -} argon2_position_t; - -/*************************Argon2 core - * functions**************************************************/ - -/* Allocates memory to the given pointer - * @param memory pointer to the pointer to the memory - * @param m_cost number of blocks to allocate in the memory - * @return ARGON2_OK if @memory is a valid pointer and memory is allocated - */ -int ar2_allocate_memory(block **memory, uint32_t m_cost); - -/* Function that securely cleans the memory - * @param mem Pointer to the memory - * @param s Memory size in bytes - */ -void ar2_secure_wipe_memory(void *v, size_t n); - -/* Clears memory - * @param instance pointer to the current instance - * @param clear_memory indicates if we clear the memory with zeros. - */ -void ar2_clear_memory(argon2_instance_t *instance, int clear); - -/* Deallocates memory - * @param memory pointer to the blocks - */ -void ar2_free_memory(block *memory); - -/* - * Computes absolute position of reference block in the lane following a skewed - * distribution and using a pseudo-random value as input - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rand 32-bit pseudo-random value used to determine the position - * @param same_lane Indicates if the block will be taken from the current lane. - * If so we can reference the current segment - * @pre All pointers must be valid - */ -uint32_t ar2_index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane); - -/* - * Function that validates all inputs against predefined restrictions and return - * an error code - * @param context Pointer to current Argon2 context - * @return ARGON2_OK if everything is all right, otherwise one of error codes - * (all defined in - */ -int ar2_validate_inputs(const argon2_context *context); - -/* - * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears - * password and secret if needed - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param blockhash Buffer for pre-hashing digest - * @param type Argon2 type - * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes - * allocated - */ -void ar2_initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type); - -/* - * Function creates first 2 blocks per lane - * @param instance Pointer to the current instance - * @param blockhash Pointer to the pre-hashing digest - * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values - */ -void ar2_fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance); - -/* - * Function allocates memory, hashes the inputs with Blake, and creates first - * two blocks. Returns the pointer to the main memory with 2 blocks per lane - * initialized - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param instance Current Argon2 instance - * @return Zero if successful, -1 if memory failed to allocate. @context->state - * will be modified if successful. - */ -int ar2_initialize(argon2_instance_t *instance, argon2_context *context); - -/* - * XORing the last block of each lane, hashing it, making the tag. Deallocates - * the memory. - * @param context Pointer to current Argon2 context (use only the out parameters - * from it) - * @param instance Pointer to current instance of Argon2 - * @pre instance->state must point to necessary amount of memory - * @pre context->out must point to outlen bytes of memory - * @pre if context->free_cbk is not NULL, it should point to a function that - * deallocates memory - */ -void ar2_finalize(const argon2_context *context, argon2_instance_t *instance); - -/* - * Function that fills the segment using previous segments also from other - * threads - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void ar2_fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -/* - * Function that fills the entire memory t_cost times based on the first two - * blocks in each lane - * @param instance Pointer to the current instance - */ -void ar2_fill_memory_blocks(argon2_instance_t *instance); - -/* - * Function that performs memory-hard hashing with certain degree of parallelism - * @param context Pointer to the Argon2 internal structure - * @return Error code if smth is wrong, ARGON2_OK otherwise - */ -int ar2_argon2_core(argon2_context *context, argon2_type type); - -#endif diff --git a/algo/argon2/argon2a/ar2/genkat.c.hide b/algo/argon2/argon2a/ar2/genkat.c.hide deleted file mode 100644 index 07042b2a..00000000 --- a/algo/argon2/argon2a/ar2/genkat.c.hide +++ /dev/null @@ -1,186 +0,0 @@ -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" - -void initial_kat(const uint8_t *blockhash, const argon2_context *context, - argon2_type type) -{ - unsigned i; - - if (blockhash != NULL && context != NULL) { - printf("======================================="); - - switch (type) { - case Argon2_d: - printf("Argon2d\n"); - break; - - case Argon2_i: - printf("Argon2i\n"); - break; - - default: - break; - } - - printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag " - "length: %u bytes\n", - context->m_cost, context->t_cost, context->lanes, - context->outlen); - - printf("Password[%u]: ", context->pwdlen); - - if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { - printf("CLEARED\n"); - } else { - for (i = 0; i < context->pwdlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->pwd)[i]); - } - - printf("\n"); - } - - printf("Salt[%u]: ", context->saltlen); - - for (i = 0; i < context->saltlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->salt)[i]); - } - - printf("\n"); - - printf("Secret[%u]: ", context->secretlen); - - if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { - printf("CLEARED\n"); - } else { - for (i = 0; i < context->secretlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->secret)[i]); - } - - printf("\n"); - } - - printf("Associated data[%u]: ", context->adlen); - - for (i = 0; i < context->adlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->ad)[i]); - } - - printf("\n"); - - printf("Pre-hashing digest: "); - - for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) { - printf("%2.2x ", ((unsigned char *)blockhash)[i]); - } - - printf("\n"); - } -} - -void print_tag(const void *out, uint32_t outlen) -{ - unsigned i; - if (out != NULL) { - printf("Tag: "); - - for (i = 0; i < outlen; ++i) { - printf("%2.2x ", ((uint8_t *)out)[i]); - } - - printf("\n"); - } -} - -void internal_kat(const argon2_instance_t *instance, uint32_t pass) -{ - if (instance != NULL) { - uint32_t i, j; - printf("\n After pass %u:\n", pass); - - for (i = 0; i < instance->memory_blocks; ++i) { - uint32_t how_many_words = - (instance->memory_blocks > ARGON2_WORDS_IN_BLOCK) - ? 1 - : ARGON2_WORDS_IN_BLOCK; - - for (j = 0; j < how_many_words; ++j) - printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j, - instance->memory[i].v[j]); - } - } -} - -static void fatal(const char *error) { - fprintf(stderr, "Error: %s\n", error); - exit(1); -} - -static void generate_testvectors(const char *type) -{ -#define TEST_OUTLEN 32 -#define TEST_PWDLEN 32 -#define TEST_SALTLEN 16 -#define TEST_SECRETLEN 8 -#define TEST_ADLEN 12 - argon2_context context; - - unsigned char out[TEST_OUTLEN]; - unsigned char pwd[TEST_PWDLEN]; - unsigned char salt[TEST_SALTLEN]; - unsigned char secret[TEST_SECRETLEN]; - unsigned char ad[TEST_ADLEN]; - const allocate_fptr myown_allocator = NULL; - const deallocate_fptr myown_deallocator = NULL; - - unsigned t_cost = 3; - unsigned m_cost = 16; - unsigned lanes = 4; - - memset(pwd, 1, TEST_OUTLEN); - memset(salt, 2, TEST_SALTLEN); - memset(secret, 3, TEST_SECRETLEN); - memset(ad, 4, TEST_ADLEN); - - context.out = out; - context.outlen = TEST_OUTLEN; - context.pwd = pwd; - context.pwdlen = TEST_PWDLEN; - context.salt = salt; - context.saltlen = TEST_SALTLEN; - context.secret = secret; - context.secretlen = TEST_SECRETLEN; - context.ad = ad; - context.adlen = TEST_ADLEN; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = lanes; - context.threads = lanes; - context.allocate_cbk = myown_allocator; - context.free_cbk = myown_deallocator; - context.flags = 0; - -#undef TEST_OUTLEN -#undef TEST_PWDLEN -#undef TEST_SALTLEN -#undef TEST_SECRETLEN -#undef TEST_ADLEN - - if (!strcmp(type, "d")) { - argon2d(&context); - } else if (!strcmp(type, "i")) { - argon2i(&context); - } else - fatal("wrong Argon2 type"); -} - -int main(int argc, char *argv[]) -{ - const char *type = (argc > 1) ? argv[1] : "i"; - generate_testvectors(type); - return ARGON2_OK; -} diff --git a/algo/argon2/argon2a/ar2/genkat.h.hide b/algo/argon2/argon2a/ar2/genkat.h.hide deleted file mode 100644 index 9c776bf5..00000000 --- a/algo/argon2/argon2a/ar2/genkat.h.hide +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_KAT_H -#define ARGON2_KAT_H - -/* - * Initial KAT function that prints the inputs to the file - * @param blockhash Array that contains pre-hashing digest - * @param context Holds inputs - * @param type Argon2 type - * @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes - * @pre context member pointers must point to allocated memory of size according - * to the length values - */ -void initial_kat(const uint8_t *blockhash, const argon2_context *context, - argon2_type type); - -/* - * Function that prints the output tag - * @param out output array pointer - * @param outlen digest length - * @pre out must point to @a outlen bytes - **/ -void print_tag(const void *out, uint32_t outlen); - -/* - * Function that prints the internal state at given moment - * @param instance pointer to the current instance - * @param pass current pass number - * @pre instance must have necessary memory allocated - **/ -void internal_kat(const argon2_instance_t *instance, uint32_t pass); - -#endif diff --git a/algo/argon2/argon2a/ar2/opt.c b/algo/argon2/argon2a/ar2/opt.c deleted file mode 100644 index feda8672..00000000 --- a/algo/argon2/argon2a/ar2/opt.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#include -#include -#include -#include -#include - -#include - -#include "argon2.h" -#include "cores.h" -#include "opt.h" - -#include "blake2/blake2.h" -#include "blake2/blamka-round-opt.h" - -void ar2_fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block) -{ - __m128i ALIGN(16) block_XY[ARGON2_QWORDS_IN_BLOCK]; - uint32_t i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) { - block_XY[i] = state[i] = _mm_xor_si128( - state[i], _mm_load_si128(&ref_block[i])); - } - - BLAKE2_ROUND(state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]); - BLAKE2_ROUND(state[8], state[9], state[10], state[11], state[12], state[13], state[14], state[15]); - BLAKE2_ROUND(state[16], state[17], state[18], state[19], state[20], state[21], state[22], state[23]); - BLAKE2_ROUND(state[24], state[25], state[26], state[27], state[28], state[29], state[30], state[31]); - BLAKE2_ROUND(state[32], state[33], state[34], state[35], state[36], state[37], state[38], state[39]); - BLAKE2_ROUND(state[40], state[41], state[42], state[43], state[44], state[45], state[46], state[47]); - BLAKE2_ROUND(state[48], state[49], state[50], state[51], state[52], state[53], state[54], state[55]); - BLAKE2_ROUND(state[56], state[57], state[58], state[59], state[60], state[61], state[62], state[63]); - /*for (i = 0; i < 8; ++i) { - BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], - state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - }*/ - - BLAKE2_ROUND(state[0], state[8], state[16], state[24], state[32], state[40], state[48], state[56]); - BLAKE2_ROUND(state[1], state[9], state[17], state[25], state[33], state[41], state[49], state[57]); - BLAKE2_ROUND(state[2], state[10], state[18], state[26], state[34], state[42], state[50], state[58]); - BLAKE2_ROUND(state[3], state[11], state[19], state[27], state[35], state[43], state[51], state[59]); - BLAKE2_ROUND(state[4], state[12], state[20], state[28], state[36], state[44], state[52], state[60]); - BLAKE2_ROUND(state[5], state[13], state[21], state[29], state[37], state[45], state[53], state[61]); - BLAKE2_ROUND(state[6], state[14], state[22], state[30], state[38], state[46], state[54], state[62]); - BLAKE2_ROUND(state[7], state[15], state[23], state[31], state[39], state[47], state[55], state[63]); - /*for (i = 0; i < 8; ++i) { - BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], - state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - }*/ - - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) { - state[i] = _mm_xor_si128(state[i], block_XY[i]); - _mm_storeu_si128(&next_block[i], state[i]); - } -} - -static const uint64_t bad_rands[32] = { - UINT64_C(17023632018251376180), UINT64_C(4911461131397773491), - UINT64_C(15927076453364631751), UINT64_C(7860239898779391109), - - UINT64_C(11820267568857244377), UINT64_C(12188179869468676617), - UINT64_C(3732913385414474778), UINT64_C(7651458777762572084), - - UINT64_C(3062274162574341415), UINT64_C(17922653540258786897), - UINT64_C(17393848266100524980), UINT64_C(8539695715554563839), - - UINT64_C(13824538050656654359), UINT64_C(12078939433126460936), - UINT64_C(15331979418564540430), UINT64_C(12058346794217174273), - - UINT64_C(13593922096015221049), UINT64_C(18356682276374416500), - UINT64_C(4968040514092703824), UINT64_C(11202790346130235567), - - UINT64_C(2276229735041314644), UINT64_C(220837743321691382), - UINT64_C(4861211596230784273), UINT64_C(6330592584132590331), - - UINT64_C(3515580430960296763), UINT64_C(9869356316971855173), - UINT64_C(485533243489193056), UINT64_C(14596447761048148032), - - UINT64_C(16531790085730132900), UINT64_C(17328824500878824371), - UINT64_C(8548260058287621283), UINT64_C(8641748798041936364) -}; - -void ar2_generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands) -{ - uint8_t offset = position->pass * 16 + position->slice * 4; - pseudo_rands[0] = bad_rands[offset++]; - pseudo_rands[1] = bad_rands[offset++]; - pseudo_rands[2] = bad_rands[offset++]; - pseudo_rands[3] = bad_rands[offset++]; - - /*if ((position->pass == 1 && position->slice == 3)) - print64("pseudo_rands", pseudo_rands, 4);*/ -} - -#define SEGMENT_LENGTH 4 -#define LANE_LENGTH 16 -#define POS_LANE 0 - -void ar2_fill_segment(const argon2_instance_t *instance, - argon2_position_t position) -{ - block *ref_block = NULL, *curr_block = NULL; - uint64_t pseudo_rand, ref_index; - uint32_t prev_offset, curr_offset; - uint8_t i; - __m128i state[64]; - int data_independent_addressing = (instance->type == Argon2_i); - - /* Pseudo-random values that determine the reference block position */ - uint64_t *pseudo_rands = NULL; - - pseudo_rands = (uint64_t *)malloc(/*sizeof(uint64_t) * 4*/32); - - if (data_independent_addressing) { - ar2_generate_addresses(instance, &position, pseudo_rands); - } - - i = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - i = 2; /* we have already generated the first two blocks */ - } - - /*printf("Position.lane = %d\nPosition.slice = %d\nStarting index : %d\n", position.lane, position.slice, starting_index);*/ - /* Offset of the current block */ - curr_offset = position.slice * 4 + i; - - if (0 == curr_offset % 16) { - /* Last block in this lane */ - prev_offset = curr_offset + /*instance->lane_length - 1*/15; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (; i < SEGMENT_LENGTH; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % LANE_LENGTH == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - pseudo_rand = pseudo_rands[i]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = ar2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1); - - /* 2 Creating a new block */ - ref_block = instance->memory + ref_index; - curr_block = instance->memory + curr_offset; - ar2_fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v); - } - - free(pseudo_rands); -} diff --git a/algo/argon2/argon2a/ar2/opt.h b/algo/argon2/argon2a/ar2/opt.h deleted file mode 100644 index 8e3b5aac..00000000 --- a/algo/argon2/argon2a/ar2/opt.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_OPT_H -#define ARGON2_OPT_H - -/* - * Function fills a new memory block. Differs from the - * @param state Pointer to the just produced block. Content will be updated(!) - * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be constructed - * @pre all block pointers must be valid - */ -void ar2_fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block); - -/* - * Generate pseudo-random values to reference blocks in the segment and puts - * them into the array - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rands Pointer to the array of 64-bit values - * @pre pseudo_rands must point to @a instance->segment_length allocated values - */ -void ar2_generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands); - -/* - * Function that fills the segment using previous segments also from other - * threads. - * Identical to the reference code except that it calls optimized FillBlock() - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void ar2_fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -#endif /* ARGON2_OPT_H */ diff --git a/algo/argon2/argon2a/ar2/ref.c.hide b/algo/argon2/argon2a/ar2/ref.c.hide deleted file mode 100644 index 98ae07c9..00000000 --- a/algo/argon2/argon2a/ar2/ref.c.hide +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#include -#include -#include - -#include "argon2.h" -#include "cores.h" -#include "ref.h" - -#include "blake2/blamka-round-ref.h" -#include "blake2/blake2-impl.h" -#include "blake2/blake2.h" - -void fill_block(const block *prev_block, const block *ref_block, - block *next_block) { - block blockR, block_tmp; - unsigned i; - - copy_block(&blockR, ref_block); - xor_block(&blockR, prev_block); - copy_block(&block_tmp, &blockR); - - /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then - (16,17,..31)... finally (112,113,...127) */ - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND_NOMSG( - blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], - blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], - blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], - blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], - blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], - blockR.v[16 * i + 15]); - } - - /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then - (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */ - for (i = 0; i < 8; i++) { - BLAKE2_ROUND_NOMSG( - blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], - blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], - blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], - blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], - blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], - blockR.v[2 * i + 113]); - } - - copy_block(next_block, &block_tmp); - xor_block(next_block, &blockR); -} - -void generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands) { - block zero_block, input_block, address_block; - uint32_t i; - - init_block_value(&zero_block, 0); - init_block_value(&input_block, 0); - init_block_value(&address_block, 0); - - if (instance != NULL && position != NULL) { - input_block.v[0] = position->pass; - input_block.v[1] = position->lane; - input_block.v[2] = position->slice; - input_block.v[3] = 16; - input_block.v[4] = 2; - input_block.v[5] = instance->type; - - for (i = 0; i < 4; ++i) { - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - input_block.v[6]++; - fill_block(&zero_block, &input_block, &address_block); - fill_block(&zero_block, &address_block, &address_block); - } - - pseudo_rands[i] = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; - } - } -} - -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position) { - block *ref_block = NULL, *curr_block = NULL; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index; - uint32_t i; - int data_independent_addressing = (instance->type == Argon2_i); - /* Pseudo-random values that determine the reference block position */ - uint64_t *pseudo_rands = NULL; - - if (instance == NULL) { - return; - } - - pseudo_rands = - (uint64_t *)malloc(sizeof(uint64_t) * 4); - - if (pseudo_rands == NULL) { - return; - } - - if (data_independent_addressing) { - generate_addresses(instance, &position, pseudo_rands); - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - } - - /* Offset of the current block */ - curr_offset = position.lane * 16 + - position.slice * 4 + starting_index; - - if (0 == curr_offset % 16) { - /* Last block in this lane */ - prev_offset = curr_offset + 16 - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - for (i = starting_index; i < 4; ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % 16 == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - pseudo_rand = pseudo_rands[i]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % 1; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + 16 * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - fill_block(instance->memory + prev_offset, ref_block, curr_block); - } - - free(pseudo_rands); -} diff --git a/algo/argon2/argon2a/ar2/ref.h.hide b/algo/argon2/argon2a/ar2/ref.h.hide deleted file mode 100644 index 7ee22eef..00000000 --- a/algo/argon2/argon2a/ar2/ref.h.hide +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_REF_H -#define ARGON2_REF_H - -/* - * Function fills a new memory block - * @param prev_block Pointer to the previous block - * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be constructed - * @pre all block pointers must be valid - */ -void fill_block(const block *prev_block, const block *ref_block, - block *next_block); - -/* - * Generate pseudo-random values to reference blocks in the segment and puts - * them into the array - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rands Pointer to the array of 64-bit values - * @pre pseudo_rands must point to @a instance->segment_length allocated values - */ -void generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands); - -/* - * Function that fills the segment using previous segments also from other - * threads - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -#endif /* ARGON2_REF_H */ diff --git a/algo/argon2/argon2a/ar2/run.c.hide b/algo/argon2/argon2a/ar2/run.c.hide deleted file mode 100644 index 2b1b30a3..00000000 --- a/algo/argon2/argon2a/ar2/run.c.hide +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#include -#include -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" - -#define T_COST_DEF 3 -#define LOG_M_COST_DEF 12 /* 2^12 = 4 MiB */ -#define LANES_DEF 1 -#define THREADS_DEF 1 -#define OUT_LEN 32 -#define SALT_LEN 16 - -#define UNUSED_PARAMETER(x) (void)(x) - -static void usage(const char *cmd) { - printf("Usage: %s pwd salt [-y version] [-t iterations] [-m memory] [-p " - "parallelism]\n", - cmd); - - printf("Parameters:\n"); - printf("\tpwd\t\tThe password to hash\n"); - printf("\tsalt\t\tThe salt to use, at most 16 characters\n"); - printf("\t-d\t\tUse Argon2d instead of Argon2i (which is the default)\n"); - printf("\t-t N\t\tSets the number of iterations to N (default = %d)\n", - T_COST_DEF); - printf("\t-m N\t\tSets the memory usage of 2^N KiB (default %d)\n", - LOG_M_COST_DEF); - printf("\t-p N\t\tSets parallelism to N threads (default %d)\n", - THREADS_DEF); -} - -static void fatal(const char *error) { - fprintf(stderr, "Error: %s\n", error); - exit(1); -} - -/* -Runs Argon2 with certain inputs and parameters, inputs not cleared. Prints the -Base64-encoded hash string -@out output array with at least 32 bytes allocated -@pwd NULL-terminated string, presumably from argv[] -@salt salt array with at least SALTLEN_DEF bytes allocated -@t_cost number of iterations -@m_cost amount of requested memory in KB -@lanes amount of requested parallelism -@threads actual parallelism -@type String, only "d" and "i" are accepted -*/ -static void run(uint8_t *out, char *pwd, uint8_t *salt, uint32_t t_cost, - uint32_t m_cost, uint32_t lanes, uint32_t threads, - const char *type) { - clock_t start_time, stop_time; - unsigned pwd_length; - argon2_context context; - int i; - - start_time = clock(); - - if (!pwd) { - fatal("password missing"); - } - - if (!salt) { - secure_wipe_memory(pwd, strlen(pwd)); - fatal("salt missing"); - } - - pwd_length = strlen(pwd); - - UNUSED_PARAMETER(threads); - - context.out = out; - context.outlen = OUT_LEN; - context.pwd = (uint8_t *)pwd; - context.pwdlen = pwd_length; - context.salt = salt; - context.saltlen = SALT_LEN; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = lanes; - context.threads = lanes; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_FLAG_CLEAR_PASSWORD; - - if (!strcmp(type, "d")) { - int result = argon2d(&context); - if (result != ARGON2_OK) - fatal(error_message(result)); - } else if (!strcmp(type, "i")) { - int result = argon2i(&context); - if (result != ARGON2_OK) - fatal(error_message(result)); - } else { - secure_wipe_memory(pwd, strlen(pwd)); - fatal("wrong Argon2 type"); - } - - stop_time = clock(); - - /* add back when proper decoding */ - /* - char encoded[300]; - encode_string(encoded, sizeof encoded, &context); - printf("%s\n", encoded); - */ - printf("Hash:\t\t"); - for (i = 0; i < context.outlen; ++i) { - printf("%02x", context.out[i]); - } - printf("\n"); - - printf("%2.3f seconds\n", - ((double)stop_time - start_time) / (CLOCKS_PER_SEC)); -} - -int main(int argc, char *argv[]) { - unsigned char out[OUT_LEN]; - uint32_t m_cost = 1 << LOG_M_COST_DEF; - uint32_t t_cost = T_COST_DEF; - uint32_t lanes = LANES_DEF; - uint32_t threads = THREADS_DEF; - char *pwd = NULL; - uint8_t salt[SALT_LEN]; - const char *type = "i"; - int i; - - if (argc < 3) { - usage(argv[0]); - return ARGON2_MISSING_ARGS; - } - - /* get password and salt from command line */ - pwd = argv[1]; - if (strlen(argv[2]) > SALT_LEN) { - fatal("salt too long"); - } - memset(salt, 0x00, SALT_LEN); /* pad with null bytes */ - memcpy(salt, argv[2], strlen(argv[2])); - - /* parse options */ - for (i = 3; i < argc; i++) { - const char *a = argv[i]; - unsigned long input = 0; - if (!strcmp(a, "-m")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_MEMORY_BITS) { - fatal("bad numeric input for -m"); - } - m_cost = ARGON2_MIN(UINT64_C(1) << input, UINT32_C(0xFFFFFFFF)); - if (m_cost > ARGON2_MAX_MEMORY) { - fatal("m_cost overflow"); - } - continue; - } else { - fatal("missing -m argument"); - } - } else if (!strcmp(a, "-t")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_TIME) { - fatal("bad numeric input for -t"); - } - t_cost = input; - continue; - } else { - fatal("missing -t argument"); - } - } else if (!strcmp(a, "-p")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_THREADS || input > ARGON2_MAX_LANES) { - fatal("bad numeric input for -p"); - } - threads = input; - lanes = threads; - continue; - } else { - fatal("missing -p argument"); - } - } else if (!strcmp(a, "-d")) { - type = "d"; - } else { - fatal("unknown argument"); - } - } - printf("Type:\t\tArgon2%c\n", type[0]); - printf("Iterations:\t%" PRIu32 " \n", t_cost); - printf("Memory:\t\t%" PRIu32 " KiB\n", m_cost); - printf("Parallelism:\t%" PRIu32 " \n", lanes); - run(out, pwd, salt, t_cost, m_cost, lanes, threads, type); - - return ARGON2_OK; -} diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-hash.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-hash.h deleted file mode 100644 index 3a48bf5d..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-hash.h +++ /dev/null @@ -1,38 +0,0 @@ -#if defined(SCRYPT_SKEIN512) -#include "scrypt-jane-hash_skein512.h" -#else - #define SCRYPT_HASH "ERROR" - #define SCRYPT_HASH_BLOCK_SIZE 64 - #define SCRYPT_HASH_DIGEST_SIZE 64 - typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state; - typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; - static void scrypt_hash_init(scrypt_hash_state *S) {} - static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {} - static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {} - static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0}; - #error must define a hash function! -#endif - -#include "scrypt-jane-pbkdf2.h" - -#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */ - -static int -scrypt_test_hash(void) { - scrypt_hash_state st; - scrypt_hash_digest hash, final; - uint8_t msg[SCRYPT_TEST_HASH_LEN]; - size_t i; - - for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++) - msg[i] = (uint8_t)i; - - scrypt_hash_init(&st); - for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) { - scrypt_hash(hash, msg, i); - scrypt_hash_update(&st, hash, sizeof(hash)); - } - scrypt_hash_finish(&st, final); - return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE); -} - diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-hash_skein512.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-hash_skein512.h deleted file mode 100644 index 838df2c2..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-hash_skein512.h +++ /dev/null @@ -1,188 +0,0 @@ -#define SCRYPT_HASH "Skein-512" -#define SCRYPT_HASH_BLOCK_SIZE 64 -#define SCRYPT_HASH_DIGEST_SIZE 64 - -typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; - -typedef struct scrypt_hash_state_t { - uint64_t X[8], T[2]; - uint32_t leftover; - uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; -} scrypt_hash_state; - -#include - -static void -skein512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks, size_t add) { - uint64_t X[8], key[8], Xt[9+18], T[3+1]; - size_t r; - - while (blocks--) { - T[0] = S->T[0] + add; - T[1] = S->T[1]; - T[2] = T[0] ^ T[1]; - key[0] = U8TO64_LE(in + 0); Xt[0] = S->X[0]; X[0] = key[0] + Xt[0]; - key[1] = U8TO64_LE(in + 8); Xt[1] = S->X[1]; X[1] = key[1] + Xt[1]; - key[2] = U8TO64_LE(in + 16); Xt[2] = S->X[2]; X[2] = key[2] + Xt[2]; - key[3] = U8TO64_LE(in + 24); Xt[3] = S->X[3]; X[3] = key[3] + Xt[3]; - key[4] = U8TO64_LE(in + 32); Xt[4] = S->X[4]; X[4] = key[4] + Xt[4]; - key[5] = U8TO64_LE(in + 40); Xt[5] = S->X[5]; X[5] = key[5] + Xt[5] + T[0]; - key[6] = U8TO64_LE(in + 48); Xt[6] = S->X[6]; X[6] = key[6] + Xt[6] + T[1]; - key[7] = U8TO64_LE(in + 56); Xt[7] = S->X[7]; X[7] = key[7] + Xt[7]; - Xt[8] = 0x1BD11BDAA9FC1A22ull ^ Xt[0] ^ Xt[1] ^ Xt[2] ^ Xt[3] ^ Xt[4] ^ Xt[5] ^ Xt[6] ^ Xt[7]; - in += SCRYPT_HASH_BLOCK_SIZE; - - for (r = 0; r < 18; r++) - Xt[r + 9] = Xt[r + 0]; - - for (r = 0; r < 18; r += 2) { - X[0] += X[1]; X[1] = ROTL64(X[1], 46) ^ X[0]; - X[2] += X[3]; X[3] = ROTL64(X[3], 36) ^ X[2]; - X[4] += X[5]; X[5] = ROTL64(X[5], 19) ^ X[4]; - X[6] += X[7]; X[7] = ROTL64(X[7], 37) ^ X[6]; - X[2] += X[1]; X[1] = ROTL64(X[1], 33) ^ X[2]; - X[0] += X[3]; X[3] = ROTL64(X[3], 42) ^ X[0]; - X[6] += X[5]; X[5] = ROTL64(X[5], 14) ^ X[6]; - X[4] += X[7]; X[7] = ROTL64(X[7], 27) ^ X[4]; - X[4] += X[1]; X[1] = ROTL64(X[1], 17) ^ X[4]; - X[6] += X[3]; X[3] = ROTL64(X[3], 49) ^ X[6]; - X[0] += X[5]; X[5] = ROTL64(X[5], 36) ^ X[0]; - X[2] += X[7]; X[7] = ROTL64(X[7], 39) ^ X[2]; - X[6] += X[1]; X[1] = ROTL64(X[1], 44) ^ X[6]; - X[4] += X[3]; X[3] = ROTL64(X[3], 56) ^ X[4]; - X[2] += X[5]; X[5] = ROTL64(X[5], 54) ^ X[2]; - X[0] += X[7]; X[7] = ROTL64(X[7], 9) ^ X[0]; - - X[0] += Xt[r + 1]; - X[1] += Xt[r + 2]; - X[2] += Xt[r + 3]; - X[3] += Xt[r + 4]; - X[4] += Xt[r + 5]; - X[5] += Xt[r + 6] + T[1]; - X[6] += Xt[r + 7] + T[2]; - X[7] += Xt[r + 8] + r + 1; - - T[3] = T[0]; - T[0] = T[1]; - T[1] = T[2]; - T[2] = T[3]; - - X[0] += X[1]; X[1] = ROTL64(X[1], 39) ^ X[0]; - X[2] += X[3]; X[3] = ROTL64(X[3], 30) ^ X[2]; - X[4] += X[5]; X[5] = ROTL64(X[5], 34) ^ X[4]; - X[6] += X[7]; X[7] = ROTL64(X[7], 24) ^ X[6]; - X[2] += X[1]; X[1] = ROTL64(X[1], 13) ^ X[2]; - X[0] += X[3]; X[3] = ROTL64(X[3], 17) ^ X[0]; - X[6] += X[5]; X[5] = ROTL64(X[5], 10) ^ X[6]; - X[4] += X[7]; X[7] = ROTL64(X[7], 50) ^ X[4]; - X[4] += X[1]; X[1] = ROTL64(X[1], 25) ^ X[4]; - X[6] += X[3]; X[3] = ROTL64(X[3], 29) ^ X[6]; - X[0] += X[5]; X[5] = ROTL64(X[5], 39) ^ X[0]; - X[2] += X[7]; X[7] = ROTL64(X[7], 43) ^ X[2]; - X[6] += X[1]; X[1] = ROTL64(X[1], 8) ^ X[6]; - X[4] += X[3]; X[3] = ROTL64(X[3], 22) ^ X[4]; - X[2] += X[5]; X[5] = ROTL64(X[5], 56) ^ X[2]; - X[0] += X[7]; X[7] = ROTL64(X[7], 35) ^ X[0]; - - X[0] += Xt[r + 2]; - X[1] += Xt[r + 3]; - X[2] += Xt[r + 4]; - X[3] += Xt[r + 5]; - X[4] += Xt[r + 6]; - X[5] += Xt[r + 7] + T[1]; - X[6] += Xt[r + 8] + T[2]; - X[7] += Xt[r + 9] + r + 2; - - T[3] = T[0]; - T[0] = T[1]; - T[1] = T[2]; - T[2] = T[3]; - } - - S->X[0] = key[0] ^ X[0]; - S->X[1] = key[1] ^ X[1]; - S->X[2] = key[2] ^ X[2]; - S->X[3] = key[3] ^ X[3]; - S->X[4] = key[4] ^ X[4]; - S->X[5] = key[5] ^ X[5]; - S->X[6] = key[6] ^ X[6]; - S->X[7] = key[7] ^ X[7]; - - S->T[0] = T[0]; - S->T[1] = T[1] & ~0x4000000000000000ull; - } -} - -static void -scrypt_hash_init(scrypt_hash_state *S) { - S->X[0] = 0x4903ADFF749C51CEull; - S->X[1] = 0x0D95DE399746DF03ull; - S->X[2] = 0x8FD1934127C79BCEull; - S->X[3] = 0x9A255629FF352CB1ull; - S->X[4] = 0x5DB62599DF6CA7B0ull; - S->X[5] = 0xEABE394CA9D5C3F4ull; - S->X[6] = 0x991112C71A75B523ull; - S->X[7] = 0xAE18A40B660FCC33ull; - S->T[0] = 0x0000000000000000ull; - S->T[1] = 0x7000000000000000ull; - S->leftover = 0; -} - -static void -scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { - size_t blocks, want; - - /* skein processes the final <=64 bytes raw, so we can only update if there are at least 64+1 bytes available */ - if ((S->leftover + inlen) > SCRYPT_HASH_BLOCK_SIZE) { - /* handle the previous data, we know there is enough for at least one block */ - if (S->leftover) { - want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); - memcpy(S->buffer + S->leftover, in, want); - in += want; - inlen -= want; - S->leftover = 0; - skein512_blocks(S, S->buffer, 1, SCRYPT_HASH_BLOCK_SIZE); - } - - /* handle the current data if there's more than one block */ - if (inlen > SCRYPT_HASH_BLOCK_SIZE) { - blocks = ((inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); - skein512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE, SCRYPT_HASH_BLOCK_SIZE); - inlen -= blocks; - in += blocks; - } - } - - /* handle leftover data */ - memcpy(S->buffer + S->leftover, in, inlen); - S->leftover += (int) inlen; -} - -static void -scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { - memset(S->buffer + S->leftover, 0, SCRYPT_HASH_BLOCK_SIZE - S->leftover); - S->T[1] |= 0x8000000000000000ull; - skein512_blocks(S, S->buffer, 1, S->leftover); - - memset(S->buffer, 0, SCRYPT_HASH_BLOCK_SIZE); - S->T[0] = 0; - S->T[1] = 0xff00000000000000ull; - skein512_blocks(S, S->buffer, 1, 8); - - U64TO8_LE(&hash[ 0], S->X[0]); - U64TO8_LE(&hash[ 8], S->X[1]); - U64TO8_LE(&hash[16], S->X[2]); - U64TO8_LE(&hash[24], S->X[3]); - U64TO8_LE(&hash[32], S->X[4]); - U64TO8_LE(&hash[40], S->X[5]); - U64TO8_LE(&hash[48], S->X[6]); - U64TO8_LE(&hash[56], S->X[7]); -} - - -static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { - 0x4d,0x52,0x29,0xff,0x10,0xbc,0xd2,0x62,0xd1,0x61,0x83,0xc8,0xe6,0xf0,0x83,0xc4, - 0x9f,0xf5,0x6a,0x42,0x75,0x2a,0x26,0x4e,0xf0,0x28,0x72,0x28,0x47,0xe8,0x23,0xdf, - 0x1e,0x64,0xf1,0x51,0x38,0x35,0x9d,0xc2,0x83,0xfc,0x35,0x4e,0xc0,0x52,0x5f,0x41, - 0x6a,0x0b,0x7d,0xf5,0xce,0x98,0xde,0x6f,0x36,0xd8,0x51,0x15,0x78,0x78,0x93,0x67, -}; diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx.h deleted file mode 100644 index 663d8335..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx.h +++ /dev/null @@ -1,367 +0,0 @@ -/* x64 */ -#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) - -#define SCRYPT_SALSA64_AVX - -asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) -asm_naked_fn(scrypt_ChunkMix_avx) - a1(push rbp) - a2(mov rbp, rsp) - a2(and rsp, ~63) - a2(sub rsp, 128) - a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ - a2(shl rcx,7) - a2(lea r9,[rcx-128]) - a2(lea rax,[rsi+r9]) - a2(lea r9,[rdx+r9]) - a2(and rdx, rdx) - a2(vmovdqa xmm0,[rax+0]) - a2(vmovdqa xmm1,[rax+16]) - a2(vmovdqa xmm2,[rax+32]) - a2(vmovdqa xmm3,[rax+48]) - a2(vmovdqa xmm4,[rax+64]) - a2(vmovdqa xmm5,[rax+80]) - a2(vmovdqa xmm6,[rax+96]) - a2(vmovdqa xmm7,[rax+112]) - aj(jz scrypt_ChunkMix_avx_no_xor1) - a3(vpxor xmm0,xmm0,[r9+0]) - a3(vpxor xmm1,xmm1,[r9+16]) - a3(vpxor xmm2,xmm2,[r9+32]) - a3(vpxor xmm3,xmm3,[r9+48]) - a3(vpxor xmm4,xmm4,[r9+64]) - a3(vpxor xmm5,xmm5,[r9+80]) - a3(vpxor xmm6,xmm6,[r9+96]) - a3(vpxor xmm7,xmm7,[r9+112]) - a1(scrypt_ChunkMix_avx_no_xor1:) - a2(xor r9,r9) - a2(xor r8,r8) - a1(scrypt_ChunkMix_avx_loop:) - a2(and rdx, rdx) - a3(vpxor xmm0,xmm0,[rsi+r9+0]) - a3(vpxor xmm1,xmm1,[rsi+r9+16]) - a3(vpxor xmm2,xmm2,[rsi+r9+32]) - a3(vpxor xmm3,xmm3,[rsi+r9+48]) - a3(vpxor xmm4,xmm4,[rsi+r9+64]) - a3(vpxor xmm5,xmm5,[rsi+r9+80]) - a3(vpxor xmm6,xmm6,[rsi+r9+96]) - a3(vpxor xmm7,xmm7,[rsi+r9+112]) - aj(jz scrypt_ChunkMix_avx_no_xor2) - a3(vpxor xmm0,xmm0,[rdx+r9+0]) - a3(vpxor xmm1,xmm1,[rdx+r9+16]) - a3(vpxor xmm2,xmm2,[rdx+r9+32]) - a3(vpxor xmm3,xmm3,[rdx+r9+48]) - a3(vpxor xmm4,xmm4,[rdx+r9+64]) - a3(vpxor xmm5,xmm5,[rdx+r9+80]) - a3(vpxor xmm6,xmm6,[rdx+r9+96]) - a3(vpxor xmm7,xmm7,[rdx+r9+112]) - a1(scrypt_ChunkMix_avx_no_xor2:) - a2(vmovdqa [rsp+0],xmm0) - a2(vmovdqa [rsp+16],xmm1) - a2(vmovdqa [rsp+32],xmm2) - a2(vmovdqa [rsp+48],xmm3) - a2(vmovdqa [rsp+64],xmm4) - a2(vmovdqa [rsp+80],xmm5) - a2(vmovdqa [rsp+96],xmm6) - a2(vmovdqa [rsp+112],xmm7) - a2(mov rax,8) - a1(scrypt_salsa64_avx_loop: ) - a3(vpaddq xmm8, xmm0, xmm2) - a3(vpaddq xmm9, xmm1, xmm3) - a3(vpshufd xmm8, xmm8, 0xb1) - a3(vpshufd xmm9, xmm9, 0xb1) - a3(vpxor xmm6, xmm6, xmm8) - a3(vpxor xmm7, xmm7, xmm9) - a3(vpaddq xmm10, xmm0, xmm6) - a3(vpaddq xmm11, xmm1, xmm7) - a3(vpsrlq xmm8, xmm10, 51) - a3(vpsrlq xmm9, xmm11, 51) - a3(vpsllq xmm10, xmm10, 13) - a3(vpsllq xmm11, xmm11, 13) - a3(vpxor xmm4, xmm4, xmm8) - a3(vpxor xmm5, xmm5, xmm9) - a3(vpxor xmm4, xmm4, xmm10) - a3(vpxor xmm5, xmm5, xmm11) - a3(vpaddq xmm8, xmm6, xmm4) - a3(vpaddq xmm9, xmm7, xmm5) - a3(vpsrlq xmm10, xmm8, 25) - a3(vpsrlq xmm11, xmm9, 25) - a3(vpsllq xmm8, xmm8, 39) - a3(vpsllq xmm9, xmm9, 39) - a3(vpxor xmm2, xmm2, xmm10) - a3(vpxor xmm3, xmm3, xmm11) - a3(vpxor xmm2, xmm2, xmm8) - a3(vpxor xmm3, xmm3, xmm9) - a3(vpaddq xmm10, xmm4, xmm2) - a3(vpaddq xmm11, xmm5, xmm3) - a3(vpshufd xmm10, xmm10, 0xb1) - a3(vpshufd xmm11, xmm11, 0xb1) - a3(vpxor xmm0, xmm0, xmm10) - a3(vpxor xmm1, xmm1, xmm11) - a2(vmovdqa xmm8, xmm2) - a2(vmovdqa xmm9, xmm3) - a4(vpalignr xmm2, xmm6, xmm7, 8) - a4(vpalignr xmm3, xmm7, xmm6, 8) - a4(vpalignr xmm6, xmm9, xmm8, 8) - a4(vpalignr xmm7, xmm8, xmm9, 8) - a3(vpaddq xmm10, xmm0, xmm2) - a3(vpaddq xmm11, xmm1, xmm3) - a3(vpshufd xmm10, xmm10, 0xb1) - a3(vpshufd xmm11, xmm11, 0xb1) - a3(vpxor xmm6, xmm6, xmm10) - a3(vpxor xmm7, xmm7, xmm11) - a3(vpaddq xmm8, xmm0, xmm6) - a3(vpaddq xmm9, xmm1, xmm7) - a3(vpsrlq xmm10, xmm8, 51) - a3(vpsrlq xmm11, xmm9, 51) - a3(vpsllq xmm8, xmm8, 13) - a3(vpsllq xmm9, xmm9, 13) - a3(vpxor xmm5, xmm5, xmm10) - a3(vpxor xmm4, xmm4, xmm11) - a3(vpxor xmm5, xmm5, xmm8) - a3(vpxor xmm4, xmm4, xmm9) - a3(vpaddq xmm10, xmm6, xmm5) - a3(vpaddq xmm11, xmm7, xmm4) - a3(vpsrlq xmm8, xmm10, 25) - a3(vpsrlq xmm9, xmm11, 25) - a3(vpsllq xmm10, xmm10, 39) - a3(vpsllq xmm11, xmm11, 39) - a3(vpxor xmm2, xmm2, xmm8) - a3(vpxor xmm3, xmm3, xmm9) - a3(vpxor xmm2, xmm2, xmm10) - a3(vpxor xmm3, xmm3, xmm11) - a3(vpaddq xmm8, xmm5, xmm2) - a3(vpaddq xmm9, xmm4, xmm3) - a3(vpshufd xmm8, xmm8, 0xb1) - a3(vpshufd xmm9, xmm9, 0xb1) - a3(vpxor xmm0, xmm0, xmm8) - a3(vpxor xmm1, xmm1, xmm9) - a2(vmovdqa xmm10, xmm2) - a2(vmovdqa xmm11, xmm3) - a4(vpalignr xmm2, xmm6, xmm7, 8) - a4(vpalignr xmm3, xmm7, xmm6, 8) - a4(vpalignr xmm6, xmm11, xmm10, 8) - a4(vpalignr xmm7, xmm10, xmm11, 8) - a2(sub rax, 2) - aj(ja scrypt_salsa64_avx_loop) - a3(vpaddq xmm0,xmm0,[rsp+0]) - a3(vpaddq xmm1,xmm1,[rsp+16]) - a3(vpaddq xmm2,xmm2,[rsp+32]) - a3(vpaddq xmm3,xmm3,[rsp+48]) - a3(vpaddq xmm4,xmm4,[rsp+64]) - a3(vpaddq xmm5,xmm5,[rsp+80]) - a3(vpaddq xmm6,xmm6,[rsp+96]) - a3(vpaddq xmm7,xmm7,[rsp+112]) - a2(lea rax,[r8+r9]) - a2(xor r8,rcx) - a2(and rax,~0xff) - a2(add r9,128) - a2(shr rax,1) - a2(add rax, rdi) - a2(cmp r9,rcx) - a2(vmovdqa [rax+0],xmm0) - a2(vmovdqa [rax+16],xmm1) - a2(vmovdqa [rax+32],xmm2) - a2(vmovdqa [rax+48],xmm3) - a2(vmovdqa [rax+64],xmm4) - a2(vmovdqa [rax+80],xmm5) - a2(vmovdqa [rax+96],xmm6) - a2(vmovdqa [rax+112],xmm7) - aj(jne scrypt_ChunkMix_avx_loop) - a2(mov rsp, rbp) - a1(pop rbp) - a1(ret) -asm_naked_fn_end(scrypt_ChunkMix_avx) - -#endif - - -/* intrinsic */ -#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) - -#define SCRYPT_SALSA64_AVX - -static void asm_calling_convention -scrypt_ChunkMix_avx(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { - uint32_t i, blocksPerChunk = r * 2, half = 0; - xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; - size_t rounds; - - /* 1: X = B_{2r - 1} */ - xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); - x0 = xmmp[0]; - x1 = xmmp[1]; - x2 = xmmp[2]; - x3 = xmmp[3]; - x4 = xmmp[4]; - x5 = xmmp[5]; - x6 = xmmp[6]; - x7 = xmmp[7]; - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < blocksPerChunk; i++, half ^= r) { - /* 3: X = H(X ^ B_i) */ - xmmp = (xmmi *)scrypt_block(Bin, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - t0 = x0; - t1 = x1; - t2 = x2; - t3 = x3; - t4 = x4; - t5 = x5; - t6 = x6; - t7 = x7; - - for (rounds = 8; rounds; rounds -= 2) { - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z2 = _mm_srli_epi64(z0, 64-13); - z3 = _mm_srli_epi64(z1, 64-13); - z0 = _mm_slli_epi64(z0, 13); - z1 = _mm_slli_epi64(z1, 13); - x4 = _mm_xor_si128(x4, z2); - x5 = _mm_xor_si128(x5, z3); - x4 = _mm_xor_si128(x4, z0); - x5 = _mm_xor_si128(x5, z1); - - z0 = _mm_add_epi64(x4, x6); - z1 = _mm_add_epi64(x5, x7); - z2 = _mm_srli_epi64(z0, 64-39); - z3 = _mm_srli_epi64(z1, 64-39); - z0 = _mm_slli_epi64(z0, 39); - z1 = _mm_slli_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z2); - x3 = _mm_xor_si128(x3, z3); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x4); - z1 = _mm_add_epi64(x3, x5); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x2; - z1 = x3; - x2 = _mm_alignr_epi8(x6, x7, 8); - x3 = _mm_alignr_epi8(x7, x6, 8); - x6 = _mm_alignr_epi8(z1, z0, 8); - x7 = _mm_alignr_epi8(z0, z1, 8); - - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z2 = _mm_srli_epi64(z0, 64-13); - z3 = _mm_srli_epi64(z1, 64-13); - z0 = _mm_slli_epi64(z0, 13); - z1 = _mm_slli_epi64(z1, 13); - x5 = _mm_xor_si128(x5, z2); - x4 = _mm_xor_si128(x4, z3); - x5 = _mm_xor_si128(x5, z0); - x4 = _mm_xor_si128(x4, z1); - - z0 = _mm_add_epi64(x5, x6); - z1 = _mm_add_epi64(x4, x7); - z2 = _mm_srli_epi64(z0, 64-39); - z3 = _mm_srli_epi64(z1, 64-39); - z0 = _mm_slli_epi64(z0, 39); - z1 = _mm_slli_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z2); - x3 = _mm_xor_si128(x3, z3); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x5); - z1 = _mm_add_epi64(x3, x4); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x2; - z1 = x3; - x2 = _mm_alignr_epi8(x6, x7, 8); - x3 = _mm_alignr_epi8(x7, x6, 8); - x6 = _mm_alignr_epi8(z1, z0, 8); - x7 = _mm_alignr_epi8(z0, z1, 8); - } - - x0 = _mm_add_epi64(x0, t0); - x1 = _mm_add_epi64(x1, t1); - x2 = _mm_add_epi64(x2, t2); - x3 = _mm_add_epi64(x3, t3); - x4 = _mm_add_epi64(x4, t4); - x5 = _mm_add_epi64(x5, t5); - x6 = _mm_add_epi64(x6, t6); - x7 = _mm_add_epi64(x7, t7); - - /* 4: Y_i = X */ - /* 6: B'[0..r-1] = Y_even */ - /* 6: B'[r..2r-1] = Y_odd */ - xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); - xmmp[0] = x0; - xmmp[1] = x1; - xmmp[2] = x2; - xmmp[3] = x3; - xmmp[4] = x4; - xmmp[5] = x5; - xmmp[6] = x6; - xmmp[7] = x7; - } -} - -#endif - -#if defined(SCRYPT_SALSA64_AVX) - /* uses salsa64_core_tangle_sse2 */ - - #undef SCRYPT_MIX - #define SCRYPT_MIX "Salsa64/8-AVX" - #undef SCRYPT_SALSA64_INCLUDED - #define SCRYPT_SALSA64_INCLUDED -#endif diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx2.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx2.h deleted file mode 100644 index 81813026..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-avx2.h +++ /dev/null @@ -1,221 +0,0 @@ -/* x64 */ -#if defined(X86_64ASM_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) - -#define SCRYPT_SALSA64_AVX2 - -asm_naked_fn_proto(void, scrypt_ChunkMix_avx2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) -asm_naked_fn(scrypt_ChunkMix_avx2) - a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ - a2(shl rcx,7) - a2(lea r9,[rcx-128]) - a2(lea rax,[rsi+r9]) - a2(lea r9,[rdx+r9]) - a2(and rdx, rdx) - a2(vmovdqa ymm0,[rax+0]) - a2(vmovdqa ymm1,[rax+32]) - a2(vmovdqa ymm2,[rax+64]) - a2(vmovdqa ymm3,[rax+96]) - aj(jz scrypt_ChunkMix_avx2_no_xor1) - a3(vpxor ymm0,ymm0,[r9+0]) - a3(vpxor ymm1,ymm1,[r9+32]) - a3(vpxor ymm2,ymm2,[r9+64]) - a3(vpxor ymm3,ymm3,[r9+96]) - a1(scrypt_ChunkMix_avx2_no_xor1:) - a2(xor r9,r9) - a2(xor r8,r8) - a1(scrypt_ChunkMix_avx2_loop:) - a2(and rdx, rdx) - a3(vpxor ymm0,ymm0,[rsi+r9+0]) - a3(vpxor ymm1,ymm1,[rsi+r9+32]) - a3(vpxor ymm2,ymm2,[rsi+r9+64]) - a3(vpxor ymm3,ymm3,[rsi+r9+96]) - aj(jz scrypt_ChunkMix_avx2_no_xor2) - a3(vpxor ymm0,ymm0,[rdx+r9+0]) - a3(vpxor ymm1,ymm1,[rdx+r9+32]) - a3(vpxor ymm2,ymm2,[rdx+r9+64]) - a3(vpxor ymm3,ymm3,[rdx+r9+96]) - a1(scrypt_ChunkMix_avx2_no_xor2:) - a2(vmovdqa ymm6,ymm0) - a2(vmovdqa ymm7,ymm1) - a2(vmovdqa ymm8,ymm2) - a2(vmovdqa ymm9,ymm3) - a2(mov rax,4) - a1(scrypt_salsa64_avx2_loop: ) - a3(vpaddq ymm4, ymm1, ymm0) - a3(vpshufd ymm4, ymm4, 0xb1) - a3(vpxor ymm3, ymm3, ymm4) - a3(vpaddq ymm4, ymm0, ymm3) - a3(vpsrlq ymm5, ymm4, 51) - a3(vpxor ymm2, ymm2, ymm5) - a3(vpsllq ymm4, ymm4, 13) - a3(vpxor ymm2, ymm2, ymm4) - a3(vpaddq ymm4, ymm3, ymm2) - a3(vpsrlq ymm5, ymm4, 25) - a3(vpxor ymm1, ymm1, ymm5) - a3(vpsllq ymm4, ymm4, 39) - a3(vpxor ymm1, ymm1, ymm4) - a3(vpaddq ymm4, ymm2, ymm1) - a3(vpshufd ymm4, ymm4, 0xb1) - a3(vpermq ymm1, ymm1, 0x39) - a3(vpermq ymm10, ymm2, 0x4e) - a3(vpxor ymm0, ymm0, ymm4) - a3(vpermq ymm3, ymm3, 0x93) - a3(vpaddq ymm4, ymm3, ymm0) - a3(vpshufd ymm4, ymm4, 0xb1) - a3(vpxor ymm1, ymm1, ymm4) - a3(vpaddq ymm4, ymm0, ymm1) - a3(vpsrlq ymm5, ymm4, 51) - a3(vpxor ymm10, ymm10, ymm5) - a3(vpsllq ymm4, ymm4, 13) - a3(vpxor ymm10, ymm10, ymm4) - a3(vpaddq ymm4, ymm1, ymm10) - a3(vpsrlq ymm5, ymm4, 25) - a3(vpxor ymm3, ymm3, ymm5) - a3(vpsllq ymm4, ymm4, 39) - a3(vpermq ymm1, ymm1, 0x93) - a3(vpxor ymm3, ymm3, ymm4) - a3(vpermq ymm2, ymm10, 0x4e) - a3(vpaddq ymm4, ymm10, ymm3) - a3(vpshufd ymm4, ymm4, 0xb1) - a3(vpermq ymm3, ymm3, 0x39) - a3(vpxor ymm0, ymm0, ymm4) - a1(dec rax) - aj(jnz scrypt_salsa64_avx2_loop) - a3(vpaddq ymm0,ymm0,ymm6) - a3(vpaddq ymm1,ymm1,ymm7) - a3(vpaddq ymm2,ymm2,ymm8) - a3(vpaddq ymm3,ymm3,ymm9) - a2(lea rax,[r8+r9]) - a2(xor r8,rcx) - a2(and rax,~0xff) - a2(add r9,128) - a2(shr rax,1) - a2(add rax, rdi) - a2(cmp r9,rcx) - a2(vmovdqa [rax+0],ymm0) - a2(vmovdqa [rax+32],ymm1) - a2(vmovdqa [rax+64],ymm2) - a2(vmovdqa [rax+96],ymm3) - aj(jne scrypt_ChunkMix_avx2_loop) - a1(vzeroupper) - a1(ret) -asm_naked_fn_end(scrypt_ChunkMix_avx2) - -#endif - - -/* intrinsic */ -#if defined(X86_INTRINSIC_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) - -#define SCRYPT_SALSA64_AVX2 - -static void asm_calling_convention -scrypt_ChunkMix_avx2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { - uint32_t i, blocksPerChunk = r * 2, half = 0; - ymmi *ymmp,y0,y1,y2,y3,t0,t1,t2,t3,z0,z1; - size_t rounds; - - /* 1: X = B_{2r - 1} */ - ymmp = (ymmi *)scrypt_block(Bin, blocksPerChunk - 1); - y0 = ymmp[0]; - y1 = ymmp[1]; - y2 = ymmp[2]; - y3 = ymmp[3]; - - if (Bxor) { - ymmp = (ymmi *)scrypt_block(Bxor, blocksPerChunk - 1); - y0 = _mm256_xor_si256(y0, ymmp[0]); - y1 = _mm256_xor_si256(y1, ymmp[1]); - y2 = _mm256_xor_si256(y2, ymmp[2]); - y3 = _mm256_xor_si256(y3, ymmp[3]); - } - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < blocksPerChunk; i++, half ^= r) { - /* 3: X = H(X ^ B_i) */ - ymmp = (ymmi *)scrypt_block(Bin, i); - y0 = _mm256_xor_si256(y0, ymmp[0]); - y1 = _mm256_xor_si256(y1, ymmp[1]); - y2 = _mm256_xor_si256(y2, ymmp[2]); - y3 = _mm256_xor_si256(y3, ymmp[3]); - - if (Bxor) { - ymmp = (ymmi *)scrypt_block(Bxor, i); - y0 = _mm256_xor_si256(y0, ymmp[0]); - y1 = _mm256_xor_si256(y1, ymmp[1]); - y2 = _mm256_xor_si256(y2, ymmp[2]); - y3 = _mm256_xor_si256(y3, ymmp[3]); - } - - t0 = y0; - t1 = y1; - t2 = y2; - t3 = y3; - - for (rounds = 8; rounds; rounds -= 2) { - z0 = _mm256_add_epi64(y0, y1); - z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - y3 = _mm256_xor_si256(y3, z0); - z0 = _mm256_add_epi64(y3, y0); - z1 = _mm256_srli_epi64(z0, 64-13); - y2 = _mm256_xor_si256(y2, z1); - z0 = _mm256_slli_epi64(z0, 13); - y2 = _mm256_xor_si256(y2, z0); - z0 = _mm256_add_epi64(y2, y3); - z1 = _mm256_srli_epi64(z0, 64-39); - y1 = _mm256_xor_si256(y1, z1); - z0 = _mm256_slli_epi64(z0, 39); - y1 = _mm256_xor_si256(y1, z0); - y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(0,3,2,1)); - y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2)); - y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(2,1,0,3)); - z0 = _mm256_add_epi64(y1, y2); - z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - y0 = _mm256_xor_si256(y0, z0); - z0 = _mm256_add_epi64(y0, y3); - z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - y1 = _mm256_xor_si256(y1, z0); - z0 = _mm256_add_epi64(y1, y0); - z1 = _mm256_srli_epi64(z0, 64-13); - y2 = _mm256_xor_si256(y2, z1); - z0 = _mm256_slli_epi64(z0, 13); - y2 = _mm256_xor_si256(y2, z0); - z0 = _mm256_add_epi64(y2, y1); - z1 = _mm256_srli_epi64(z0, 64-39); - y3 = _mm256_xor_si256(y3, z1); - z0 = _mm256_slli_epi64(z0, 39); - y3 = _mm256_xor_si256(y3, z0); - z0 = _mm256_add_epi64(y3, y2); - z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - y0 = _mm256_xor_si256(y0, z0); - y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(2,1,0,3)); - y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2)); - y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(0,3,2,1)); - } - - y0 = _mm256_add_epi64(y0, t0); - y1 = _mm256_add_epi64(y1, t1); - y2 = _mm256_add_epi64(y2, t2); - y3 = _mm256_add_epi64(y3, t3); - - /* 4: Y_i = X */ - /* 6: B'[0..r-1] = Y_even */ - /* 6: B'[r..2r-1] = Y_odd */ - ymmp = (ymmi *)scrypt_block(Bout, (i / 2) + half); - ymmp[0] = y0; - ymmp[1] = y1; - ymmp[2] = y2; - ymmp[3] = y3; - } -} - -#endif - -#if defined(SCRYPT_SALSA64_AVX2) - /* uses salsa64_core_tangle_sse2 */ - - #undef SCRYPT_MIX - #define SCRYPT_MIX "Salsa64/8-AVX2" - #undef SCRYPT_SALSA64_INCLUDED - #define SCRYPT_SALSA64_INCLUDED -#endif diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-sse2.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-sse2.h deleted file mode 100644 index 971d98a3..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-sse2.h +++ /dev/null @@ -1,449 +0,0 @@ -/* x64 */ -#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) - -#define SCRYPT_SALSA64_SSE2 - -asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) -asm_naked_fn(scrypt_ChunkMix_sse2) - a1(push rbp) - a2(mov rbp, rsp) - a2(and rsp, ~63) - a2(sub rsp, 128) - a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ - a2(shl rcx,7) - a2(lea r9,[rcx-128]) - a2(lea rax,[rsi+r9]) - a2(lea r9,[rdx+r9]) - a2(and rdx, rdx) - a2(movdqa xmm0,[rax+0]) - a2(movdqa xmm1,[rax+16]) - a2(movdqa xmm2,[rax+32]) - a2(movdqa xmm3,[rax+48]) - a2(movdqa xmm4,[rax+64]) - a2(movdqa xmm5,[rax+80]) - a2(movdqa xmm6,[rax+96]) - a2(movdqa xmm7,[rax+112]) - aj(jz scrypt_ChunkMix_sse2_no_xor1) - a2(pxor xmm0,[r9+0]) - a2(pxor xmm1,[r9+16]) - a2(pxor xmm2,[r9+32]) - a2(pxor xmm3,[r9+48]) - a2(pxor xmm4,[r9+64]) - a2(pxor xmm5,[r9+80]) - a2(pxor xmm6,[r9+96]) - a2(pxor xmm7,[r9+112]) - a1(scrypt_ChunkMix_sse2_no_xor1:) - a2(xor r9,r9) - a2(xor r8,r8) - a1(scrypt_ChunkMix_sse2_loop:) - a2(and rdx, rdx) - a2(pxor xmm0,[rsi+r9+0]) - a2(pxor xmm1,[rsi+r9+16]) - a2(pxor xmm2,[rsi+r9+32]) - a2(pxor xmm3,[rsi+r9+48]) - a2(pxor xmm4,[rsi+r9+64]) - a2(pxor xmm5,[rsi+r9+80]) - a2(pxor xmm6,[rsi+r9+96]) - a2(pxor xmm7,[rsi+r9+112]) - aj(jz scrypt_ChunkMix_sse2_no_xor2) - a2(pxor xmm0,[rdx+r9+0]) - a2(pxor xmm1,[rdx+r9+16]) - a2(pxor xmm2,[rdx+r9+32]) - a2(pxor xmm3,[rdx+r9+48]) - a2(pxor xmm4,[rdx+r9+64]) - a2(pxor xmm5,[rdx+r9+80]) - a2(pxor xmm6,[rdx+r9+96]) - a2(pxor xmm7,[rdx+r9+112]) - a1(scrypt_ChunkMix_sse2_no_xor2:) - a2(movdqa [rsp+0],xmm0) - a2(movdqa [rsp+16],xmm1) - a2(movdqa [rsp+32],xmm2) - a2(movdqa [rsp+48],xmm3) - a2(movdqa [rsp+64],xmm4) - a2(movdqa [rsp+80],xmm5) - a2(movdqa [rsp+96],xmm6) - a2(movdqa [rsp+112],xmm7) - a2(mov rax,8) - a1(scrypt_salsa64_sse2_loop: ) - a2(movdqa xmm8, xmm0) - a2(movdqa xmm9, xmm1) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm6, xmm8) - a2(pxor xmm7, xmm9) - a2(movdqa xmm10, xmm0) - a2(movdqa xmm11, xmm1) - a2(paddq xmm10, xmm6) - a2(paddq xmm11, xmm7) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 51) - a2(psrlq xmm11, 51) - a2(psllq xmm8, 13) - a2(psllq xmm9, 13) - a2(pxor xmm4, xmm10) - a2(pxor xmm5, xmm11) - a2(pxor xmm4, xmm8) - a2(pxor xmm5, xmm9) - a2(movdqa xmm10, xmm6) - a2(movdqa xmm11, xmm7) - a2(paddq xmm10, xmm4) - a2(paddq xmm11, xmm5) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 25) - a2(psrlq xmm11, 25) - a2(psllq xmm8, 39) - a2(psllq xmm9, 39) - a2(pxor xmm2, xmm10) - a2(pxor xmm3, xmm11) - a2(pxor xmm2, xmm8) - a2(pxor xmm3, xmm9) - a2(movdqa xmm8, xmm4) - a2(movdqa xmm9, xmm5) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm0, xmm8) - a2(pxor xmm1, xmm9) - a2(movdqa xmm8, xmm2) - a2(movdqa xmm9, xmm3) - a2(movdqa xmm10, xmm6) - a2(movdqa xmm11, xmm7) - a2(movdqa xmm2, xmm7) - a2(movdqa xmm3, xmm6) - a2(punpcklqdq xmm10, xmm6) - a2(punpcklqdq xmm11, xmm7) - a2(movdqa xmm6, xmm8) - a2(movdqa xmm7, xmm9) - a2(punpcklqdq xmm9, xmm9) - a2(punpcklqdq xmm8, xmm8) - a2(punpckhqdq xmm2, xmm10) - a2(punpckhqdq xmm3, xmm11) - a2(punpckhqdq xmm6, xmm9) - a2(punpckhqdq xmm7, xmm8) - a2(sub rax, 2) - a2(movdqa xmm8, xmm0) - a2(movdqa xmm9, xmm1) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm6, xmm8) - a2(pxor xmm7, xmm9) - a2(movdqa xmm10, xmm0) - a2(movdqa xmm11, xmm1) - a2(paddq xmm10, xmm6) - a2(paddq xmm11, xmm7) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 51) - a2(psrlq xmm11, 51) - a2(psllq xmm8, 13) - a2(psllq xmm9, 13) - a2(pxor xmm5, xmm10) - a2(pxor xmm4, xmm11) - a2(pxor xmm5, xmm8) - a2(pxor xmm4, xmm9) - a2(movdqa xmm10, xmm6) - a2(movdqa xmm11, xmm7) - a2(paddq xmm10, xmm5) - a2(paddq xmm11, xmm4) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 25) - a2(psrlq xmm11, 25) - a2(psllq xmm8, 39) - a2(psllq xmm9, 39) - a2(pxor xmm2, xmm10) - a2(pxor xmm3, xmm11) - a2(pxor xmm2, xmm8) - a2(pxor xmm3, xmm9) - a2(movdqa xmm8, xmm5) - a2(movdqa xmm9, xmm4) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm0, xmm8) - a2(pxor xmm1, xmm9) - a2(movdqa xmm8, xmm2) - a2(movdqa xmm9, xmm3) - a2(movdqa xmm10, xmm6) - a2(movdqa xmm11, xmm7) - a2(movdqa xmm2, xmm7) - a2(movdqa xmm3, xmm6) - a2(punpcklqdq xmm10, xmm6) - a2(punpcklqdq xmm11, xmm7) - a2(movdqa xmm6, xmm8) - a2(movdqa xmm7, xmm9) - a2(punpcklqdq xmm9, xmm9) - a2(punpcklqdq xmm8, xmm8) - a2(punpckhqdq xmm2, xmm10) - a2(punpckhqdq xmm3, xmm11) - a2(punpckhqdq xmm6, xmm9) - a2(punpckhqdq xmm7, xmm8) - aj(ja scrypt_salsa64_sse2_loop) - a2(paddq xmm0,[rsp+0]) - a2(paddq xmm1,[rsp+16]) - a2(paddq xmm2,[rsp+32]) - a2(paddq xmm3,[rsp+48]) - a2(paddq xmm4,[rsp+64]) - a2(paddq xmm5,[rsp+80]) - a2(paddq xmm6,[rsp+96]) - a2(paddq xmm7,[rsp+112]) - a2(lea rax,[r8+r9]) - a2(xor r8,rcx) - a2(and rax,~0xff) - a2(add r9,128) - a2(shr rax,1) - a2(add rax, rdi) - a2(cmp r9,rcx) - a2(movdqa [rax+0],xmm0) - a2(movdqa [rax+16],xmm1) - a2(movdqa [rax+32],xmm2) - a2(movdqa [rax+48],xmm3) - a2(movdqa [rax+64],xmm4) - a2(movdqa [rax+80],xmm5) - a2(movdqa [rax+96],xmm6) - a2(movdqa [rax+112],xmm7) - aj(jne scrypt_ChunkMix_sse2_loop) - a2(mov rsp, rbp) - a1(pop rbp) - a1(ret) -asm_naked_fn_end(scrypt_ChunkMix_sse2) - -#endif - - -/* intrinsic */ -#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) - -#define SCRYPT_SALSA64_SSE2 - -static void asm_calling_convention -scrypt_ChunkMix_sse2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { - uint32_t i, blocksPerChunk = r * 2, half = 0; - xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; - size_t rounds; - - /* 1: X = B_{2r - 1} */ - xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); - x0 = xmmp[0]; - x1 = xmmp[1]; - x2 = xmmp[2]; - x3 = xmmp[3]; - x4 = xmmp[4]; - x5 = xmmp[5]; - x6 = xmmp[6]; - x7 = xmmp[7]; - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < blocksPerChunk; i++, half ^= r) { - /* 3: X = H(X ^ B_i) */ - xmmp = (xmmi *)scrypt_block(Bin, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - t0 = x0; - t1 = x1; - t2 = x2; - t3 = x3; - t4 = x4; - t5 = x5; - t6 = x6; - t7 = x7; - - for (rounds = 8; rounds; rounds -= 2) { - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z2 = _mm_srli_epi64(z0, 64-13); - z3 = _mm_srli_epi64(z1, 64-13); - z0 = _mm_slli_epi64(z0, 13); - z1 = _mm_slli_epi64(z1, 13); - x4 = _mm_xor_si128(x4, z2); - x5 = _mm_xor_si128(x5, z3); - x4 = _mm_xor_si128(x4, z0); - x5 = _mm_xor_si128(x5, z1); - - z0 = _mm_add_epi64(x4, x6); - z1 = _mm_add_epi64(x5, x7); - z2 = _mm_srli_epi64(z0, 64-39); - z3 = _mm_srli_epi64(z1, 64-39); - z0 = _mm_slli_epi64(z0, 39); - z1 = _mm_slli_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z2); - x3 = _mm_xor_si128(x3, z3); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x4); - z1 = _mm_add_epi64(x3, x5); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x4; - z1 = x5; - z2 = x2; - z3 = x3; - x4 = z1; - x5 = z0; - x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6)); - x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7)); - x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3)); - x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2)); - - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z2 = _mm_srli_epi64(z0, 64-13); - z3 = _mm_srli_epi64(z1, 64-13); - z0 = _mm_slli_epi64(z0, 13); - z1 = _mm_slli_epi64(z1, 13); - x4 = _mm_xor_si128(x4, z2); - x5 = _mm_xor_si128(x5, z3); - x4 = _mm_xor_si128(x4, z0); - x5 = _mm_xor_si128(x5, z1); - - z0 = _mm_add_epi64(x4, x6); - z1 = _mm_add_epi64(x5, x7); - z2 = _mm_srli_epi64(z0, 64-39); - z3 = _mm_srli_epi64(z1, 64-39); - z0 = _mm_slli_epi64(z0, 39); - z1 = _mm_slli_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z2); - x3 = _mm_xor_si128(x3, z3); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x4); - z1 = _mm_add_epi64(x3, x5); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x4; - z1 = x5; - z2 = x2; - z3 = x3; - x4 = z1; - x5 = z0; - x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6)); - x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7)); - x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3)); - x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2)); - } - - x0 = _mm_add_epi64(x0, t0); - x1 = _mm_add_epi64(x1, t1); - x2 = _mm_add_epi64(x2, t2); - x3 = _mm_add_epi64(x3, t3); - x4 = _mm_add_epi64(x4, t4); - x5 = _mm_add_epi64(x5, t5); - x6 = _mm_add_epi64(x6, t6); - x7 = _mm_add_epi64(x7, t7); - - /* 4: Y_i = X */ - /* 6: B'[0..r-1] = Y_even */ - /* 6: B'[r..2r-1] = Y_odd */ - xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); - xmmp[0] = x0; - xmmp[1] = x1; - xmmp[2] = x2; - xmmp[3] = x3; - xmmp[4] = x4; - xmmp[5] = x5; - xmmp[6] = x6; - xmmp[7] = x7; - } -} - -#endif - -#if defined(SCRYPT_SALSA64_SSE2) - #undef SCRYPT_MIX - #define SCRYPT_MIX "Salsa64/8-SSE2" - #undef SCRYPT_SALSA64_INCLUDED - #define SCRYPT_SALSA64_INCLUDED -#endif - -/* sse3/avx use this as well */ -#if defined(SCRYPT_SALSA64_INCLUDED) - /* - Default layout: - 0 1 2 3 - 4 5 6 7 - 8 9 10 11 - 12 13 14 15 - - SSE2 layout: - 0 5 10 15 - 12 1 6 11 - 8 13 2 7 - 4 9 14 3 - */ - - - static void asm_calling_convention - salsa64_core_tangle_sse2(uint64_t *blocks, size_t count) { - uint64_t t; - while (count--) { - t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t; - t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t; - t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t; - t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t; - t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t; - t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t; - blocks += 16; - } - } -#endif \ No newline at end of file diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h deleted file mode 100644 index 21e94c99..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h +++ /dev/null @@ -1,399 +0,0 @@ -/* x64 */ -#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) - -#define SCRYPT_SALSA64_SSSE3 - -asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) -asm_naked_fn(scrypt_ChunkMix_ssse3) - a1(push rbp) - a2(mov rbp, rsp) - a2(and rsp, ~63) - a2(sub rsp, 128) - a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ - a2(shl rcx,7) - a2(lea r9,[rcx-128]) - a2(lea rax,[rsi+r9]) - a2(lea r9,[rdx+r9]) - a2(and rdx, rdx) - a2(movdqa xmm0,[rax+0]) - a2(movdqa xmm1,[rax+16]) - a2(movdqa xmm2,[rax+32]) - a2(movdqa xmm3,[rax+48]) - a2(movdqa xmm4,[rax+64]) - a2(movdqa xmm5,[rax+80]) - a2(movdqa xmm6,[rax+96]) - a2(movdqa xmm7,[rax+112]) - aj(jz scrypt_ChunkMix_ssse3_no_xor1) - a2(pxor xmm0,[r9+0]) - a2(pxor xmm1,[r9+16]) - a2(pxor xmm2,[r9+32]) - a2(pxor xmm3,[r9+48]) - a2(pxor xmm4,[r9+64]) - a2(pxor xmm5,[r9+80]) - a2(pxor xmm6,[r9+96]) - a2(pxor xmm7,[r9+112]) - a1(scrypt_ChunkMix_ssse3_no_xor1:) - a2(xor r9,r9) - a2(xor r8,r8) - a1(scrypt_ChunkMix_ssse3_loop:) - a2(and rdx, rdx) - a2(pxor xmm0,[rsi+r9+0]) - a2(pxor xmm1,[rsi+r9+16]) - a2(pxor xmm2,[rsi+r9+32]) - a2(pxor xmm3,[rsi+r9+48]) - a2(pxor xmm4,[rsi+r9+64]) - a2(pxor xmm5,[rsi+r9+80]) - a2(pxor xmm6,[rsi+r9+96]) - a2(pxor xmm7,[rsi+r9+112]) - aj(jz scrypt_ChunkMix_ssse3_no_xor2) - a2(pxor xmm0,[rdx+r9+0]) - a2(pxor xmm1,[rdx+r9+16]) - a2(pxor xmm2,[rdx+r9+32]) - a2(pxor xmm3,[rdx+r9+48]) - a2(pxor xmm4,[rdx+r9+64]) - a2(pxor xmm5,[rdx+r9+80]) - a2(pxor xmm6,[rdx+r9+96]) - a2(pxor xmm7,[rdx+r9+112]) - a1(scrypt_ChunkMix_ssse3_no_xor2:) - a2(movdqa [rsp+0],xmm0) - a2(movdqa [rsp+16],xmm1) - a2(movdqa [rsp+32],xmm2) - a2(movdqa [rsp+48],xmm3) - a2(movdqa [rsp+64],xmm4) - a2(movdqa [rsp+80],xmm5) - a2(movdqa [rsp+96],xmm6) - a2(movdqa [rsp+112],xmm7) - a2(mov rax,8) - a1(scrypt_salsa64_ssse3_loop: ) - a2(movdqa xmm8, xmm0) - a2(movdqa xmm9, xmm1) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm6, xmm8) - a2(pxor xmm7, xmm9) - a2(movdqa xmm10, xmm0) - a2(movdqa xmm11, xmm1) - a2(paddq xmm10, xmm6) - a2(paddq xmm11, xmm7) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 51) - a2(psrlq xmm11, 51) - a2(psllq xmm8, 13) - a2(psllq xmm9, 13) - a2(pxor xmm4, xmm10) - a2(pxor xmm5, xmm11) - a2(pxor xmm4, xmm8) - a2(pxor xmm5, xmm9) - a2(movdqa xmm10, xmm6) - a2(movdqa xmm11, xmm7) - a2(paddq xmm10, xmm4) - a2(paddq xmm11, xmm5) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 25) - a2(psrlq xmm11, 25) - a2(psllq xmm8, 39) - a2(psllq xmm9, 39) - a2(pxor xmm2, xmm10) - a2(pxor xmm3, xmm11) - a2(pxor xmm2, xmm8) - a2(pxor xmm3, xmm9) - a2(movdqa xmm8, xmm4) - a2(movdqa xmm9, xmm5) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm0, xmm8) - a2(pxor xmm1, xmm9) - a2(movdqa xmm10, xmm2) - a2(movdqa xmm11, xmm3) - a2(movdqa xmm2, xmm6) - a2(movdqa xmm3, xmm7) - a3(palignr xmm2, xmm7, 8) - a3(palignr xmm3, xmm6, 8) - a2(movdqa xmm6, xmm11) - a2(movdqa xmm7, xmm10) - a3(palignr xmm6, xmm10, 8) - a3(palignr xmm7, xmm11, 8) - a2(sub rax, 2) - a2(movdqa xmm8, xmm0) - a2(movdqa xmm9, xmm1) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm6, xmm8) - a2(pxor xmm7, xmm9) - a2(movdqa xmm10, xmm0) - a2(movdqa xmm11, xmm1) - a2(paddq xmm10, xmm6) - a2(paddq xmm11, xmm7) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 51) - a2(psrlq xmm11, 51) - a2(psllq xmm8, 13) - a2(psllq xmm9, 13) - a2(pxor xmm5, xmm10) - a2(pxor xmm4, xmm11) - a2(pxor xmm5, xmm8) - a2(pxor xmm4, xmm9) - a2(movdqa xmm10, xmm6) - a2(movdqa xmm11, xmm7) - a2(paddq xmm10, xmm5) - a2(paddq xmm11, xmm4) - a2(movdqa xmm8, xmm10) - a2(movdqa xmm9, xmm11) - a2(psrlq xmm10, 25) - a2(psrlq xmm11, 25) - a2(psllq xmm8, 39) - a2(psllq xmm9, 39) - a2(pxor xmm2, xmm10) - a2(pxor xmm3, xmm11) - a2(pxor xmm2, xmm8) - a2(pxor xmm3, xmm9) - a2(movdqa xmm8, xmm5) - a2(movdqa xmm9, xmm4) - a2(paddq xmm8, xmm2) - a2(paddq xmm9, xmm3) - a3(pshufd xmm8, xmm8, 0xb1) - a3(pshufd xmm9, xmm9, 0xb1) - a2(pxor xmm0, xmm8) - a2(pxor xmm1, xmm9) - a2(movdqa xmm10, xmm2) - a2(movdqa xmm11, xmm3) - a2(movdqa xmm2, xmm6) - a2(movdqa xmm3, xmm7) - a3(palignr xmm2, xmm7, 8) - a3(palignr xmm3, xmm6, 8) - a2(movdqa xmm6, xmm11) - a2(movdqa xmm7, xmm10) - a3(palignr xmm6, xmm10, 8) - a3(palignr xmm7, xmm11, 8) - aj(ja scrypt_salsa64_ssse3_loop) - a2(paddq xmm0,[rsp+0]) - a2(paddq xmm1,[rsp+16]) - a2(paddq xmm2,[rsp+32]) - a2(paddq xmm3,[rsp+48]) - a2(paddq xmm4,[rsp+64]) - a2(paddq xmm5,[rsp+80]) - a2(paddq xmm6,[rsp+96]) - a2(paddq xmm7,[rsp+112]) - a2(lea rax,[r8+r9]) - a2(xor r8,rcx) - a2(and rax,~0xff) - a2(add r9,128) - a2(shr rax,1) - a2(add rax, rdi) - a2(cmp r9,rcx) - a2(movdqa [rax+0],xmm0) - a2(movdqa [rax+16],xmm1) - a2(movdqa [rax+32],xmm2) - a2(movdqa [rax+48],xmm3) - a2(movdqa [rax+64],xmm4) - a2(movdqa [rax+80],xmm5) - a2(movdqa [rax+96],xmm6) - a2(movdqa [rax+112],xmm7) - aj(jne scrypt_ChunkMix_ssse3_loop) - a2(mov rsp, rbp) - a1(pop rbp) - a1(ret) -asm_naked_fn_end(scrypt_ChunkMix_ssse3) - -#endif - - -/* intrinsic */ -#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) - -#define SCRYPT_SALSA64_SSSE3 - -static void asm_calling_convention -scrypt_ChunkMix_ssse3(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { - uint32_t i, blocksPerChunk = r * 2, half = 0; - xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; - size_t rounds; - - /* 1: X = B_{2r - 1} */ - xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); - x0 = xmmp[0]; - x1 = xmmp[1]; - x2 = xmmp[2]; - x3 = xmmp[3]; - x4 = xmmp[4]; - x5 = xmmp[5]; - x6 = xmmp[6]; - x7 = xmmp[7]; - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < blocksPerChunk; i++, half ^= r) { - /* 3: X = H(X ^ B_i) */ - xmmp = (xmmi *)scrypt_block(Bin, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - t0 = x0; - t1 = x1; - t2 = x2; - t3 = x3; - t4 = x4; - t5 = x5; - t6 = x6; - t7 = x7; - - for (rounds = 8; rounds; rounds -= 2) { - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z2 = _mm_srli_epi64(z0, 64-13); - z3 = _mm_srli_epi64(z1, 64-13); - z0 = _mm_slli_epi64(z0, 13); - z1 = _mm_slli_epi64(z1, 13); - x4 = _mm_xor_si128(x4, z2); - x5 = _mm_xor_si128(x5, z3); - x4 = _mm_xor_si128(x4, z0); - x5 = _mm_xor_si128(x5, z1); - - z0 = _mm_add_epi64(x4, x6); - z1 = _mm_add_epi64(x5, x7); - z2 = _mm_srli_epi64(z0, 64-39); - z3 = _mm_srli_epi64(z1, 64-39); - z0 = _mm_slli_epi64(z0, 39); - z1 = _mm_slli_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z2); - x3 = _mm_xor_si128(x3, z3); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x4); - z1 = _mm_add_epi64(x3, x5); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x2; - z1 = x3; - x2 = _mm_alignr_epi8(x6, x7, 8); - x3 = _mm_alignr_epi8(x7, x6, 8); - x6 = _mm_alignr_epi8(z1, z0, 8); - x7 = _mm_alignr_epi8(z0, z1, 8); - - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z2 = _mm_srli_epi64(z0, 64-13); - z3 = _mm_srli_epi64(z1, 64-13); - z0 = _mm_slli_epi64(z0, 13); - z1 = _mm_slli_epi64(z1, 13); - x5 = _mm_xor_si128(x5, z2); - x4 = _mm_xor_si128(x4, z3); - x5 = _mm_xor_si128(x5, z0); - x4 = _mm_xor_si128(x4, z1); - - z0 = _mm_add_epi64(x5, x6); - z1 = _mm_add_epi64(x4, x7); - z2 = _mm_srli_epi64(z0, 64-39); - z3 = _mm_srli_epi64(z1, 64-39); - z0 = _mm_slli_epi64(z0, 39); - z1 = _mm_slli_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z2); - x3 = _mm_xor_si128(x3, z3); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x5); - z1 = _mm_add_epi64(x3, x4); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x2; - z1 = x3; - x2 = _mm_alignr_epi8(x6, x7, 8); - x3 = _mm_alignr_epi8(x7, x6, 8); - x6 = _mm_alignr_epi8(z1, z0, 8); - x7 = _mm_alignr_epi8(z0, z1, 8); - } - - x0 = _mm_add_epi64(x0, t0); - x1 = _mm_add_epi64(x1, t1); - x2 = _mm_add_epi64(x2, t2); - x3 = _mm_add_epi64(x3, t3); - x4 = _mm_add_epi64(x4, t4); - x5 = _mm_add_epi64(x5, t5); - x6 = _mm_add_epi64(x6, t6); - x7 = _mm_add_epi64(x7, t7); - - /* 4: Y_i = X */ - /* 6: B'[0..r-1] = Y_even */ - /* 6: B'[r..2r-1] = Y_odd */ - xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); - xmmp[0] = x0; - xmmp[1] = x1; - xmmp[2] = x2; - xmmp[3] = x3; - xmmp[4] = x4; - xmmp[5] = x5; - xmmp[6] = x6; - xmmp[7] = x7; - } -} - -#endif - -#if defined(SCRYPT_SALSA64_SSSE3) - /* uses salsa64_core_tangle_sse2 */ - - #undef SCRYPT_MIX - #define SCRYPT_MIX "Salsa64/8-SSSE3" - #undef SCRYPT_SALSA64_INCLUDED - #define SCRYPT_SALSA64_INCLUDED -#endif diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-xop.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-xop.h deleted file mode 100644 index 94852471..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64-xop.h +++ /dev/null @@ -1,335 +0,0 @@ -/* x64 */ -#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) - -#define SCRYPT_SALSA64_XOP - -asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) -asm_naked_fn(scrypt_ChunkMix_xop) - a1(push rbp) - a2(mov rbp, rsp) - a2(and rsp, ~63) - a2(sub rsp, 128) - a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ - a2(shl rcx,7) - a2(lea r9,[rcx-128]) - a2(lea rax,[rsi+r9]) - a2(lea r9,[rdx+r9]) - a2(and rdx, rdx) - a2(vmovdqa xmm0,[rax+0]) - a2(vmovdqa xmm1,[rax+16]) - a2(vmovdqa xmm2,[rax+32]) - a2(vmovdqa xmm3,[rax+48]) - a2(vmovdqa xmm4,[rax+64]) - a2(vmovdqa xmm5,[rax+80]) - a2(vmovdqa xmm6,[rax+96]) - a2(vmovdqa xmm7,[rax+112]) - aj(jz scrypt_ChunkMix_xop_no_xor1) - a3(vpxor xmm0,xmm0,[r9+0]) - a3(vpxor xmm1,xmm1,[r9+16]) - a3(vpxor xmm2,xmm2,[r9+32]) - a3(vpxor xmm3,xmm3,[r9+48]) - a3(vpxor xmm4,xmm4,[r9+64]) - a3(vpxor xmm5,xmm5,[r9+80]) - a3(vpxor xmm6,xmm6,[r9+96]) - a3(vpxor xmm7,xmm7,[r9+112]) - a1(scrypt_ChunkMix_xop_no_xor1:) - a2(xor r9,r9) - a2(xor r8,r8) - a1(scrypt_ChunkMix_xop_loop:) - a2(and rdx, rdx) - a3(vpxor xmm0,xmm0,[rsi+r9+0]) - a3(vpxor xmm1,xmm1,[rsi+r9+16]) - a3(vpxor xmm2,xmm2,[rsi+r9+32]) - a3(vpxor xmm3,xmm3,[rsi+r9+48]) - a3(vpxor xmm4,xmm4,[rsi+r9+64]) - a3(vpxor xmm5,xmm5,[rsi+r9+80]) - a3(vpxor xmm6,xmm6,[rsi+r9+96]) - a3(vpxor xmm7,xmm7,[rsi+r9+112]) - aj(jz scrypt_ChunkMix_xop_no_xor2) - a3(vpxor xmm0,xmm0,[rdx+r9+0]) - a3(vpxor xmm1,xmm1,[rdx+r9+16]) - a3(vpxor xmm2,xmm2,[rdx+r9+32]) - a3(vpxor xmm3,xmm3,[rdx+r9+48]) - a3(vpxor xmm4,xmm4,[rdx+r9+64]) - a3(vpxor xmm5,xmm5,[rdx+r9+80]) - a3(vpxor xmm6,xmm6,[rdx+r9+96]) - a3(vpxor xmm7,xmm7,[rdx+r9+112]) - a1(scrypt_ChunkMix_xop_no_xor2:) - a2(vmovdqa [rsp+0],xmm0) - a2(vmovdqa [rsp+16],xmm1) - a2(vmovdqa [rsp+32],xmm2) - a2(vmovdqa [rsp+48],xmm3) - a2(vmovdqa [rsp+64],xmm4) - a2(vmovdqa [rsp+80],xmm5) - a2(vmovdqa [rsp+96],xmm6) - a2(vmovdqa [rsp+112],xmm7) - a2(mov rax,8) - a1(scrypt_salsa64_xop_loop: ) - a3(vpaddq xmm8, xmm0, xmm2) - a3(vpaddq xmm9, xmm1, xmm3) - a3(vpshufd xmm8, xmm8, 0xb1) - a3(vpshufd xmm9, xmm9, 0xb1) - a3(vpxor xmm6, xmm6, xmm8) - a3(vpxor xmm7, xmm7, xmm9) - a3(vpaddq xmm10, xmm0, xmm6) - a3(vpaddq xmm11, xmm1, xmm7) - a3(vprotq xmm10, xmm10, 13) - a3(vprotq xmm11, xmm11, 13) - a3(vpxor xmm4, xmm4, xmm10) - a3(vpxor xmm5, xmm5, xmm11) - a3(vpaddq xmm8, xmm6, xmm4) - a3(vpaddq xmm9, xmm7, xmm5) - a3(vprotq xmm8, xmm8, 39) - a3(vprotq xmm9, xmm9, 39) - a3(vpxor xmm2, xmm2, xmm8) - a3(vpxor xmm3, xmm3, xmm9) - a3(vpaddq xmm10, xmm4, xmm2) - a3(vpaddq xmm11, xmm5, xmm3) - a3(vpshufd xmm10, xmm10, 0xb1) - a3(vpshufd xmm11, xmm11, 0xb1) - a3(vpxor xmm0, xmm0, xmm10) - a3(vpxor xmm1, xmm1, xmm11) - a2(vmovdqa xmm8, xmm2) - a2(vmovdqa xmm9, xmm3) - a4(vpalignr xmm2, xmm6, xmm7, 8) - a4(vpalignr xmm3, xmm7, xmm6, 8) - a4(vpalignr xmm6, xmm9, xmm8, 8) - a4(vpalignr xmm7, xmm8, xmm9, 8) - a3(vpaddq xmm10, xmm0, xmm2) - a3(vpaddq xmm11, xmm1, xmm3) - a3(vpshufd xmm10, xmm10, 0xb1) - a3(vpshufd xmm11, xmm11, 0xb1) - a3(vpxor xmm6, xmm6, xmm10) - a3(vpxor xmm7, xmm7, xmm11) - a3(vpaddq xmm8, xmm0, xmm6) - a3(vpaddq xmm9, xmm1, xmm7) - a3(vprotq xmm8, xmm8, 13) - a3(vprotq xmm9, xmm9, 13) - a3(vpxor xmm5, xmm5, xmm8) - a3(vpxor xmm4, xmm4, xmm9) - a3(vpaddq xmm10, xmm6, xmm5) - a3(vpaddq xmm11, xmm7, xmm4) - a3(vprotq xmm10, xmm10, 39) - a3(vprotq xmm11, xmm11, 39) - a3(vpxor xmm2, xmm2, xmm10) - a3(vpxor xmm3, xmm3, xmm11) - a3(vpaddq xmm8, xmm5, xmm2) - a3(vpaddq xmm9, xmm4, xmm3) - a3(vpshufd xmm8, xmm8, 0xb1) - a3(vpshufd xmm9, xmm9, 0xb1) - a3(vpxor xmm0, xmm0, xmm8) - a3(vpxor xmm1, xmm1, xmm9) - a2(vmovdqa xmm10, xmm2) - a2(vmovdqa xmm11, xmm3) - a4(vpalignr xmm2, xmm6, xmm7, 8) - a4(vpalignr xmm3, xmm7, xmm6, 8) - a4(vpalignr xmm6, xmm11, xmm10, 8) - a4(vpalignr xmm7, xmm10, xmm11, 8) - a2(sub rax, 2) - aj(ja scrypt_salsa64_xop_loop) - a3(vpaddq xmm0,xmm0,[rsp+0]) - a3(vpaddq xmm1,xmm1,[rsp+16]) - a3(vpaddq xmm2,xmm2,[rsp+32]) - a3(vpaddq xmm3,xmm3,[rsp+48]) - a3(vpaddq xmm4,xmm4,[rsp+64]) - a3(vpaddq xmm5,xmm5,[rsp+80]) - a3(vpaddq xmm6,xmm6,[rsp+96]) - a3(vpaddq xmm7,xmm7,[rsp+112]) - a2(lea rax,[r8+r9]) - a2(xor r8,rcx) - a2(and rax,~0xff) - a2(add r9,128) - a2(shr rax,1) - a2(add rax, rdi) - a2(cmp r9,rcx) - a2(vmovdqa [rax+0],xmm0) - a2(vmovdqa [rax+16],xmm1) - a2(vmovdqa [rax+32],xmm2) - a2(vmovdqa [rax+48],xmm3) - a2(vmovdqa [rax+64],xmm4) - a2(vmovdqa [rax+80],xmm5) - a2(vmovdqa [rax+96],xmm6) - a2(vmovdqa [rax+112],xmm7) - aj(jne scrypt_ChunkMix_xop_loop) - a2(mov rsp, rbp) - a1(pop rbp) - a1(ret) -asm_naked_fn_end(scrypt_ChunkMix_xop) - -#endif - - -/* intrinsic */ -#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) - -#define SCRYPT_SALSA64_XOP - -static void asm_calling_convention -scrypt_ChunkMix_xop(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { - uint32_t i, blocksPerChunk = r * 2, half = 0; - xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1; - size_t rounds; - - /* 1: X = B_{2r - 1} */ - xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); - x0 = xmmp[0]; - x1 = xmmp[1]; - x2 = xmmp[2]; - x3 = xmmp[3]; - x4 = xmmp[4]; - x5 = xmmp[5]; - x6 = xmmp[6]; - x7 = xmmp[7]; - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < blocksPerChunk; i++, half ^= r) { - /* 3: X = H(X ^ B_i) */ - xmmp = (xmmi *)scrypt_block(Bin, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - - if (Bxor) { - xmmp = (xmmi *)scrypt_block(Bxor, i); - x0 = _mm_xor_si128(x0, xmmp[0]); - x1 = _mm_xor_si128(x1, xmmp[1]); - x2 = _mm_xor_si128(x2, xmmp[2]); - x3 = _mm_xor_si128(x3, xmmp[3]); - x4 = _mm_xor_si128(x4, xmmp[4]); - x5 = _mm_xor_si128(x5, xmmp[5]); - x6 = _mm_xor_si128(x6, xmmp[6]); - x7 = _mm_xor_si128(x7, xmmp[7]); - } - - t0 = x0; - t1 = x1; - t2 = x2; - t3 = x3; - t4 = x4; - t5 = x5; - t6 = x6; - t7 = x7; - - for (rounds = 8; rounds; rounds -= 2) { - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z0 = _mm_roti_epi64(z0, 13); - z1 = _mm_roti_epi64(z1, 13); - x4 = _mm_xor_si128(x4, z0); - x5 = _mm_xor_si128(x5, z1); - - z0 = _mm_add_epi64(x4, x6); - z1 = _mm_add_epi64(x5, x7); - z0 = _mm_roti_epi64(z0, 39); - z1 = _mm_roti_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x4); - z1 = _mm_add_epi64(x3, x5); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x2; - z1 = x3; - x2 = _mm_alignr_epi8(x6, x7, 8); - x3 = _mm_alignr_epi8(x7, x6, 8); - x6 = _mm_alignr_epi8(z1, z0, 8); - x7 = _mm_alignr_epi8(z0, z1, 8); - - z0 = _mm_add_epi64(x0, x2); - z1 = _mm_add_epi64(x1, x3); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x6 = _mm_xor_si128(x6, z0); - x7 = _mm_xor_si128(x7, z1); - - z0 = _mm_add_epi64(x6, x0); - z1 = _mm_add_epi64(x7, x1); - z0 = _mm_roti_epi64(z0, 13); - z1 = _mm_roti_epi64(z1, 13); - x5 = _mm_xor_si128(x5, z0); - x4 = _mm_xor_si128(x4, z1); - - z0 = _mm_add_epi64(x5, x6); - z1 = _mm_add_epi64(x4, x7); - z0 = _mm_roti_epi64(z0, 39); - z1 = _mm_roti_epi64(z1, 39); - x2 = _mm_xor_si128(x2, z0); - x3 = _mm_xor_si128(x3, z1); - - z0 = _mm_add_epi64(x2, x5); - z1 = _mm_add_epi64(x3, x4); - z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); - z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); - x0 = _mm_xor_si128(x0, z0); - x1 = _mm_xor_si128(x1, z1); - - z0 = x2; - z1 = x3; - x2 = _mm_alignr_epi8(x6, x7, 8); - x3 = _mm_alignr_epi8(x7, x6, 8); - x6 = _mm_alignr_epi8(z1, z0, 8); - x7 = _mm_alignr_epi8(z0, z1, 8); - } - - x0 = _mm_add_epi64(x0, t0); - x1 = _mm_add_epi64(x1, t1); - x2 = _mm_add_epi64(x2, t2); - x3 = _mm_add_epi64(x3, t3); - x4 = _mm_add_epi64(x4, t4); - x5 = _mm_add_epi64(x5, t5); - x6 = _mm_add_epi64(x6, t6); - x7 = _mm_add_epi64(x7, t7); - - /* 4: Y_i = X */ - /* 6: B'[0..r-1] = Y_even */ - /* 6: B'[r..2r-1] = Y_odd */ - xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); - xmmp[0] = x0; - xmmp[1] = x1; - xmmp[2] = x2; - xmmp[3] = x3; - xmmp[4] = x4; - xmmp[5] = x5; - xmmp[6] = x6; - xmmp[7] = x7; - } -} - -#endif - -#if defined(SCRYPT_SALSA64_XOP) - /* uses salsa64_core_tangle_sse2 */ - - #undef SCRYPT_MIX - #define SCRYPT_MIX "Salsa64/8-XOP" - #undef SCRYPT_SALSA64_INCLUDED - #define SCRYPT_SALSA64_INCLUDED -#endif diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64.h deleted file mode 100644 index 2aec04f3..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-mix_salsa64.h +++ /dev/null @@ -1,41 +0,0 @@ -#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED) - -#undef SCRYPT_MIX -#define SCRYPT_MIX "Salsa64/8 Ref" - -#undef SCRYPT_SALSA64_INCLUDED -#define SCRYPT_SALSA64_INCLUDED -#define SCRYPT_SALSA64_BASIC - -static void -salsa64_core_basic(uint64_t state[16]) { - const size_t rounds = 8; - uint64_t v[16], t; - size_t i; - - for (i = 0; i < 16; i++) v[i] = state[i]; - - #define G(a,b,c,d) \ - t = v[a]+v[d]; t = ROTL64(t, 32); v[b] ^= t; \ - t = v[b]+v[a]; t = ROTL64(t, 13); v[c] ^= t; \ - t = v[c]+v[b]; t = ROTL64(t, 39); v[d] ^= t; \ - t = v[d]+v[c]; t = ROTL64(t, 32); v[a] ^= t; \ - - for (i = 0; i < rounds; i += 2) { - G( 0, 4, 8,12); - G( 5, 9,13, 1); - G(10,14, 2, 6); - G(15, 3, 7,11); - G( 0, 1, 2, 3); - G( 5, 6, 7, 4); - G(10,11, 8, 9); - G(15,12,13,14); - } - - for (i = 0; i < 16; i++) state[i] += v[i]; - - #undef G -} - -#endif - diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-pbkdf2.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-pbkdf2.h deleted file mode 100644 index ddd8742b..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-pbkdf2.h +++ /dev/null @@ -1,112 +0,0 @@ -typedef struct scrypt_hmac_state_t { - scrypt_hash_state inner, outer; -} scrypt_hmac_state; - - -static void -scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) { - scrypt_hash_state st; - scrypt_hash_init(&st); - scrypt_hash_update(&st, m, mlen); - scrypt_hash_finish(&st, hash); -} - -/* hmac */ -static void -scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) { - uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0}; - size_t i; - - scrypt_hash_init(&st->inner); - scrypt_hash_init(&st->outer); - - if (keylen <= SCRYPT_HASH_BLOCK_SIZE) { - /* use the key directly if it's <= blocksize bytes */ - memcpy(pad, key, keylen); - } else { - /* if it's > blocksize bytes, hash it */ - scrypt_hash(pad, key, keylen); - } - - /* inner = (key ^ 0x36) */ - /* h(inner || ...) */ - for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) - pad[i] ^= 0x36; - scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE); - - /* outer = (key ^ 0x5c) */ - /* h(outer || ...) */ - for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) - pad[i] ^= (0x5c ^ 0x36); - scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE); - - scrypt_ensure_zero(pad, sizeof(pad)); -} - -static void -scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) { - /* h(inner || m...) */ - scrypt_hash_update(&st->inner, m, mlen); -} - -static void -scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) { - /* h(inner || m) */ - scrypt_hash_digest innerhash; - scrypt_hash_finish(&st->inner, innerhash); - - /* h(outer || h(inner || m)) */ - scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash)); - scrypt_hash_finish(&st->outer, mac); - - scrypt_ensure_zero(st, sizeof(*st)); -} - -static void -scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) { - scrypt_hmac_state hmac_pw, hmac_pw_salt, work; - scrypt_hash_digest ti, u; - uint8_t be[4]; - uint32_t i, j, blocks; - uint64_t c; - - /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */ - - /* hmac(password, ...) */ - scrypt_hmac_init(&hmac_pw, password, password_len); - - /* hmac(password, salt...) */ - hmac_pw_salt = hmac_pw; - scrypt_hmac_update(&hmac_pw_salt, salt, salt_len); - - blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE; - for (i = 1; i <= blocks; i++) { - /* U1 = hmac(password, salt || be(i)) */ - U32TO8_BE(be, i); - work = hmac_pw_salt; - scrypt_hmac_update(&work, be, 4); - scrypt_hmac_finish(&work, ti); - memcpy(u, ti, sizeof(u)); - - /* T[i] = U1 ^ U2 ^ U3... */ - for (c = 0; c < N - 1; c++) { - /* UX = hmac(password, U{X-1}) */ - work = hmac_pw; - scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE); - scrypt_hmac_finish(&work, u); - - /* T[i] ^= UX */ - for (j = 0; j < sizeof(u); j++) - ti[j] ^= u[j]; - } - - memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes); - out += SCRYPT_HASH_DIGEST_SIZE; - bytes -= SCRYPT_HASH_DIGEST_SIZE; - } - - scrypt_ensure_zero(ti, sizeof(ti)); - scrypt_ensure_zero(u, sizeof(u)); - scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw)); - scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt)); -} diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-portable-x86.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-portable-x86.h deleted file mode 100644 index c2f9edcc..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-portable-x86.h +++ /dev/null @@ -1,463 +0,0 @@ -#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC)) - #define X86ASM - - /* gcc 2.95 royally screws up stack alignments on variables */ - #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS6PP)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000))) - #define X86ASM_SSE - #define X86ASM_SSE2 - #endif - #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2005)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102))) - #define X86ASM_SSSE3 - #endif - #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40400))) - #define X86ASM_AVX - #define X86ASM_XOP - #endif - #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2012)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40700))) - #define X86ASM_AVX2 - #endif -#endif - -#if defined(CPU_X86_64) && defined(COMPILER_GCC) - #define X86_64ASM - #define X86_64ASM_SSE2 - #if (COMPILER_GCC >= 40102) - #define X86_64ASM_SSSE3 - #endif - #if (COMPILER_GCC >= 40400) - #define X86_64ASM_AVX - #define X86_64ASM_XOP - #endif - #if (COMPILER_GCC >= 40700) - #define X86_64ASM_AVX2 - #endif -#endif - -#if defined(COMPILER_MSVC) && (defined(CPU_X86_FORCE_INTRINSICS) || defined(CPU_X86_64)) - #define X86_INTRINSIC - #if defined(CPU_X86_64) || defined(X86ASM_SSE) - #define X86_INTRINSIC_SSE - #endif - #if defined(CPU_X86_64) || defined(X86ASM_SSE2) - #define X86_INTRINSIC_SSE2 - #endif - #if (COMPILER_MSVC >= COMPILER_MSVC_VS2005) - #define X86_INTRINSIC_SSSE3 - #endif - #if (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1) - #define X86_INTRINSIC_AVX - #define X86_INTRINSIC_XOP - #endif - #if (COMPILER_MSVC >= COMPILER_MSVC_VS2012) - #define X86_INTRINSIC_AVX2 - #endif -#endif - -#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS) - #define X86_INTRINSIC - #if defined(__SSE__) - #define X86_INTRINSIC_SSE - #endif - #if defined(__SSE2__) - #define X86_INTRINSIC_SSE2 - #endif - #if defined(__SSSE3__) - #define X86_INTRINSIC_SSSE3 - #endif - #if defined(__AVX__) - #define X86_INTRINSIC_AVX - #endif - #if defined(__XOP__) - #define X86_INTRINSIC_XOP - #endif - #if defined(__AVX2__) - #define X86_INTRINSIC_AVX2 - #endif -#endif - -/* only use simd on windows (or SSE2 on gcc)! */ -#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC) - #if defined(X86_INTRINSIC_SSE) - #include - #include - typedef __m64 qmm; - typedef __m128 xmm; - typedef __m128d xmmd; - #endif - #if defined(X86_INTRINSIC_SSE2) - #include - typedef __m128i xmmi; - #endif - #if defined(X86_INTRINSIC_SSSE3) - #include - #endif - #if defined(X86_INTRINSIC_AVX) - #include - #endif - #if defined(X86_INTRINSIC_XOP) - #if defined(COMPILER_MSVC) - #include - #else - #include - #endif - #endif - #if defined(X86_INTRINSIC_AVX2) - typedef __m256i ymmi; - #endif -#endif - -#if defined(X86_INTRINSIC_SSE2) - typedef union packedelem8_t { - uint8_t u[16]; - xmmi v; - } packedelem8; - - typedef union packedelem32_t { - uint32_t u[4]; - xmmi v; - } packedelem32; - - typedef union packedelem64_t { - uint64_t u[2]; - xmmi v; - } packedelem64; -#else - typedef union packedelem8_t { - uint8_t u[16]; - uint32_t dw[4]; - } packedelem8; - - typedef union packedelem32_t { - uint32_t u[4]; - uint8_t b[16]; - } packedelem32; - - typedef union packedelem64_t { - uint64_t u[2]; - uint8_t b[16]; - } packedelem64; -#endif - -#if defined(X86_INTRINSIC_SSSE3) - static const packedelem8 ALIGN(16) ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}}; - static const packedelem8 ALIGN(16) ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}}; -#endif - -/* - x86 inline asm for gcc/msvc. usage: - - asm_naked_fn_proto(return_type, name) (type parm1, type parm2..) - asm_naked_fn(name) - a1(..) - a2(.., ..) - a3(.., .., ..) - 64bit OR 0 paramters: a1(ret) - 32bit AND n parameters: aret(4n), eg aret(16) for 4 parameters - asm_naked_fn_end(name) -*/ - -#if defined(X86ASM) || defined(X86_64ASM) - -#if defined(COMPILER_MSVC) - #pragma warning(disable : 4731) /* frame pointer modified by inline assembly */ - #define a1(x) __asm {x} - #define a2(x, y) __asm {x, y} - #define a3(x, y, z) __asm {x, y, z} - #define a4(x, y, z, w) __asm {x, y, z, w} - #define aj(x) __asm {x} - #define asm_align8 a1(ALIGN 8) - #define asm_align16 a1(ALIGN 16) - - #define asm_calling_convention STDCALL - #define aret(n) a1(ret n) - #define asm_naked_fn_proto(type, fn) static NAKED type asm_calling_convention fn - #define asm_naked_fn(fn) { - #define asm_naked_fn_end(fn) } -#elif defined(COMPILER_GCC) - #define GNU_AS1(x) #x ";\n" - #define GNU_AS2(x, y) #x ", " #y ";\n" - #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n" - #define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n" - #define GNU_ASFN(x) "\n_" #x ":\n" #x ":\n" - #define GNU_ASJ(x) ".att_syntax prefix\n" #x "\n.intel_syntax noprefix\n" - - #define a1(x) GNU_AS1(x) - #define a2(x, y) GNU_AS2(x, y) - #define a3(x, y, z) GNU_AS3(x, y, z) - #define a4(x, y, z, w) GNU_AS4(x, y, z, w) - #define aj(x) GNU_ASJ(x) - #define asm_align8 ".p2align 3,,7" - #define asm_align16 ".p2align 4,,15" - - #if defined(OS_WINDOWS) - #define asm_calling_convention CDECL - #define aret(n) a1(ret) - - #if defined(X86_64ASM) - #define asm_naked_fn(fn) ; __asm__ ( \ - ".text\n" \ - asm_align16 GNU_ASFN(fn) \ - "subq $136, %rsp;" \ - "movdqa %xmm6, 0(%rsp);" \ - "movdqa %xmm7, 16(%rsp);" \ - "movdqa %xmm8, 32(%rsp);" \ - "movdqa %xmm9, 48(%rsp);" \ - "movdqa %xmm10, 64(%rsp);" \ - "movdqa %xmm11, 80(%rsp);" \ - "movdqa %xmm12, 96(%rsp);" \ - "movq %rdi, 112(%rsp);" \ - "movq %rsi, 120(%rsp);" \ - "movq %rcx, %rdi;" \ - "movq %rdx, %rsi;" \ - "movq %r8, %rdx;" \ - "movq %r9, %rcx;" \ - "call 1f;" \ - "movdqa 0(%rsp), %xmm6;" \ - "movdqa 16(%rsp), %xmm7;" \ - "movdqa 32(%rsp), %xmm8;" \ - "movdqa 48(%rsp), %xmm9;" \ - "movdqa 64(%rsp), %xmm10;" \ - "movdqa 80(%rsp), %xmm11;" \ - "movdqa 96(%rsp), %xmm12;" \ - "movq 112(%rsp), %rdi;" \ - "movq 120(%rsp), %rsi;" \ - "addq $136, %rsp;" \ - "ret;" \ - ".intel_syntax noprefix;" \ - ".p2align 4,,15;" \ - "1:;" - #else - #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn) - #endif - #else - #define asm_calling_convention STDCALL - #define aret(n) a1(ret n) - #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn) - #endif - - #define asm_naked_fn_proto(type, fn) extern type asm_calling_convention fn - #define asm_naked_fn_end(fn) ".att_syntax prefix;\n" ); - - #define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n" - #define asm_gcc_parms() ".att_syntax prefix;" - #define asm_gcc_trashed() __asm__ __volatile__("" ::: - #define asm_gcc_end() ); -#else - need x86 asm -#endif - -#endif /* X86ASM || X86_64ASM */ - - -#if defined(CPU_X86) || defined(CPU_X86_64) - -typedef enum cpu_flags_x86_t { - cpu_mmx = 1 << 0, - cpu_sse = 1 << 1, - cpu_sse2 = 1 << 2, - cpu_sse3 = 1 << 3, - cpu_ssse3 = 1 << 4, - cpu_sse4_1 = 1 << 5, - cpu_sse4_2 = 1 << 6, - cpu_avx = 1 << 7, - cpu_xop = 1 << 8, - cpu_avx2 = 1 << 9 -} cpu_flags_x86; - -typedef enum cpu_vendors_x86_t { - cpu_nobody, - cpu_intel, - cpu_amd -} cpu_vendors_x86; - -typedef struct x86_regs_t { - uint32_t eax, ebx, ecx, edx; -} x86_regs; - -#if defined(X86ASM) -asm_naked_fn_proto(int, has_cpuid)(void) -asm_naked_fn(has_cpuid) - a1(pushfd) - a1(pop eax) - a2(mov ecx, eax) - a2(xor eax, 0x200000) - a1(push eax) - a1(popfd) - a1(pushfd) - a1(pop eax) - a2(xor eax, ecx) - a2(shr eax, 21) - a2(and eax, 1) - a1(push ecx) - a1(popfd) - a1(ret) -asm_naked_fn_end(has_cpuid) -#endif /* X86ASM */ - - -static void NOINLINE -get_cpuid(x86_regs *regs, uint32_t flags) { -#if defined(COMPILER_MSVC) - __cpuid((int *)regs, (int)flags); -#else - #if defined(CPU_X86_64) - #define cpuid_bx rbx - #else - #define cpuid_bx ebx - #endif - - asm_gcc() - a1(push cpuid_bx) - a2(xor ecx, ecx) - a1(cpuid) - a2(mov [%1 + 0], eax) - a2(mov [%1 + 4], ebx) - a2(mov [%1 + 8], ecx) - a2(mov [%1 + 12], edx) - a1(pop cpuid_bx) - asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc" - asm_gcc_end() -#endif -} - -#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) -static uint64_t NOINLINE -get_xgetbv(uint32_t flags) { -#if defined(COMPILER_MSVC) - return _xgetbv(flags); -#else - uint32_t lo, hi; - asm_gcc() - a1(xgetbv) - asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi) - asm_gcc_end() - return ((uint64_t)lo | ((uint64_t)hi << 32)); -#endif -} -#endif // AVX support - -#if defined(SCRYPT_TEST_SPEED) -size_t cpu_detect_mask = (size_t)-1; -#endif - -static size_t -detect_cpu(void) { - //union { uint8_t s[12]; uint32_t i[3]; } vendor_string; - //cpu_vendors_x86 vendor = cpu_nobody; - x86_regs regs; regs.eax = regs.ebx = regs.ecx = 0; - uint32_t max_level, max_ext_level; - size_t cpu_flags = 0; -#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) - uint64_t xgetbv_flags; -#endif - -#if defined(CPU_X86) - if (!has_cpuid()) - return cpu_flags; -#endif - - get_cpuid(®s, 0); - max_level = regs.eax; -#if 0 - vendor_string.i[0] = regs.ebx; - vendor_string.i[1] = regs.edx; - vendor_string.i[2] = regs.ecx; - - if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12)) - vendor = cpu_intel; - else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12)) - vendor = cpu_amd; -#endif - if (max_level & 0x00000500) { - /* "Intel P5 pre-B0" */ - cpu_flags |= cpu_mmx; - return cpu_flags; - } - - if (max_level < 1) - return cpu_flags; - - get_cpuid(®s, 1); -#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) - /* xsave/xrestore */ - if (regs.ecx & (1 << 27)) { - xgetbv_flags = get_xgetbv(0); - if ((regs.ecx & (1 << 28)) && (xgetbv_flags & 0x6)) cpu_flags |= cpu_avx; - } -#endif - if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2; - if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_2; - if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3; - if (regs.ecx & (1 )) cpu_flags |= cpu_sse3; - if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2; - if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse; - if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx; - - if (cpu_flags & cpu_avx) { - if (max_level >= 7) { - get_cpuid(®s, 7); - if (regs.ebx & (1 << 5)) cpu_flags |= cpu_avx2; - } - - get_cpuid(®s, 0x80000000); - max_ext_level = regs.eax; - if (max_ext_level >= 0x80000001) { - get_cpuid(®s, 0x80000001); - if (regs.ecx & (1 << 11)) cpu_flags |= cpu_xop; - } - } - - -#if defined(SCRYPT_TEST_SPEED) - cpu_flags &= cpu_detect_mask; -#endif - - return cpu_flags; -} - -#if defined(SCRYPT_TEST_SPEED) -static const char * -get_top_cpuflag_desc(size_t flag) { - if (flag & cpu_avx2) return "AVX2"; - else if (flag & cpu_xop) return "XOP"; - else if (flag & cpu_avx) return "AVX"; - else if (flag & cpu_sse4_2) return "SSE4.2"; - else if (flag & cpu_sse4_1) return "SSE4.1"; - else if (flag & cpu_ssse3) return "SSSE3"; - else if (flag & cpu_sse2) return "SSE2"; - else if (flag & cpu_sse) return "SSE"; - else if (flag & cpu_mmx) return "MMX"; - else return "Basic"; -} -#endif - -/* enable the highest system-wide option */ -#if defined(SCRYPT_CHOOSE_COMPILETIME) - #if !defined(__AVX2__) - #undef X86_64ASM_AVX2 - #undef X86ASM_AVX2 - #undef X86_INTRINSIC_AVX2 - #endif - #if !defined(__XOP__) - #undef X86_64ASM_XOP - #undef X86ASM_XOP - #undef X86_INTRINSIC_XOP - #endif - #if !defined(__AVX__) - #undef X86_64ASM_AVX - #undef X86ASM_AVX - #undef X86_INTRINSIC_AVX - #endif - #if !defined(__SSSE3__) - #undef X86_64ASM_SSSE3 - #undef X86ASM_SSSE3 - #undef X86_INTRINSIC_SSSE3 - #endif - #if !defined(__SSE2__) - #undef X86_64ASM_SSE2 - #undef X86ASM_SSE2 - #undef X86_INTRINSIC_SSE2 - #endif -#endif - -#endif /* defined(CPU_X86) || defined(CPU_X86_64) */ diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-portable.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-portable.h deleted file mode 100644 index 9baa55e0..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-portable.h +++ /dev/null @@ -1,310 +0,0 @@ -/* determine os */ -#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__) - #include - #include - #define OS_WINDOWS -#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__) - #include - #include - #include - - #define OS_SOLARIS -#else - #include - #include - #include /* need this to define BSD */ - #include - #include - - #define OS_NIX - #if defined(__linux__) - #include - #define OS_LINUX - #elif defined(BSD) - #define OS_BSD - - #if defined(MACOS_X) || (defined(__APPLE__) & defined(__MACH__)) - #define OS_OSX - #elif defined(macintosh) || defined(Macintosh) - #define OS_MAC - #elif defined(__OpenBSD__) - #define OS_OPENBSD - #endif - #endif -#endif - - -/* determine compiler */ -#if defined(_MSC_VER) - #define COMPILER_MSVC_VS6 120000000 - #define COMPILER_MSVC_VS6PP 121000000 - #define COMPILER_MSVC_VS2002 130000000 - #define COMPILER_MSVC_VS2003 131000000 - #define COMPILER_MSVC_VS2005 140050727 - #define COMPILER_MSVC_VS2008 150000000 - #define COMPILER_MSVC_VS2008SP1 150030729 - #define COMPILER_MSVC_VS2010 160000000 - #define COMPILER_MSVC_VS2010SP1 160040219 - #define COMPILER_MSVC_VS2012RC 170000000 - #define COMPILER_MSVC_VS2012 170050727 - - #if _MSC_FULL_VER > 100000000 - #define COMPILER_MSVC (_MSC_FULL_VER) - #else - #define COMPILER_MSVC (_MSC_FULL_VER * 10) - #endif - - #if ((_MSC_VER == 1200) && defined(_mm_free)) - #undef COMPILER_MSVC - #define COMPILER_MSVC COMPILER_MSVC_VS6PP - #endif - - #pragma warning(disable : 4127) /* conditional expression is constant */ - #pragma warning(disable : 4100) /* unreferenced formal parameter */ - - #ifndef _CRT_SECURE_NO_WARNINGS - #define _CRT_SECURE_NO_WARNINGS - #endif - - #include - #include /* _rotl */ - #include - - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned __int64 uint64_t; - typedef signed __int64 int64_t; - - #define ROTL32(a,b) _rotl(a,b) - #define ROTR32(a,b) _rotr(a,b) - #define ROTL64(a,b) _rotl64(a,b) - #define ROTR64(a,b) _rotr64(a,b) - #undef NOINLINE - #define NOINLINE __declspec(noinline) - #undef NORETURN - #define NORETURN - #undef INLINE - #define INLINE __forceinline - #undef FASTCALL - #define FASTCALL __fastcall - #undef CDECL - #define CDECL __cdecl - #undef STDCALL - #define STDCALL __stdcall - #undef NAKED - #define NAKED __declspec(naked) - #define ALIGN(n) __declspec(align(n)) -#endif -#if defined(__ICC) - #define COMPILER_INTEL -#endif -#if defined(__GNUC__) - #if (__GNUC__ >= 3) - #define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__ - #else - #define COMPILER_GCC_PATCHLEVEL 0 - #endif - #define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL) - #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b))) - #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b))) - #define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b))) - #define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b))) - #undef NOINLINE - #if (COMPILER_GCC >= 30000) - #define NOINLINE __attribute__((noinline)) - #else - #define NOINLINE - #endif - #undef NORETURN - #if (COMPILER_GCC >= 30000) - #define NORETURN __attribute__((noreturn)) - #else - #define NORETURN - #endif - #undef INLINE - #if (COMPILER_GCC >= 30000) - #define INLINE __attribute__((always_inline)) - #else - #define INLINE inline - #endif - #undef FASTCALL - #if (COMPILER_GCC >= 30400) - #define FASTCALL __attribute__((fastcall)) - #else - #define FASTCALL - #endif - #undef CDECL - #define CDECL __attribute__((cdecl)) - #undef STDCALL - #define STDCALL __attribute__((stdcall)) - #define ALIGN(n) __attribute__((aligned(n))) - #include -#endif -#if defined(__MINGW32__) || defined(__MINGW64__) - #define COMPILER_MINGW -#endif -#if defined(__PATHCC__) - #define COMPILER_PATHCC -#endif - -#define OPTIONAL_INLINE -#if defined(OPTIONAL_INLINE) - #undef OPTIONAL_INLINE - #define OPTIONAL_INLINE INLINE -#else - #define OPTIONAL_INLINE -#endif - -#define CRYPTO_FN NOINLINE STDCALL - -/* determine cpu */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64) - #define CPU_X86_64 -#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500)) - #define CPU_X86 500 -#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400)) - #define CPU_X86 400 -#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__) - #define CPU_X86 300 -#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64) - #define CPU_IA64 -#endif - -#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9) - #define CPU_SPARC - #if defined(__sparcv9) - #define CPU_SPARC64 - #endif -#endif - -#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64)) - #define CPU_64BITS - #undef FASTCALL - #define FASTCALL - #undef CDECL - #define CDECL - #undef STDCALL - #define STDCALL -#endif - -#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC) - #define CPU_PPC - #if defined(_ARCH_PWR7) - #define CPU_POWER7 - #elif defined(__64BIT__) - #define CPU_PPC64 - #else - #define CPU_PPC32 - #endif -#endif - -#if defined(__hppa__) || defined(__hppa) - #define CPU_HPPA -#endif - -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) - #define CPU_ALPHA -#endif - -/* endian */ - -#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \ - (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \ - (defined(CPU_X86) || defined(CPU_X86_64)) || \ - (defined(vax) || defined(MIPSEL) || defined(_MIPSEL))) -#define CPU_LE -#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \ - (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \ - (defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB)) -#define CPU_BE -#else - /* unknown endian! */ -#endif - - -#define U8TO32_BE(p) \ - (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ - ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) - -#define U8TO32_LE(p) \ - (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ - ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) - -#define U32TO8_BE(p, v) \ - (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ - (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); - -#define U32TO8_LE(p, v) \ - (p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \ - (p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24); - -#define U8TO64_BE(p) \ - (((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4)) - -#define U8TO64_LE(p) \ - (((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32)) - -#define U64TO8_BE(p, v) \ - U32TO8_BE((p), (uint32_t)((v) >> 32)); \ - U32TO8_BE((p) + 4, (uint32_t)((v) )); - -#define U64TO8_LE(p, v) \ - U32TO8_LE((p), (uint32_t)((v) )); \ - U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); - -#define U32_SWAP(v) { \ - (v) = (((v) << 8) & 0xFF00FF00 ) | (((v) >> 8) & 0xFF00FF ); \ - (v) = ((v) << 16) | ((v) >> 16); \ -} - -#define U64_SWAP(v) { \ - (v) = (((v) << 8) & 0xFF00FF00FF00FF00ull ) | (((v) >> 8) & 0x00FF00FF00FF00FFull ); \ - (v) = (((v) << 16) & 0xFFFF0000FFFF0000ull ) | (((v) >> 16) & 0x0000FFFF0000FFFFull ); \ - (v) = ((v) << 32) | ((v) >> 32); \ -} - -static int -scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) { - uint32_t differentbits = 0; - while (len--) - differentbits |= (*x++ ^ *y++); - return (1 & ((differentbits - 1) >> 8)); -} - -static void -scrypt_ensure_zero(void *p, size_t len) { -#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC)) - __stosb((unsigned char *)p, 0, len); -#elif (defined(CPU_X86) && defined(COMPILER_GCC)) - __asm__ __volatile__( - "pushl %%edi;\n" - "pushl %%ecx;\n" - "rep stosb;\n" - "popl %%ecx;\n" - "popl %%edi;\n" - :: "a"(0), "D"(p), "c"(len) : "cc", "memory" - ); -#elif (defined(CPU_X86_64) && defined(COMPILER_GCC)) - __asm__ __volatile__( - "pushq %%rdi;\n" - "pushq %%rcx;\n" - "rep stosb;\n" - "popq %%rcx;\n" - "popq %%rdi;\n" - :: "a"(0), "D"(p), "c"(len) : "cc", "memory" - ); -#else - volatile uint8_t *b = (volatile uint8_t *)p; - size_t i; - for (i = 0; i < len; i++) - b[i] = 0; -#endif -} - -#include "scrypt-jane-portable-x86.h" - -#if !defined(asm_calling_convention) -#define asm_calling_convention -#endif diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-basic.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-basic.h deleted file mode 100644 index 3124c847..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-basic.h +++ /dev/null @@ -1,75 +0,0 @@ -#if !defined(SCRYPT_CHOOSE_COMPILETIME) -/* function type returned by scrypt_getROMix, used with cpu detection */ -typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r); -#endif - -/* romix pre/post nop function */ -/* -static void asm_calling_convention -scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) { - (void)blocks; (void)nblocks; -} -*/ -/* romix pre/post endian conversion function */ -static void asm_calling_convention -scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) { -#if !defined(CPU_LE) - static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}}; - size_t i; - if (endian_test.w == 0x100) { - nblocks *= SCRYPT_BLOCK_WORDS; - for (i = 0; i < nblocks; i++) { - SCRYPT_WORD_ENDIAN_SWAP(blocks[i]); - } - } -#else - (void)blocks; (void)nblocks; -#endif -} - -/* chunkmix test function */ -typedef void (asm_calling_convention *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r); -typedef void (asm_calling_convention *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks); - -static int -scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) { - /* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */ - const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS; -#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2)) - scrypt_mix_word_t ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v; -#else - scrypt_mix_word_t ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v; -#endif - uint8_t final[16]; - size_t i; - - for (i = 0; i < words; i++) { - v = (scrypt_mix_word_t)i; - v = (v << 8) | v; - v = (v << 16) | v; - chunk[0][i] = v; - } - - prefn(chunk[0], blocks); - mixfn(chunk[1], chunk[0], NULL, r); - postfn(chunk[1], blocks); - - /* grab the last 16 bytes of the final block */ - for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) { - SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]); - } - - return scrypt_verify(expected, final, 16); -} - -/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */ -static scrypt_mix_word_t * -scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) { - return base + (i * len); -} - -/* returns a pointer to block i */ -static scrypt_mix_word_t * -scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) { - return base + (i * SCRYPT_BLOCK_WORDS); -} diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-template.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-template.h deleted file mode 100644 index 373ae604..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix-template.h +++ /dev/null @@ -1,122 +0,0 @@ -#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) - -#if defined(SCRYPT_CHOOSE_COMPILETIME) -#undef SCRYPT_ROMIX_FN -#define SCRYPT_ROMIX_FN scrypt_ROMix -#endif - -#undef SCRYPT_HAVE_ROMIX -#define SCRYPT_HAVE_ROMIX - -#if !defined(SCRYPT_CHUNKMIX_FN) - -#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic - -/* - Bout = ChunkMix(Bin) - - 2*r: number of blocks in the chunk -*/ -static void asm_calling_convention -SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) { -#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2)) - scrypt_mix_word_t ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block; -#else - scrypt_mix_word_t ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block; -#endif - uint32_t i, j, blocksPerChunk = /*r * 2*/2, half = 0; - - /* 1: X = B_{2r - 1} */ - block = scrypt_block(Bin, blocksPerChunk - 1); - for (i = 0; i < SCRYPT_BLOCK_WORDS; i++) - X[i] = block[i]; - - if (Bxor) { - block = scrypt_block(Bxor, blocksPerChunk - 1); - for (i = 0; i < SCRYPT_BLOCK_WORDS; i++) - X[i] ^= block[i]; - } - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < blocksPerChunk; i++, half ^= /*r*/1) { - /* 3: X = H(X ^ B_i) */ - block = scrypt_block(Bin, i); - for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) - X[j] ^= block[j]; - - if (Bxor) { - block = scrypt_block(Bxor, i); - for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) - X[j] ^= block[j]; - } - SCRYPT_MIX_FN(X); - - /* 4: Y_i = X */ - /* 6: B'[0..r-1] = Y_even */ - /* 6: B'[r..2r-1] = Y_odd */ - block = scrypt_block(Bout, (i / 2) + half); - for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) - block[j] = X[j]; - } -} -#endif - -/* - X = ROMix(X) - - X: chunk to mix - Y: scratch chunk - N: number of rounds - V[N]: array of chunks to randomly index in to - 2*r: number of blocks in a chunk -*/ - -static void NOINLINE FASTCALL -SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) { - uint32_t i, j, chunkWords = (uint32_t)(SCRYPT_BLOCK_WORDS * 2); - scrypt_mix_word_t *block = V; - - SCRYPT_ROMIX_TANGLE_FN(X, 2); - - /* 1: X = B */ - /* implicit */ - - /* 2: for i = 0 to N - 1 do */ - memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t)); - for (i = 0; i < /*N - 1*/511; i++, block += chunkWords) { - /* 3: V_i = X */ - /* 4: X = H(X) */ - SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, /*r*/1); - } - SCRYPT_CHUNKMIX_FN(X, block, NULL, 1); - - /* 6: for i = 0 to N - 1 do */ - for (i = 0; i < /*N*/512; i += 2) { - /* 7: j = Integerify(X) % N */ - j = X[chunkWords - SCRYPT_BLOCK_WORDS] & /*(N - 1)*/511; - - /* 8: X = H(Y ^ V_j) */ - SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), 1); - - /* 7: j = Integerify(Y) % N */ - j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & /*(N - 1)*/511; - - /* 8: X = H(Y ^ V_j) */ - SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), 1); - } - - /* 10: B' = X */ - /* implicit */ - - SCRYPT_ROMIX_UNTANGLE_FN(X, 2); -} - -#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */ - - -#undef SCRYPT_CHUNKMIX_FN -#undef SCRYPT_ROMIX_FN -#undef SCRYPT_MIX_FN -#undef SCRYPT_ROMIX_TANGLE_FN -#undef SCRYPT_ROMIX_UNTANGLE_FN - diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix.h deleted file mode 100644 index 02de3577..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-romix.h +++ /dev/null @@ -1,23 +0,0 @@ -#if defined(SCRYPT_SALSA64) -#include "scrypt-jane-salsa64.h" -#else - #define SCRYPT_MIX_BASE "ERROR" - typedef uint32_t scrypt_mix_word_t; - #define SCRYPT_WORDTO8_LE U32TO8_LE - #define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP - #define SCRYPT_BLOCK_BYTES 64 - #define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) - #if !defined(SCRYPT_CHOOSE_COMPILETIME) - static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {} - static scrypt_ROMixfn scrypt_getROMix(void) { return scrypt_ROMix_error; } - #else - static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {} - #endif - static int scrypt_test_mix(void) { return 0; } - #error must define a mix function! -#endif - -#if !defined(SCRYPT_CHOOSE_COMPILETIME) -#undef SCRYPT_MIX -#define SCRYPT_MIX SCRYPT_MIX_BASE -#endif diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-salsa64.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-salsa64.h deleted file mode 100644 index 96b78136..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-salsa64.h +++ /dev/null @@ -1,183 +0,0 @@ -#define SCRYPT_MIX_BASE "Salsa64/8" - -typedef uint64_t scrypt_mix_word_t; - -#define SCRYPT_WORDTO8_LE U64TO8_LE -#define SCRYPT_WORD_ENDIAN_SWAP U64_SWAP - -#define SCRYPT_BLOCK_BYTES 128 -#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) - -/* must have these here in case block bytes is ever != 64 */ -#include "scrypt-jane-romix-basic.h" - -#include "scrypt-jane-mix_salsa64-avx2.h" -#include "scrypt-jane-mix_salsa64-xop.h" -#include "scrypt-jane-mix_salsa64-avx.h" -#include "scrypt-jane-mix_salsa64-ssse3.h" -#include "scrypt-jane-mix_salsa64-sse2.h" -#include "scrypt-jane-mix_salsa64.h" - -#if defined(SCRYPT_SALSA64_AVX2) - #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx2 - #define SCRYPT_ROMIX_FN scrypt_ROMix_avx2 - #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 - #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 - #include "scrypt-jane-romix-template.h" -#endif - -#if defined(SCRYPT_SALSA64_XOP) - #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop - #define SCRYPT_ROMIX_FN scrypt_ROMix_xop - #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 - #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 - #include "scrypt-jane-romix-template.h" -#endif - -#if defined(SCRYPT_SALSA64_AVX) - #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx - #define SCRYPT_ROMIX_FN scrypt_ROMix_avx - #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 - #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 - #include "scrypt-jane-romix-template.h" -#endif - -#if defined(SCRYPT_SALSA64_SSSE3) - #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3 - #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3 - #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 - #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 - #include "scrypt-jane-romix-template.h" -#endif - -#if defined(SCRYPT_SALSA64_SSE2) - #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2 - #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2 - #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 - #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 - #include "scrypt-jane-romix-template.h" -#endif - -/* cpu agnostic */ -#define SCRYPT_ROMIX_FN scrypt_ROMix_basic -#define SCRYPT_MIX_FN salsa64_core_basic -#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian -#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian -#include "scrypt-jane-romix-template.h" - -#if !defined(SCRYPT_CHOOSE_COMPILETIME) -static scrypt_ROMixfn -scrypt_getROMix(void) { - size_t cpuflags = detect_cpu(); - -#if defined(SCRYPT_SALSA64_AVX2) - if (cpuflags & cpu_avx2) - return scrypt_ROMix_avx2; - else -#endif - -#if defined(SCRYPT_SALSA64_XOP) - if (cpuflags & cpu_xop) - return scrypt_ROMix_xop; - else -#endif - -#if defined(SCRYPT_SALSA64_AVX) - if (cpuflags & cpu_avx) - return scrypt_ROMix_avx; - else -#endif - -#if defined(SCRYPT_SALSA64_SSSE3) - if (cpuflags & cpu_ssse3) - return scrypt_ROMix_ssse3; - else -#endif - -#if defined(SCRYPT_SALSA64_SSE2) - if (cpuflags & cpu_sse2) - return scrypt_ROMix_sse2; - else -#endif - - return scrypt_ROMix_basic; -} -#endif - - -#if defined(SCRYPT_TEST_SPEED) -static size_t -available_implementations(void) { - size_t cpuflags = detect_cpu(); - size_t flags = 0; - -#if defined(SCRYPT_SALSA64_AVX2) - if (cpuflags & cpu_avx2) - flags |= cpu_avx2; -#endif - -#if defined(SCRYPT_SALSA64_XOP) - if (cpuflags & cpu_xop) - flags |= cpu_xop; -#endif - -#if defined(SCRYPT_SALSA64_AVX) - if (cpuflags & cpu_avx) - flags |= cpu_avx; -#endif - -#if defined(SCRYPT_SALSA64_SSSE3) - if (cpuflags & cpu_ssse3) - flags |= cpu_ssse3; -#endif - -#if defined(SCRYPT_SALSA64_SSE2) - if (cpuflags & cpu_sse2) - flags |= cpu_sse2; -#endif - - return flags; -} -#endif - -static int -scrypt_test_mix(void) { - static const uint8_t expected[16] = { - 0xf8,0x92,0x9b,0xf8,0xcc,0x1d,0xce,0x2e,0x13,0x82,0xac,0x96,0xb2,0x6c,0xee,0x2c, - }; - - int ret = 1; - size_t cpuflags = detect_cpu(); - -#if defined(SCRYPT_SALSA64_AVX2) - if (cpuflags & cpu_avx2) - ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); -#endif - -#if defined(SCRYPT_SALSA64_XOP) - if (cpuflags & cpu_xop) - ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); -#endif - -#if defined(SCRYPT_SALSA64_AVX) - if (cpuflags & cpu_avx) - ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); -#endif - -#if defined(SCRYPT_SALSA64_SSSE3) - if (cpuflags & cpu_ssse3) - ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); -#endif - -#if defined(SCRYPT_SALSA64_SSE2) - if (cpuflags & cpu_sse2) - ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); -#endif - -#if defined(SCRYPT_SALSA64_BASIC) - ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected); -#endif - - return ret; -} - diff --git a/algo/argon2/argon2a/ar2/sj/scrypt-jane-test-vectors.h b/algo/argon2/argon2a/ar2/sj/scrypt-jane-test-vectors.h deleted file mode 100644 index 2d0b596d..00000000 --- a/algo/argon2/argon2a/ar2/sj/scrypt-jane-test-vectors.h +++ /dev/null @@ -1,28 +0,0 @@ -typedef struct scrypt_test_setting_t { - const char *pw, *salt; - uint8_t Nfactor, rfactor, pfactor; -} scrypt_test_setting; - -static const scrypt_test_setting post_settings[] = { - {"", "", 3, 0, 0}, - {"password", "NaCl", 9, 3, 4}, - {0, 0, 0, 0, 0} -}; - -#if defined(SCRYPT_SKEIN512) - #if defined(SCRYPT_SALSA64) - static const uint8_t post_vectors[][64] = { - {0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60, - 0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59, - 0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9, - 0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89}, - {0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5, - 0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99, - 0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23, - 0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b} - }; - #endif -#else - static const uint8_t post_vectors[][64] = {{0}}; -#endif - diff --git a/algo/argon2/argon2a/argon2a.c b/algo/argon2/argon2a/argon2a.c deleted file mode 100644 index 5a7c54d1..00000000 --- a/algo/argon2/argon2a/argon2a.c +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include -#include -#include -#include "ar2/argon2.h" -#include "ar2/cores.h" -#include "ar2/ar2-scrypt-jane.h" -#include "algo-gate-api.h" - -#define T_COSTS 2 -#define M_COSTS 16 -#define MASK 8 -#define ZERO 0 - -inline void argon_call(void *out, void *in, void *salt, int type) -{ - argon2_context context; - - context.out = (uint8_t *)out; - context.pwd = (uint8_t *)in; - context.salt = (uint8_t*)salt; - context.pwdlen = 0; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - - ar2_argon2_core(&context, type); -} - -void argon2hash(void *output, const void *input) -{ - uint32_t _ALIGN(64) hashA[8], hashB[8]; - - my_scrypt((const unsigned char *)input, 80, - (const unsigned char *)input, 80, - (unsigned char *)hashA); - - argon_call(hashB, hashA, hashA, (hashA[0] & MASK) == ZERO); - - my_scrypt((const unsigned char *)hashB, 32, - (const unsigned char *)hashB, 32, - (unsigned char *)output); -} - -int scanhash_argon2( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t _ALIGN(64) endiandata[20]; - uint32_t _ALIGN(64) hash[8]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - int thr_id = mythr->id; // thr_id arg is deprecated - - const uint32_t first_nonce = pdata[19]; - const uint32_t Htarg = ptarget[7]; - uint32_t nonce = first_nonce; - - swab32_array( endiandata, pdata, 20 ); - - do { - be32enc(&endiandata[19], nonce); - argon2hash(hash, endiandata); - if (hash[7] <= Htarg && fulltest(hash, ptarget)) { - pdata[19] = nonce; - submit_solution( work, hash, mythr ); - } - nonce++; - } while (nonce < max_nonce && !work_restart[thr_id].restart); - - pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce + 1; - return 0; -} - -bool register_argon2_algo( algo_gate_t* gate ) -{ - gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT; - gate->scanhash = (void*)&scanhash_argon2; - gate->hash = (void*)&argon2hash; - gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root; - opt_target_factor = 65536.0; - - return true; -}; - diff --git a/algo/argon2/argon2d/argon2d-gate.c b/algo/argon2/argon2d/argon2d-gate.c deleted file mode 100644 index cd41a326..00000000 --- a/algo/argon2/argon2d/argon2d-gate.c +++ /dev/null @@ -1,189 +0,0 @@ -#include "argon2d-gate.h" -#include "simd-utils.h" -#include "argon2d/argon2.h" - -static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Input Length = Salt Length (salt = input) -static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash -static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS - -// Credits - -void argon2d_crds_hash( void *output, const void *input ) -{ - argon2_context context; - context.out = (uint8_t *)output; - context.outlen = (uint32_t)OUTPUT_BYTES; - context.pwd = (uint8_t *)input; - context.pwdlen = (uint32_t)INPUT_BYTES; - context.salt = (uint8_t *)input; //salt = input - context.saltlen = (uint32_t)INPUT_BYTES; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS - // main configurable Argon2 hash parameters - context.m_cost = 250; // Memory in KiB (~256KB) - context.lanes = 4; // Degree of Parallelism - context.threads = 1; // Threads - context.t_cost = 1; // Iterations - context.version = ARGON2_VERSION_10; - - argon2_ctx( &context, Argon2_d ); -} - -int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t _ALIGN(64) edata[20]; - uint32_t _ALIGN(64) hash[8]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - int thr_id = mythr->id; // thr_id arg is deprecated - const uint32_t first_nonce = pdata[19]; - const uint32_t Htarg = ptarget[7]; - uint32_t nonce = first_nonce; - - swab32_array( edata, pdata, 20 ); - - do { - be32enc(&edata[19], nonce); - argon2d_crds_hash( hash, edata ); - if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark ) - { - pdata[19] = nonce; - submit_solution( work, hash, mythr ); - } - nonce++; - } while (nonce < max_nonce && !work_restart[thr_id].restart); - - pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce + 1; - return 0; -} - -bool register_argon2d_crds_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_argon2d_crds; - gate->hash = (void*)&argon2d_crds_hash; - gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT; - opt_target_factor = 65536.0; - return true; -} - -// Dynamic - -void argon2d_dyn_hash( void *output, const void *input ) -{ - argon2_context context; - context.out = (uint8_t *)output; - context.outlen = (uint32_t)OUTPUT_BYTES; - context.pwd = (uint8_t *)input; - context.pwdlen = (uint32_t)INPUT_BYTES; - context.salt = (uint8_t *)input; //salt = input - context.saltlen = (uint32_t)INPUT_BYTES; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS - // main configurable Argon2 hash parameters - context.m_cost = 500; // Memory in KiB (512KB) - context.lanes = 8; // Degree of Parallelism - context.threads = 1; // Threads - context.t_cost = 2; // Iterations - context.version = ARGON2_VERSION_10; - - argon2_ctx( &context, Argon2_d ); -} - -int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t _ALIGN(64) edata[20]; - uint32_t _ALIGN(64) hash[8]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - const int thr_id = mythr->id; - const uint32_t first_nonce = (const uint32_t)pdata[19]; - const uint32_t last_nonce = (const uint32_t)max_nonce; - uint32_t nonce = first_nonce; - const bool bench = opt_benchmark; - - mm128_bswap32_80( edata, pdata ); - do - { - edata[19] = nonce; - argon2d_dyn_hash( hash, edata ); - if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget ) - && !bench ) ) - { - pdata[19] = bswap_32( nonce );; - submit_solution( work, hash, mythr ); - } - nonce++; - } while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) ); - - pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce; - return 0; -} - -bool register_argon2d_dyn_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_argon2d_dyn; - gate->hash = (void*)&argon2d_dyn_hash; - gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT; - opt_target_factor = 65536.0; - return true; -} - -// Unitus - -int scanhash_argon2d4096( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t _ALIGN(64) vhash[8]; - uint32_t _ALIGN(64) edata[20]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = (const uint32_t)max_nonce; - uint32_t n = first_nonce; - const int thr_id = mythr->id; // thr_id arg is deprecated - uint32_t t_cost = 1; // 1 iteration - uint32_t m_cost = 4096; // use 4MB - uint32_t parallelism = 1; // 1 thread, 2 lanes - const bool bench = opt_benchmark; - - mm128_bswap32_80( edata, pdata ); - - do { - edata[19] = n; - argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) edata, 80, - (char*) edata, 80, (char*) vhash, 32, ARGON2_VERSION_13 ); - if ( unlikely( valid_hash( vhash, ptarget ) && !bench ) ) - { - be32enc( &pdata[19], n ); - submit_solution( work, vhash, mythr ); - } - n++; - } while ( likely( n < last_nonce && !work_restart[thr_id].restart ) ); - - *hashes_done = n - first_nonce; - pdata[19] = n; - return 0; -} - -bool register_argon2d4096_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_argon2d4096; - gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT; - opt_target_factor = 65536.0; - return true; -} - diff --git a/algo/argon2/argon2d/argon2d-gate.h b/algo/argon2/argon2d/argon2d-gate.h deleted file mode 100644 index dbb2b4da..00000000 --- a/algo/argon2/argon2d/argon2d-gate.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef ARGON2D_GATE_H__ -#define ARGON2D_GATE_H__ - -#include "algo-gate-api.h" -#include - -// Credits: version = 0x10, m_cost = 250. -bool register_argon2d_crds_algo( algo_gate_t* gate ); - -void argon2d_crds_hash( void *state, const void *input ); - -int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); - -// Dynamic: version = 0x10, m_cost = 500. -bool register_argon2d_dyn_algo( algo_gate_t* gate ); - -void argon2d_dyn_hash( void *state, const void *input ); - -int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); - - -// Unitus: version = 0x13, m_cost = 4096. -bool register_argon2d4096_algo( algo_gate_t* gate ); - -int scanhash_argon2d4096( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); - -#endif - diff --git a/algo/argon2/argon2d/argon2d/argon2.c b/algo/argon2/argon2d/argon2d/argon2.c deleted file mode 100644 index 5eabe35d..00000000 --- a/algo/argon2/argon2d/argon2d/argon2.c +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#include -#include -#include - -#include "argon2.h" -#include "encoding.h" -#include "core.h" - -const char *argon2_type2string(argon2_type type, int uppercase) { - switch (type) { - case Argon2_d: - return uppercase ? "Argon2d" : "argon2d"; - case Argon2_i: - return uppercase ? "Argon2i" : "argon2i"; - case Argon2_id: - return uppercase ? "Argon2id" : "argon2id"; - } - - return NULL; -} - -int argon2_ctx(argon2_context *context, argon2_type type) { - /* 1. Validate all inputs */ - int result = validate_inputs(context); - uint32_t memory_blocks, segment_length; - argon2_instance_t instance; - - if (ARGON2_OK != result) { - return result; - } - - if (Argon2_d != type && Argon2_i != type && Argon2_id != type) { - return ARGON2_INCORRECT_TYPE; - } - - /* 2. Align memory size */ - /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ - memory_blocks = context->m_cost; - - if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context->lanes) { - memory_blocks = 2 * ARGON2_SYNC_POINTS * context->lanes; - } - - segment_length = memory_blocks / (context->lanes * ARGON2_SYNC_POINTS); - /* Ensure that all segments have equal length */ - memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS); - - instance.version = context->version; - instance.memory = NULL; - instance.passes = context->t_cost; - instance.memory_blocks = memory_blocks; - instance.segment_length = segment_length; - instance.lane_length = segment_length * ARGON2_SYNC_POINTS; - instance.lanes = context->lanes; - instance.threads = context->threads; - instance.type = type; - - if (instance.threads > instance.lanes) { - instance.threads = instance.lanes; - } - - /* 3. Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - result = initialize(&instance, context); - - if (ARGON2_OK != result) { - return result; - } - - /* 4. Filling memory */ - result = fill_memory_blocks(&instance); - - if (ARGON2_OK != result) { - return result; - } - /* 5. Finalization */ - finalize(context, &instance); - - return ARGON2_OK; -} - -int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, const size_t saltlen, - void *hash, const size_t hashlen, char *encoded, - const size_t encodedlen, argon2_type type, - const uint32_t version){ - - argon2_context context; - int result; - uint8_t *out; - - if (pwdlen > ARGON2_MAX_PWD_LENGTH) { - return ARGON2_PWD_TOO_LONG; - } - - if (saltlen > ARGON2_MAX_SALT_LENGTH) { - return ARGON2_SALT_TOO_LONG; - } - - if (hashlen > ARGON2_MAX_OUTLEN) { - return ARGON2_OUTPUT_TOO_LONG; - } - - if (hashlen < ARGON2_MIN_OUTLEN) { - return ARGON2_OUTPUT_TOO_SHORT; - } - - out = malloc(hashlen); - if (!out) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - context.out = (uint8_t *)out; - context.outlen = (uint32_t)hashlen; - context.pwd = CONST_CAST(uint8_t *)pwd; - context.pwdlen = (uint32_t)pwdlen; - context.salt = CONST_CAST(uint8_t *)salt; - context.saltlen = (uint32_t)saltlen; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = parallelism; - context.threads = parallelism; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_DEFAULT_FLAGS; - context.version = version; - - result = argon2_ctx(&context, type); - - if (result != ARGON2_OK) { - clear_internal_memory(out, hashlen); - free(out); - return result; - } - - /* if raw hash requested, write it */ - if (hash) { - memcpy(hash, out, hashlen); - } - - /* if encoding requested, write it */ - if (encoded && encodedlen) { - if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) { - clear_internal_memory(out, hashlen); /* wipe buffers if error */ - clear_internal_memory(encoded, encodedlen); - free(out); - return ARGON2_ENCODING_FAIL; - } - } - clear_internal_memory(out, hashlen); - free(out); - - return ARGON2_OK; -} - -int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, const size_t hashlen, - char *encoded, const size_t encodedlen, - const uint32_t version) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - NULL, hashlen, encoded, encodedlen, Argon2_i, - version ); -} - -int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen, - const uint32_t version ) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, NULL, 0, Argon2_i, version ); -} - -int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, const size_t hashlen, - char *encoded, const size_t encodedlen, - const uint32_t version ) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - NULL, hashlen, encoded, encodedlen, Argon2_d, - version ); -} - -int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen, - const uint32_t version ) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, NULL, 0, Argon2_d, version ); -} - -int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, const size_t hashlen, - char *encoded, const size_t encodedlen, - const uint32_t version ) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - NULL, hashlen, encoded, encodedlen, Argon2_id, - version); -} - -int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen, - const uint32_t version ) { - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, NULL, 0, Argon2_id, version ); -} - -static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) { - size_t i; - uint8_t d = 0U; - - for (i = 0U; i < len; i++) { - d |= b1[i] ^ b2[i]; - } - return (int)((1 & ((d - 1) >> 8)) - 1); -} - -int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen, - argon2_type type) { - - argon2_context ctx; - uint8_t *desired_result = NULL; - - int ret = ARGON2_OK; - - size_t encoded_len; - uint32_t max_field_len; - - if (pwdlen > ARGON2_MAX_PWD_LENGTH) { - return ARGON2_PWD_TOO_LONG; - } - - if (encoded == NULL) { - return ARGON2_DECODING_FAIL; - } - - encoded_len = strlen(encoded); - if (encoded_len > UINT32_MAX) { - return ARGON2_DECODING_FAIL; - } - - /* No field can be longer than the encoded length */ - max_field_len = (uint32_t)encoded_len; - - ctx.saltlen = max_field_len; - ctx.outlen = max_field_len; - - ctx.salt = malloc(ctx.saltlen); - ctx.out = malloc(ctx.outlen); - if (!ctx.salt || !ctx.out) { - ret = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - ctx.pwd = (uint8_t *)pwd; - ctx.pwdlen = (uint32_t)pwdlen; - - ret = decode_string(&ctx, encoded, type); - if (ret != ARGON2_OK) { - goto fail; - } - - /* Set aside the desired result, and get a new buffer. */ - desired_result = ctx.out; - ctx.out = malloc(ctx.outlen); - if (!ctx.out) { - ret = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - ret = argon2_verify_ctx(&ctx, (char *)desired_result, type); - if (ret != ARGON2_OK) { - goto fail; - } - -fail: - free(ctx.salt); - free(ctx.out); - free(desired_result); - - return ret; -} - -int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) { - - return argon2_verify(encoded, pwd, pwdlen, Argon2_i); -} - -int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) { - - return argon2_verify(encoded, pwd, pwdlen, Argon2_d); -} - -int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) { - - return argon2_verify(encoded, pwd, pwdlen, Argon2_id); -} - -int argon2d_ctx(argon2_context *context) { - return argon2_ctx(context, Argon2_d); -} - -int argon2i_ctx(argon2_context *context) { - return argon2_ctx(context, Argon2_i); -} - -int argon2id_ctx(argon2_context *context) { - return argon2_ctx(context, Argon2_id); -} - -int argon2_verify_ctx(argon2_context *context, const char *hash, - argon2_type type) { - int ret = argon2_ctx(context, type); - if (ret != ARGON2_OK) { - return ret; - } - - if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) { - return ARGON2_VERIFY_MISMATCH; - } - - return ARGON2_OK; -} - -int argon2d_verify_ctx(argon2_context *context, const char *hash) { - return argon2_verify_ctx(context, hash, Argon2_d); -} - -int argon2i_verify_ctx(argon2_context *context, const char *hash) { - return argon2_verify_ctx(context, hash, Argon2_i); -} - -int argon2id_verify_ctx(argon2_context *context, const char *hash) { - return argon2_verify_ctx(context, hash, Argon2_id); -} - -const char *argon2_error_message(int error_code) { - switch (error_code) { - case ARGON2_OK: - return "OK"; - case ARGON2_OUTPUT_PTR_NULL: - return "Output pointer is NULL"; - case ARGON2_OUTPUT_TOO_SHORT: - return "Output is too short"; - case ARGON2_OUTPUT_TOO_LONG: - return "Output is too long"; - case ARGON2_PWD_TOO_SHORT: - return "Password is too short"; - case ARGON2_PWD_TOO_LONG: - return "Password is too long"; - case ARGON2_SALT_TOO_SHORT: - return "Salt is too short"; - case ARGON2_SALT_TOO_LONG: - return "Salt is too long"; - case ARGON2_AD_TOO_SHORT: - return "Associated data is too short"; - case ARGON2_AD_TOO_LONG: - return "Associated data is too long"; - case ARGON2_SECRET_TOO_SHORT: - return "Secret is too short"; - case ARGON2_SECRET_TOO_LONG: - return "Secret is too long"; - case ARGON2_TIME_TOO_SMALL: - return "Time cost is too small"; - case ARGON2_TIME_TOO_LARGE: - return "Time cost is too large"; - case ARGON2_MEMORY_TOO_LITTLE: - return "Memory cost is too small"; - case ARGON2_MEMORY_TOO_MUCH: - return "Memory cost is too large"; - case ARGON2_LANES_TOO_FEW: - return "Too few lanes"; - case ARGON2_LANES_TOO_MANY: - return "Too many lanes"; - case ARGON2_PWD_PTR_MISMATCH: - return "Password pointer is NULL, but password length is not 0"; - case ARGON2_SALT_PTR_MISMATCH: - return "Salt pointer is NULL, but salt length is not 0"; - case ARGON2_SECRET_PTR_MISMATCH: - return "Secret pointer is NULL, but secret length is not 0"; - case ARGON2_AD_PTR_MISMATCH: - return "Associated data pointer is NULL, but ad length is not 0"; - case ARGON2_MEMORY_ALLOCATION_ERROR: - return "Memory allocation error"; - case ARGON2_FREE_MEMORY_CBK_NULL: - return "The free memory callback is NULL"; - case ARGON2_ALLOCATE_MEMORY_CBK_NULL: - return "The allocate memory callback is NULL"; - case ARGON2_INCORRECT_PARAMETER: - return "Argon2_Context context is NULL"; - case ARGON2_INCORRECT_TYPE: - return "There is no such version of Argon2"; - case ARGON2_OUT_PTR_MISMATCH: - return "Output pointer mismatch"; - case ARGON2_THREADS_TOO_FEW: - return "Not enough threads"; - case ARGON2_THREADS_TOO_MANY: - return "Too many threads"; - case ARGON2_MISSING_ARGS: - return "Missing arguments"; - case ARGON2_ENCODING_FAIL: - return "Encoding failed"; - case ARGON2_DECODING_FAIL: - return "Decoding failed"; - case ARGON2_THREAD_FAIL: - return "Threading failure"; - case ARGON2_DECODING_LENGTH_FAIL: - return "Some of encoded parameters are too long or too short"; - case ARGON2_VERIFY_MISMATCH: - return "The password does not match the supplied hash"; - default: - return "Unknown error code"; - } -} -/* -size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism, - uint32_t saltlen, uint32_t hashlen, argon2_type type) { - return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) + - numlen(t_cost) + numlen(m_cost) + numlen(parallelism) + - b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + 1; -} -*/ diff --git a/algo/argon2/argon2d/argon2d/argon2.h b/algo/argon2/argon2d/argon2d/argon2.h deleted file mode 100644 index d546d37e..00000000 --- a/algo/argon2/argon2d/argon2d/argon2.h +++ /dev/null @@ -1,440 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef ARGON2_H -#define ARGON2_H - -#include -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -/* Symbols visibility control */ -#ifdef A2_VISCTL -#define ARGON2_PUBLIC __attribute__((visibility("default"))) -#define ARGON2_LOCAL __attribute__ ((visibility ("hidden"))) -#elif _MSC_VER -#define ARGON2_PUBLIC __declspec(dllexport) -#define ARGON2_LOCAL -#else -#define ARGON2_PUBLIC -#define ARGON2_LOCAL -#endif - -/* - * Argon2 input parameter restrictions - */ - -/* Minimum and maximum number of lanes (degree of parallelism) */ -#define ARGON2_MIN_LANES UINT32_C(1) -#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) - -/* Minimum and maximum number of threads */ -#define ARGON2_MIN_THREADS UINT32_C(1) -#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) - -/* Number of synchronization points between lanes per pass */ -#define ARGON2_SYNC_POINTS UINT32_C(4) - -/* Minimum and maximum digest size in bytes */ -#define ARGON2_MIN_OUTLEN UINT32_C(4) -#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ -#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ - -#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) -/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ -#define ARGON2_MAX_MEMORY_BITS \ - ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) -#define ARGON2_MAX_MEMORY \ - ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) - -/* Minimum and maximum number of passes */ -#define ARGON2_MIN_TIME UINT32_C(1) -#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum password length in bytes */ -#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) -#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum associated data length in bytes */ -#define ARGON2_MIN_AD_LENGTH UINT32_C(0) -#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum salt length in bytes */ -#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) -#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum key length in bytes */ -#define ARGON2_MIN_SECRET UINT32_C(0) -#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) - -/* Flags to determine which fields are securely wiped (default = no wipe). */ -#define ARGON2_DEFAULT_FLAGS UINT32_C(0) -#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) -#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) - -/* Global flag to determine if we are wiping internal memory buffers. This flag - * is defined in core.c and deafults to 1 (wipe internal memory). */ -extern int FLAG_clear_internal_memory; - -/* Error codes */ -typedef enum Argon2_ErrorCodes { - ARGON2_OK = 0, - - ARGON2_OUTPUT_PTR_NULL = -1, - - ARGON2_OUTPUT_TOO_SHORT = -2, - ARGON2_OUTPUT_TOO_LONG = -3, - - ARGON2_PWD_TOO_SHORT = -4, - ARGON2_PWD_TOO_LONG = -5, - - ARGON2_SALT_TOO_SHORT = -6, - ARGON2_SALT_TOO_LONG = -7, - - ARGON2_AD_TOO_SHORT = -8, - ARGON2_AD_TOO_LONG = -9, - - ARGON2_SECRET_TOO_SHORT = -10, - ARGON2_SECRET_TOO_LONG = -11, - - ARGON2_TIME_TOO_SMALL = -12, - ARGON2_TIME_TOO_LARGE = -13, - - ARGON2_MEMORY_TOO_LITTLE = -14, - ARGON2_MEMORY_TOO_MUCH = -15, - - ARGON2_LANES_TOO_FEW = -16, - ARGON2_LANES_TOO_MANY = -17, - - ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ - ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ - ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ - ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ - - ARGON2_MEMORY_ALLOCATION_ERROR = -22, - - ARGON2_FREE_MEMORY_CBK_NULL = -23, - ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, - - ARGON2_INCORRECT_PARAMETER = -25, - ARGON2_INCORRECT_TYPE = -26, - - ARGON2_OUT_PTR_MISMATCH = -27, - - ARGON2_THREADS_TOO_FEW = -28, - ARGON2_THREADS_TOO_MANY = -29, - - ARGON2_MISSING_ARGS = -30, - - ARGON2_ENCODING_FAIL = -31, - - ARGON2_DECODING_FAIL = -32, - - ARGON2_THREAD_FAIL = -33, - - ARGON2_DECODING_LENGTH_FAIL = -34, - - ARGON2_VERIFY_MISMATCH = -35 -} argon2_error_codes; - -/* Memory allocator types --- for external allocation */ -typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); -typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); - -/* Argon2 external data structures */ - -/* - ***** - * Context: structure to hold Argon2 inputs: - * output array and its length, - * password and its length, - * salt and its length, - * secret and its length, - * associated data and its length, - * number of passes, amount of used memory (in KBytes, can be rounded up a bit) - * number of parallel threads that will be run. - * All the parameters above affect the output hash value. - * Additionally, two function pointers can be provided to allocate and - * deallocate the memory (if NULL, memory will be allocated internally). - * Also, three flags indicate whether to erase password, secret as soon as they - * are pre-hashed (and thus not needed anymore), and the entire memory - ***** - * Simplest situation: you have output array out[8], password is stored in - * pwd[32], salt is stored in salt[16], you do not have keys nor associated - * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with - * 4 parallel lanes. - * You want to erase the password, but you're OK with last pass not being - * erased. You want to use the default memory allocator. - * Then you initialize: - Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) - */ -typedef struct Argon2_Context { - uint8_t *out; /* output array */ - uint32_t outlen; /* digest length */ - - uint8_t *pwd; /* password array */ - uint32_t pwdlen; /* password length */ - - uint8_t *salt; /* salt array */ - uint32_t saltlen; /* salt length */ - - uint8_t *secret; /* key array */ - uint32_t secretlen; /* key length */ - - uint8_t *ad; /* associated data array */ - uint32_t adlen; /* associated data length */ - - uint32_t t_cost; /* number of passes */ - uint32_t m_cost; /* amount of memory requested (KB) */ - uint32_t lanes; /* number of lanes */ - uint32_t threads; /* maximum number of threads */ - - uint32_t version; /* version number */ - - allocate_fptr allocate_cbk; /* pointer to memory allocator */ - deallocate_fptr free_cbk; /* pointer to memory deallocator */ - - uint32_t flags; /* array of bool options */ -} argon2_context; - -/* Argon2 primitive type */ -typedef enum Argon2_type { - Argon2_d = 0, - Argon2_i = 1, - Argon2_id = 2 -} argon2_type; - -/* Version of the algorithm */ -#define ARGON2_VERSION_10 0x10 -#define ARGON2_VERSION_13 0x13 - -/* - * Function that gives the string representation of an argon2_type. - * @param type The argon2_type that we want the string for - * @param uppercase Whether the string should have the first letter uppercase - * @return NULL if invalid type, otherwise the string representation. - */ -ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase); - -/* - * Function that performs memory-hard hashing with certain degree of parallelism - * @param context Pointer to the Argon2 internal structure - * @return Error code if smth is wrong, ARGON2_OK otherwise - */ -ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type); - -/** - * Hashes a password with Argon2i, producing an encoded hash - * @param t_cost Number of iterations - * @param m_cost Sets memory usage to m_cost kibibytes - * @param parallelism Number of threads and compute lanes - * @param pwd Pointer to password - * @param pwdlen Password size in bytes - * @param salt Pointer to salt - * @param saltlen Salt size in bytes - * @param hashlen Desired length of the hash in bytes - * @param encoded Buffer where to write the encoded hash - * @param encodedlen Size of the buffer (thus max size of the encoded hash) - * @pre Different parallelism levels will give different results - * @pre Returns ARGON2_OK if successful - */ -ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, - const void *pwd, const size_t pwdlen, - const void *salt, const size_t saltlen, - const size_t hashlen, char *encoded, - const size_t encodedlen, - const uint32_t version ); - -/** - * Hashes a password with Argon2i, producing a raw hash at @hash - * @param t_cost Number of iterations - * @param m_cost Sets memory usage to m_cost kibibytes - * @param parallelism Number of threads and compute lanes - * @param pwd Pointer to password - * @param pwdlen Password size in bytes - * @param salt Pointer to salt - * @param saltlen Salt size in bytes - * @param hash Buffer where to write the raw hash - updated by the function - * @param hashlen Desired length of the hash in bytes - * @pre Different parallelism levels will give different results - * @pre Returns ARGON2_OK if successful - */ -ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen, - const uint32_t version ); - -ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, - const void *pwd, const size_t pwdlen, - const void *salt, const size_t saltlen, - const size_t hashlen, char *encoded, - const size_t encodedlen, - const uint32_t version ); - -ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen, - const uint32_t version ); - -ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, - const void *pwd, const size_t pwdlen, - const void *salt, const size_t saltlen, - const size_t hashlen, char *encoded, - const size_t encodedlen, - const uint32_t version ); - -ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen, - const uint32_t version ); - -/* generic function underlying the above ones */ -ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen, char *encoded, - const size_t encodedlen, argon2_type type, - const uint32_t version ); - -/** - * Verifies a password against an encoded string - * Encoded string is restricted as in validate_inputs() - * @param encoded String encoding parameters, salt, hash - * @param pwd Pointer to password - * @pre Returns ARGON2_OK if successful - */ -ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd, - const size_t pwdlen); - -ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd, - const size_t pwdlen); - -ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd, - const size_t pwdlen); - -/* generic function underlying the above ones */ -ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd, - const size_t pwdlen, argon2_type type); - -/** - * Argon2d: Version of Argon2 that picks memory blocks depending - * on the password and salt. Only for side-channel-free - * environment!! - ***** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2d_ctx(argon2_context *context); - -/** - * Argon2i: Version of Argon2 that picks memory blocks - * independent on the password and salt. Good for side-channels, - * but worse w.r.t. tradeoff attacks if only one pass is used. - ***** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2i_ctx(argon2_context *context); - -/** - * Argon2id: Version of Argon2 where the first half-pass over memory is - * password-independent, the rest are password-dependent (on the password and - * salt). OK against side channels (they reduce to 1/2-pass Argon2i), and - * better with w.r.t. tradeoff attacks (similar to Argon2d). - ***** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2id_ctx(argon2_context *context); - -/** - * Verify if a given password is correct for Argon2d hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash); - -/** - * Verify if a given password is correct for Argon2i hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash); - -/** - * Verify if a given password is correct for Argon2id hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context, - const char *hash); - -/* generic function underlying the above ones */ -ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash, - argon2_type type); - -/** - * Get the associated error message for given error code - * @return The error message associated with the given error code - */ -ARGON2_PUBLIC const char *argon2_error_message(int error_code); - -/** - * Returns the encoded hash length for the given input parameters - * @param t_cost Number of iterations - * @param m_cost Memory usage in kibibytes - * @param parallelism Number of threads; used to compute lanes - * @param saltlen Salt size in bytes - * @param hashlen Hash size in bytes - * @param type The argon2_type that we want the encoded length for - * @return The encoded hash length in bytes - */ -ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, - uint32_t parallelism, uint32_t saltlen, - uint32_t hashlen, argon2_type type); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/algo/argon2/argon2d/argon2d/argon2d_thread.c b/algo/argon2/argon2d/argon2d/argon2d_thread.c deleted file mode 100644 index 41eca426..00000000 --- a/algo/argon2/argon2d/argon2d/argon2d_thread.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#if !defined(ARGON2_NO_THREADS) - -#include "argon2d_thread.h" -#if defined(_WIN32) -#include -#endif - -int argon2_thread_create(argon2_thread_handle_t *handle, - argon2_thread_func_t func, void *args) { - if (NULL == handle || func == NULL) { - return -1; - } -#if defined(_WIN32) - *handle = _beginthreadex(NULL, 0, func, args, 0, NULL); - return *handle != 0 ? 0 : -1; -#else - return pthread_create(handle, NULL, func, args); -#endif -} - -int argon2_thread_join(argon2_thread_handle_t handle) { -#if defined(_WIN32) - if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) { - return CloseHandle((HANDLE)handle) != 0 ? 0 : -1; - } - return -1; -#else - return pthread_join(handle, NULL); -#endif -} - -void argon2_thread_exit(void) { -#if defined(_WIN32) - _endthreadex(0); -#else - pthread_exit(NULL); -#endif -} - -#endif /* ARGON2_NO_THREADS */ diff --git a/algo/argon2/argon2d/argon2d/argon2d_thread.h b/algo/argon2/argon2d/argon2d/argon2d_thread.h deleted file mode 100644 index 49d88367..00000000 --- a/algo/argon2/argon2d/argon2d/argon2d_thread.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef ARGON2_THREAD_H -#define ARGON2_THREAD_H - -#if !defined(ARGON2_NO_THREADS) - -/* - Here we implement an abstraction layer for the simpĺe requirements - of the Argon2 code. We only require 3 primitives---thread creation, - joining, and termination---so full emulation of the pthreads API - is unwarranted. Currently we wrap pthreads and Win32 threads. - - The API defines 2 types: the function pointer type, - argon2_thread_func_t, - and the type of the thread handle---argon2_thread_handle_t. -*/ -#if defined(_WIN32) -#include -typedef unsigned(__stdcall *argon2_thread_func_t)(void *); -typedef uintptr_t argon2_thread_handle_t; -#else -#include -typedef void *(*argon2_thread_func_t)(void *); -typedef pthread_t argon2_thread_handle_t; -#endif - -/* Creates a thread - * @param handle pointer to a thread handle, which is the output of this - * function. Must not be NULL. - * @param func A function pointer for the thread's entry point. Must not be - * NULL. - * @param args Pointer that is passed as an argument to @func. May be NULL. - * @return 0 if @handle and @func are valid pointers and a thread is successfully - * created. - */ -int argon2_thread_create(argon2_thread_handle_t *handle, - argon2_thread_func_t func, void *args); - -/* Waits for a thread to terminate - * @param handle Handle to a thread created with argon2_thread_create. - * @return 0 if @handle is a valid handle, and joining completed successfully. -*/ -int argon2_thread_join(argon2_thread_handle_t handle); - -/* Terminate the current thread. Must be run inside a thread created by - * argon2_thread_create. -*/ -void argon2_thread_exit(void); - -#endif /* ARGON2_NO_THREADS */ -#endif diff --git a/algo/argon2/argon2d/argon2d/core.c b/algo/argon2/argon2d/argon2d/core.c deleted file mode 100644 index 08c65d09..00000000 --- a/algo/argon2/argon2d/argon2d/core.c +++ /dev/null @@ -1,638 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -/*For memory wiping*/ -#ifdef _MSC_VER -#include -#include /* For SecureZeroMemory */ -#endif -#if defined __STDC_LIB_EXT1__ -#define __STDC_WANT_LIB_EXT1__ 1 -#endif -#define VC_GE_2005(version) (version >= 1400) - -#include -#include -#include -#include - -#include "core.h" -#include "argon2d_thread.h" -#include "../blake2/blake2.h" -#include "../blake2/blake2-impl.h" - -#ifdef GENKAT -#include "genkat.h" -#endif - -#if defined(__clang__) -#if __has_attribute(optnone) -#define NOT_OPTIMIZED __attribute__((optnone)) -#endif -#elif defined(__GNUC__) -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= 40400 -#define NOT_OPTIMIZED __attribute__((optimize("O0"))) -#endif -#endif -#ifndef NOT_OPTIMIZED -#define NOT_OPTIMIZED -#endif - -/***************Instance and Position constructors**********/ -void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } - -void copy_block(block *dst, const block *src) { - memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); -} - -void xor_block(block *dst, const block *src) { - int i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->v[i] ^= src->v[i]; - } -} - -static void load_block(block *dst, const void *input) { - unsigned i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); - } -} - -static void store_block(void *output, const block *src) { - unsigned i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); - } -} - -/***************Memory functions*****************/ - -int allocate_memory(const argon2_context *context, uint8_t **memory, - size_t num, size_t size) { - size_t memory_size = num*size; - if (memory == NULL) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - /* 1. Check for multiplication overflow */ - if (size != 0 && memory_size / size != num) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - /* 2. Try to allocate with appropriate allocator */ - if (context->allocate_cbk) { - (context->allocate_cbk)(memory, memory_size); - } else { - *memory = _mm_malloc( memory_size, 64 ); -// *memory = malloc(memory_size); - } - - if (*memory == NULL) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - return ARGON2_OK; -} - -void free_memory(const argon2_context *context, uint8_t *memory, - size_t num, size_t size) { - size_t memory_size = num*size; -// clear_internal_memory(memory, memory_size); - if (context->free_cbk) { - (context->free_cbk)(memory, memory_size); - } else { -// free(memory); - _mm_free( memory ); - } -} - -void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { -#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) - SecureZeroMemory(v, n); -#elif defined memset_s - memset_s(v, n, 0, n); -#elif defined(__OpenBSD__) - explicit_bzero(v, n); -#else - static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; - memset_sec(v, 0, n); -#endif -} - -/* Memory clear flag defaults to true. */ -int FLAG_clear_internal_memory = 0; -void clear_internal_memory(void *v, size_t n) { - if (FLAG_clear_internal_memory && v) { -// secure_wipe_memory(v, n); - } -} - -void finalize(const argon2_context *context, argon2_instance_t *instance) { - if (context != NULL && instance != NULL) { - block blockhash; - uint32_t l; - - copy_block(&blockhash, instance->memory + instance->lane_length - 1); - - /* XOR the last blocks */ - for (l = 1; l < instance->lanes; ++l) { - uint32_t last_block_in_lane = - l * instance->lane_length + (instance->lane_length - 1); - xor_block(&blockhash, instance->memory + last_block_in_lane); - } - - /* Hash the result */ - { - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - store_block(blockhash_bytes, &blockhash); - blake2b_long(context->out, context->outlen, blockhash_bytes, - ARGON2_BLOCK_SIZE); - /* clear blockhash and blockhash_bytes */ - clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); - clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); - } - -#ifdef GENKAT - print_tag(context->out, context->outlen); -#endif - - free_memory(context, (uint8_t *)instance->memory, - instance->memory_blocks, sizeof(block)); - } -} - -uint32_t index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane) { - /* - * Pass 0: - * This lane : all already finished segments plus already constructed - * blocks in this segment - * Other lanes : all already finished segments - * Pass 1+: - * This lane : (SYNC_POINTS - 1) last segments plus already constructed - * blocks in this segment - * Other lanes : (SYNC_POINTS - 1) last segments - */ - uint32_t reference_area_size; - uint64_t relative_position; - uint32_t start_position, absolute_position; - - if (0 == position->pass) { - /* First pass */ - if (0 == position->slice) { - /* First slice */ - reference_area_size = - position->index - 1; /* all but the previous */ - } else { - if (same_lane) { - /* The same lane => add current segment */ - reference_area_size = - position->slice * instance->segment_length + - position->index - 1; - } else { - reference_area_size = - position->slice * instance->segment_length + - ((position->index == 0) ? (-1) : 0); - } - } - } else { - /* Second pass */ - if (same_lane) { - reference_area_size = instance->lane_length - - instance->segment_length + position->index - - 1; - } else { - reference_area_size = instance->lane_length - - instance->segment_length + - ((position->index == 0) ? (-1) : 0); - } - } - - /* 1.2.4. Mapping pseudo_rand to 0.. and produce - * relative position */ - relative_position = pseudo_rand; - relative_position = relative_position * relative_position >> 32; - relative_position = reference_area_size - 1 - - (reference_area_size * relative_position >> 32); - - /* 1.2.5 Computing starting position */ - start_position = 0; - - if (0 != position->pass) { - start_position = (position->slice == ARGON2_SYNC_POINTS - 1) - ? 0 - : (position->slice + 1) * instance->segment_length; - } - - /* 1.2.6. Computing absolute position */ - absolute_position = (start_position + relative_position) % - instance->lane_length; /* absolute position */ - return absolute_position; -} - -/* Single-threaded version for p=1 case */ -static int fill_memory_blocks_st(argon2_instance_t *instance) { - uint32_t r, s, l; - - for (r = 0; r < instance->passes; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - for (l = 0; l < instance->lanes; ++l) { - argon2_position_t position = {r, l, (uint8_t)s, 0}; - fill_segment(instance, position); - } - } -#ifdef GENKAT - internal_kat(instance, r); /* Print all memory blocks */ -#endif - } - return ARGON2_OK; -} - -#if !defined(ARGON2_NO_THREADS) - -#ifdef _WIN32 -static unsigned __stdcall fill_segment_thr(void *thread_data) -#else -static void *fill_segment_thr(void *thread_data) -#endif -{ - argon2_thread_data *my_data = thread_data; - fill_segment(my_data->instance_ptr, my_data->pos); - argon2_thread_exit(); - return 0; -} - -/* Multi-threaded version for p > 1 case */ -static int fill_memory_blocks_mt(argon2_instance_t *instance) { - uint32_t r, s; - argon2_thread_handle_t *thread = NULL; - argon2_thread_data *thr_data = NULL; - int rc = ARGON2_OK; - - /* 1. Allocating space for threads */ - thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t)); - if (thread == NULL) { - rc = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - thr_data = calloc(instance->lanes, sizeof(argon2_thread_data)); - if (thr_data == NULL) { - rc = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - for (r = 0; r < instance->passes; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - uint32_t l; - - /* 2. Calling threads */ - for (l = 0; l < instance->lanes; ++l) { - argon2_position_t position; - - /* 2.1 Join a thread if limit is exceeded */ - if (l >= instance->threads) { - if (argon2_thread_join(thread[l - instance->threads])) { - rc = ARGON2_THREAD_FAIL; - goto fail; - } - } - - /* 2.2 Create thread */ - position.pass = r; - position.lane = l; - position.slice = (uint8_t)s; - position.index = 0; - thr_data[l].instance_ptr = - instance; /* preparing the thread input */ - memcpy(&(thr_data[l].pos), &position, - sizeof(argon2_position_t)); - if (argon2_thread_create(&thread[l], &fill_segment_thr, - (void *)&thr_data[l])) { - rc = ARGON2_THREAD_FAIL; - goto fail; - } - - /* fill_segment(instance, position); */ - /*Non-thread equivalent of the lines above */ - } - - /* 3. Joining remaining threads */ - for (l = instance->lanes - instance->threads; l < instance->lanes; - ++l) { - if (argon2_thread_join(thread[l])) { - rc = ARGON2_THREAD_FAIL; - goto fail; - } - } - } - -#ifdef GENKAT - internal_kat(instance, r); /* Print all memory blocks */ -#endif - } - -fail: - if (thread != NULL) { - free(thread); - } - if (thr_data != NULL) { - free(thr_data); - } - return rc; -} - -#endif /* ARGON2_NO_THREADS */ - -int fill_memory_blocks(argon2_instance_t *instance) { - if (instance == NULL || instance->lanes == 0) { - return ARGON2_INCORRECT_PARAMETER; - } -#if defined(ARGON2_NO_THREADS) - return fill_memory_blocks_st(instance); -#else - return instance->threads == 1 ? - fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance); -#endif -} - -int validate_inputs(const argon2_context *context) { - if (NULL == context) { - return ARGON2_INCORRECT_PARAMETER; - } - - if (NULL == context->out) { - return ARGON2_OUTPUT_PTR_NULL; - } - - /* Validate output length */ - if (ARGON2_MIN_OUTLEN > context->outlen) { - return ARGON2_OUTPUT_TOO_SHORT; - } - - if (ARGON2_MAX_OUTLEN < context->outlen) { - return ARGON2_OUTPUT_TOO_LONG; - } - - /* Validate password (required param) */ - if (NULL == context->pwd) { - if (0 != context->pwdlen) { - return ARGON2_PWD_PTR_MISMATCH; - } - } - - if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { - return ARGON2_PWD_TOO_SHORT; - } - - if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { - return ARGON2_PWD_TOO_LONG; - } - - /* Validate salt (required param) */ - if (NULL == context->salt) { - if (0 != context->saltlen) { - return ARGON2_SALT_PTR_MISMATCH; - } - } - - if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { - return ARGON2_SALT_TOO_SHORT; - } - - if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { - return ARGON2_SALT_TOO_LONG; - } - - /* Validate secret (optional param) */ - if (NULL == context->secret) { - if (0 != context->secretlen) { - return ARGON2_SECRET_PTR_MISMATCH; - } - } else { - if (ARGON2_MIN_SECRET > context->secretlen) { - return ARGON2_SECRET_TOO_SHORT; - } - if (ARGON2_MAX_SECRET < context->secretlen) { - return ARGON2_SECRET_TOO_LONG; - } - } - - /* Validate associated data (optional param) */ - if (NULL == context->ad) { - if (0 != context->adlen) { - return ARGON2_AD_PTR_MISMATCH; - } - } else { - if (ARGON2_MIN_AD_LENGTH > context->adlen) { - return ARGON2_AD_TOO_SHORT; - } - if (ARGON2_MAX_AD_LENGTH < context->adlen) { - return ARGON2_AD_TOO_LONG; - } - } - - /* Validate memory cost */ - if (ARGON2_MIN_MEMORY > context->m_cost) { - return ARGON2_MEMORY_TOO_LITTLE; - } - - if (ARGON2_MAX_MEMORY < context->m_cost) { - return ARGON2_MEMORY_TOO_MUCH; - } - - if (context->m_cost < 8 * context->lanes) { - return ARGON2_MEMORY_TOO_LITTLE; - } - - /* Validate time cost */ - if (ARGON2_MIN_TIME > context->t_cost) { - return ARGON2_TIME_TOO_SMALL; - } - - if (ARGON2_MAX_TIME < context->t_cost) { - return ARGON2_TIME_TOO_LARGE; - } - - /* Validate lanes */ - if (ARGON2_MIN_LANES > context->lanes) { - return ARGON2_LANES_TOO_FEW; - } - - if (ARGON2_MAX_LANES < context->lanes) { - return ARGON2_LANES_TOO_MANY; - } - - /* Validate threads */ - if (ARGON2_MIN_THREADS > context->threads) { - return ARGON2_THREADS_TOO_FEW; - } - - if (ARGON2_MAX_THREADS < context->threads) { - return ARGON2_THREADS_TOO_MANY; - } - - if (NULL != context->allocate_cbk && NULL == context->free_cbk) { - return ARGON2_FREE_MEMORY_CBK_NULL; - } - - if (NULL == context->allocate_cbk && NULL != context->free_cbk) { - return ARGON2_ALLOCATE_MEMORY_CBK_NULL; - } - - return ARGON2_OK; -} - -void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { - uint32_t l; - /* Make the first and second block in each lane as G(H0||0||i) or - G(H0||1||i) */ - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - for (l = 0; l < instance->lanes; ++l) { - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); - blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, - ARGON2_PREHASH_SEED_LENGTH); - load_block(&instance->memory[l * instance->lane_length + 0], - blockhash_bytes); - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); - blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, - ARGON2_PREHASH_SEED_LENGTH); - load_block(&instance->memory[l * instance->lane_length + 1], - blockhash_bytes); - } - clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); -} - -void initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type) { - blake2b_state BlakeHash; - uint8_t value[sizeof(uint32_t)]; - - if (NULL == context || NULL == blockhash) { - return; - } - - blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); - - store32(&value, context->lanes); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->outlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->m_cost); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->t_cost); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - -// store32(&value, ARGON2_VERSION_NUMBER); - store32(&value, context->version); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, (uint32_t)type); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->pwdlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->pwd != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, - context->pwdlen); - - if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { -// secure_wipe_memory(context->pwd, context->pwdlen); - context->pwdlen = 0; - } - } - - store32(&value, context->saltlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->salt != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->salt, - context->saltlen); - } - - store32(&value, context->secretlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->secret != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->secret, - context->secretlen); - - if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { -// secure_wipe_memory(context->secret, context->secretlen); - context->secretlen = 0; - } - } - - store32(&value, context->adlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->ad != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->ad, - context->adlen); - } - - blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); -} - -int initialize(argon2_instance_t *instance, argon2_context *context) { - uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; - int result = ARGON2_OK; - - if (instance == NULL || context == NULL) - return ARGON2_INCORRECT_PARAMETER; - instance->context_ptr = context; - - /* 1. Memory allocation */ - result = allocate_memory(context, (uint8_t **)&(instance->memory), - instance->memory_blocks, sizeof(block)); - if (result != ARGON2_OK) { - return result; - } - - /* 2. Initial hashing */ - /* H_0 + 8 extra bytes to produce the first blocks */ - /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ - /* Hashing all inputs */ - initial_hash(blockhash, context, instance->type); - /* Zeroing 8 extra bytes */ - clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, - ARGON2_PREHASH_SEED_LENGTH - - ARGON2_PREHASH_DIGEST_LENGTH); - -#ifdef GENKAT - initial_kat(blockhash, context, instance->type); -#endif - - /* 3. Creating first blocks, we always have at least two blocks in a slice - */ - fill_first_blocks(blockhash, instance); - /* Clearing the hash */ - clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); - - return ARGON2_OK; -} diff --git a/algo/argon2/argon2d/argon2d/core.h b/algo/argon2/argon2d/argon2d/core.h deleted file mode 100644 index 78000ba9..00000000 --- a/algo/argon2/argon2d/argon2d/core.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef ARGON2_CORE_H -#define ARGON2_CORE_H - -#include "argon2.h" - -#define CONST_CAST(x) (x)(uintptr_t) - -/**********************Argon2 internal constants*******************************/ - -enum argon2_core_constants { - /* Memory block size in bytes */ - ARGON2_BLOCK_SIZE = 1024, - ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, - ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, - ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32, - ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64, - - /* Number of pseudo-random values generated by one call to Blake in Argon2i - to - generate reference block positions */ - ARGON2_ADDRESSES_IN_BLOCK = 128, - - /* Pre-hashing digest length and its extension*/ - ARGON2_PREHASH_DIGEST_LENGTH = 64, - ARGON2_PREHASH_SEED_LENGTH = 72 -}; - -/*************************Argon2 internal data types***********************/ - -/* - * Structure for the (1KB) memory block implemented as 128 64-bit words. - * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no - * bounds checking). - */ -typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; - -/*****************Functions that work with the block******************/ - -/* Initialize each byte of the block with @in */ -void init_block_value(block *b, uint8_t in); - -/* Copy block @src to block @dst */ -void copy_block(block *dst, const block *src); - -/* XOR @src onto @dst bytewise */ -void xor_block(block *dst, const block *src); - -/* - * Argon2 instance: memory pointer, number of passes, amount of memory, type, - * and derived values. - * Used to evaluate the number and location of blocks to construct in each - * thread - */ -typedef struct Argon2_instance_t { - block *memory; /* Memory pointer */ - uint32_t version; - uint32_t passes; /* Number of passes */ - uint32_t memory_blocks; /* Number of blocks in memory */ - uint32_t segment_length; - uint32_t lane_length; - uint32_t lanes; - uint32_t threads; - argon2_type type; - int print_internals; /* whether to print the memory blocks */ - argon2_context *context_ptr; /* points back to original context */ -} argon2_instance_t; - -/* - * Argon2 position: where we construct the block right now. Used to distribute - * work between threads. - */ -typedef struct Argon2_position_t { - uint32_t pass; - uint32_t lane; - uint8_t slice; - uint32_t index; -} argon2_position_t; - -/*Struct that holds the inputs for thread handling FillSegment*/ -typedef struct Argon2_thread_data { - argon2_instance_t *instance_ptr; - argon2_position_t pos; -} argon2_thread_data; - -/*************************Argon2 core functions********************************/ - -/* Allocates memory to the given pointer, uses the appropriate allocator as - * specified in the context. Total allocated memory is num*size. - * @param context argon2_context which specifies the allocator - * @param memory pointer to the pointer to the memory - * @param size the size in bytes for each element to be allocated - * @param num the number of elements to be allocated - * @return ARGON2_OK if @memory is a valid pointer and memory is allocated - */ -int allocate_memory(const argon2_context *context, uint8_t **memory, - size_t num, size_t size); - -/* - * Frees memory at the given pointer, uses the appropriate deallocator as - * specified in the context. Also cleans the memory using clear_internal_memory. - * @param context argon2_context which specifies the deallocator - * @param memory pointer to buffer to be freed - * @param size the size in bytes for each element to be deallocated - * @param num the number of elements to be deallocated - */ -void free_memory(const argon2_context *context, uint8_t *memory, - size_t num, size_t size); - -/* Function that securely cleans the memory. This ignores any flags set - * regarding clearing memory. Usually one just calls clear_internal_memory. - * @param mem Pointer to the memory - * @param s Memory size in bytes - */ -void secure_wipe_memory(void *v, size_t n); - -/* Function that securely clears the memory if FLAG_clear_internal_memory is - * set. If the flag isn't set, this function does nothing. - * @param mem Pointer to the memory - * @param s Memory size in bytes - */ -void clear_internal_memory(void *v, size_t n); - -/* - * Computes absolute position of reference block in the lane following a skewed - * distribution and using a pseudo-random value as input - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rand 32-bit pseudo-random value used to determine the position - * @param same_lane Indicates if the block will be taken from the current lane. - * If so we can reference the current segment - * @pre All pointers must be valid - */ -uint32_t index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane); - -/* - * Function that validates all inputs against predefined restrictions and return - * an error code - * @param context Pointer to current Argon2 context - * @return ARGON2_OK if everything is all right, otherwise one of error codes - * (all defined in - */ -int validate_inputs(const argon2_context *context); - -/* - * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears - * password and secret if needed - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param blockhash Buffer for pre-hashing digest - * @param type Argon2 type - * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes - * allocated - */ -void initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type); - -/* - * Function creates first 2 blocks per lane - * @param instance Pointer to the current instance - * @param blockhash Pointer to the pre-hashing digest - * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values - */ -void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); - -/* - * Function allocates memory, hashes the inputs with Blake, and creates first - * two blocks. Returns the pointer to the main memory with 2 blocks per lane - * initialized - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param instance Current Argon2 instance - * @return Zero if successful, -1 if memory failed to allocate. @context->state - * will be modified if successful. - */ -int initialize(argon2_instance_t *instance, argon2_context *context); - -/* - * XORing the last block of each lane, hashing it, making the tag. Deallocates - * the memory. - * @param context Pointer to current Argon2 context (use only the out parameters - * from it) - * @param instance Pointer to current instance of Argon2 - * @pre instance->state must point to necessary amount of memory - * @pre context->out must point to outlen bytes of memory - * @pre if context->free_cbk is not NULL, it should point to a function that - * deallocates memory - */ -void finalize(const argon2_context *context, argon2_instance_t *instance); - -/* - * Function that fills the segment using previous segments also from other - * threads - * @param context current context - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -/* - * Function that fills the entire memory t_cost times based on the first two - * blocks in each lane - * @param instance Pointer to the current instance - * @return ARGON2_OK if successful, @context->state - */ -int fill_memory_blocks(argon2_instance_t *instance); - -#endif diff --git a/algo/argon2/argon2d/argon2d/encoding.c b/algo/argon2/argon2d/argon2d/encoding.c deleted file mode 100644 index 12cfda4d..00000000 --- a/algo/argon2/argon2d/argon2d/encoding.c +++ /dev/null @@ -1,463 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#include -#include -#include -#include -#include "encoding.h" -#include "core.h" - -/* - * Example code for a decoder and encoder of "hash strings", with Argon2 - * parameters. - * - * This code comprises three sections: - * - * -- The first section contains generic Base64 encoding and decoding - * functions. It is conceptually applicable to any hash function - * implementation that uses Base64 to encode and decode parameters, - * salts and outputs. It could be made into a library, provided that - * the relevant functions are made public (non-static) and be given - * reasonable names to avoid collisions with other functions. - * - * -- The second section is specific to Argon2. It encodes and decodes - * the parameters, salts and outputs. It does not compute the hash - * itself. - * - * The code was originally written by Thomas Pornin , - * to whom comments and remarks may be sent. It is released under what - * should amount to Public Domain or its closest equivalent; the - * following mantra is supposed to incarnate that fact with all the - * proper legal rituals: - * - * --------------------------------------------------------------------- - * This file is provided under the terms of Creative Commons CC0 1.0 - * Public Domain Dedication. To the extent possible under law, the - * author (Thomas Pornin) has waived all copyright and related or - * neighboring rights to this file. This work is published from: Canada. - * --------------------------------------------------------------------- - * - * Copyright (c) 2015 Thomas Pornin - */ - -/* ==================================================================== */ -/* - * Common code; could be shared between different hash functions. - * - * Note: the Base64 functions below assume that uppercase letters (resp. - * lowercase letters) have consecutive numerical codes, that fit on 8 - * bits. All modern systems use ASCII-compatible charsets, where these - * properties are true. If you are stuck with a dinosaur of a system - * that still defaults to EBCDIC then you already have much bigger - * interoperability issues to deal with. - */ - -/* - * Some macros for constant-time comparisons. These work over values in - * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". - */ -#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) -#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) -#define GE(x, y) (GT(y, x) ^ 0xFF) -#define LT(x, y) GT(y, x) -#define LE(x, y) GE(y, x) - -/* - * Convert value x (0..63) to corresponding Base64 character. - */ -static int b64_byte_to_char(unsigned x) { - return (LT(x, 26) & (x + 'A')) | - (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | - (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | - (EQ(x, 63) & '/'); -} - -/* - * Convert character c to the corresponding 6-bit value. If character c - * is not a Base64 character, then 0xFF (255) is returned. - */ -static unsigned b64_char_to_byte(int c) { - unsigned x; - - x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) | - (GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) | - (GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) | - (EQ(c, '/') & 63); - return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF)); -} - -/* - * Convert some bytes to Base64. 'dst_len' is the length (in characters) - * of the output buffer 'dst'; if that buffer is not large enough to - * receive the result (including the terminating 0), then (size_t)-1 - * is returned. Otherwise, the zero-terminated Base64 string is written - * in the buffer, and the output length (counted WITHOUT the terminating - * zero) is returned. - */ -static size_t to_base64(char *dst, size_t dst_len, const void *src, - size_t src_len) { - size_t olen; - const unsigned char *buf; - unsigned acc, acc_len; - - olen = (src_len / 3) << 2; - switch (src_len % 3) { - case 2: - olen++; - /* fall through */ - case 1: - olen += 2; - break; - } - if (dst_len <= olen) { - return (size_t)-1; - } - acc = 0; - acc_len = 0; - buf = (const unsigned char *)src; - while (src_len-- > 0) { - acc = (acc << 8) + (*buf++); - acc_len += 8; - while (acc_len >= 6) { - acc_len -= 6; - *dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F); - } - } - if (acc_len > 0) { - *dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); - } - *dst++ = 0; - return olen; -} - -/* - * Decode Base64 chars into bytes. The '*dst_len' value must initially - * contain the length of the output buffer '*dst'; when the decoding - * ends, the actual number of decoded bytes is written back in - * '*dst_len'. - * - * Decoding stops when a non-Base64 character is encountered, or when - * the output buffer capacity is exceeded. If an error occurred (output - * buffer is too small, invalid last characters leading to unprocessed - * buffered bits), then NULL is returned; otherwise, the returned value - * points to the first non-Base64 character in the source stream, which - * may be the terminating zero. - */ -static const char *from_base64(void *dst, size_t *dst_len, const char *src) { - size_t len; - unsigned char *buf; - unsigned acc, acc_len; - - buf = (unsigned char *)dst; - len = 0; - acc = 0; - acc_len = 0; - for (;;) { - unsigned d; - - d = b64_char_to_byte(*src); - if (d == 0xFF) { - break; - } - src++; - acc = (acc << 6) + d; - acc_len += 6; - if (acc_len >= 8) { - acc_len -= 8; - if ((len++) >= *dst_len) { - return NULL; - } - *buf++ = (acc >> acc_len) & 0xFF; - } - } - - /* - * If the input length is equal to 1 modulo 4 (which is - * invalid), then there will remain 6 unprocessed bits; - * otherwise, only 0, 2 or 4 bits are buffered. The buffered - * bits must also all be zero. - */ - if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) { - return NULL; - } - *dst_len = len; - return src; -} - -/* - * Decode decimal integer from 'str'; the value is written in '*v'. - * Returned value is a pointer to the next non-decimal character in the - * string. If there is no digit at all, or the value encoding is not - * minimal (extra leading zeros), or the value does not fit in an - * 'unsigned long', then NULL is returned. - */ -static const char *decode_decimal(const char *str, unsigned long *v) { - const char *orig; - unsigned long acc; - - acc = 0; - for (orig = str;; str++) { - int c; - - c = *str; - if (c < '0' || c > '9') { - break; - } - c -= '0'; - if (acc > (ULONG_MAX / 10)) { - return NULL; - } - acc *= 10; - if ((unsigned long)c > (ULONG_MAX - acc)) { - return NULL; - } - acc += (unsigned long)c; - } - if (str == orig || (*orig == '0' && str != (orig + 1))) { - return NULL; - } - *v = acc; - return str; -} - -/* ==================================================================== */ -/* - * Code specific to Argon2. - * - * The code below applies the following format: - * - * $argon2[$v=]$m=,t=,p=$$ - * - * where is either 'd', 'id', or 'i', is a decimal integer (positive, - * fits in an 'unsigned long'), and is Base64-encoded data (no '=' padding - * characters, no newline or whitespace). - * - * The last two binary chunks (encoded in Base64) are, in that order, - * the salt and the output. Both are required. The binary salt length and the - * output length must be in the allowed ranges defined in argon2.h. - * - * The ctx struct must contain buffers large enough to hold the salt and pwd - * when it is fed into decode_string. - */ - -int decode_string(argon2_context *ctx, const char *str, argon2_type type) { - -/* check for prefix */ -#define CC(prefix) \ - do { \ - size_t cc_len = strlen(prefix); \ - if (strncmp(str, prefix, cc_len) != 0) { \ - return ARGON2_DECODING_FAIL; \ - } \ - str += cc_len; \ - } while ((void)0, 0) - -/* optional prefix checking with supplied code */ -#define CC_opt(prefix, code) \ - do { \ - size_t cc_len = strlen(prefix); \ - if (strncmp(str, prefix, cc_len) == 0) { \ - str += cc_len; \ - { code; } \ - } \ - } while ((void)0, 0) - -/* Decoding prefix into decimal */ -#define DECIMAL(x) \ - do { \ - unsigned long dec_x; \ - str = decode_decimal(str, &dec_x); \ - if (str == NULL) { \ - return ARGON2_DECODING_FAIL; \ - } \ - (x) = dec_x; \ - } while ((void)0, 0) - - -/* Decoding prefix into uint32_t decimal */ -#define DECIMAL_U32(x) \ - do { \ - unsigned long dec_x; \ - str = decode_decimal(str, &dec_x); \ - if (str == NULL || dec_x > UINT32_MAX) { \ - return ARGON2_DECODING_FAIL; \ - } \ - (x) = (uint32_t)dec_x; \ - } while ((void)0, 0) - - -/* Decoding base64 into a binary buffer */ -#define BIN(buf, max_len, len) \ - do { \ - size_t bin_len = (max_len); \ - str = from_base64(buf, &bin_len, str); \ - if (str == NULL || bin_len > UINT32_MAX) { \ - return ARGON2_DECODING_FAIL; \ - } \ - (len) = (uint32_t)bin_len; \ - } while ((void)0, 0) - - size_t maxsaltlen = ctx->saltlen; - size_t maxoutlen = ctx->outlen; - int validation_result; - const char* type_string; - - /* We should start with the argon2_type we are using */ - type_string = argon2_type2string(type, 0); - if (!type_string) { - return ARGON2_INCORRECT_TYPE; - } - - CC("$"); - CC(type_string); - - /* Reading the version number if the default is suppressed */ - ctx->version = ARGON2_VERSION_10; - CC_opt("$v=", DECIMAL_U32(ctx->version)); - - CC("$m="); - DECIMAL_U32(ctx->m_cost); - CC(",t="); - DECIMAL_U32(ctx->t_cost); - CC(",p="); - DECIMAL_U32(ctx->lanes); - ctx->threads = ctx->lanes; - - CC("$"); - BIN(ctx->salt, maxsaltlen, ctx->saltlen); - CC("$"); - BIN(ctx->out, maxoutlen, ctx->outlen); - - /* The rest of the fields get the default values */ - ctx->secret = NULL; - ctx->secretlen = 0; - ctx->ad = NULL; - ctx->adlen = 0; - ctx->allocate_cbk = NULL; - ctx->free_cbk = NULL; - ctx->flags = ARGON2_DEFAULT_FLAGS; - - /* On return, must have valid context */ - validation_result = validate_inputs(ctx); - if (validation_result != ARGON2_OK) { - return validation_result; - } - - /* Can't have any additional characters */ - if (*str == 0) { - return ARGON2_OK; - } else { - return ARGON2_DECODING_FAIL; - } -#undef CC -#undef CC_opt -#undef DECIMAL -#undef BIN -} - -int encode_string(char *dst, size_t dst_len, argon2_context *ctx, - argon2_type type) { -#define SS(str) \ - do { \ - size_t pp_len = strlen(str); \ - if (pp_len >= dst_len) { \ - return ARGON2_ENCODING_FAIL; \ - } \ - memcpy(dst, str, pp_len + 1); \ - dst += pp_len; \ - dst_len -= pp_len; \ - } while ((void)0, 0) - -#define SX(x) \ - do { \ - char tmp[30]; \ - sprintf(tmp, "%lu", (unsigned long)(x)); \ - SS(tmp); \ - } while ((void)0, 0) - -#define SB(buf, len) \ - do { \ - size_t sb_len = to_base64(dst, dst_len, buf, len); \ - if (sb_len == (size_t)-1) { \ - return ARGON2_ENCODING_FAIL; \ - } \ - dst += sb_len; \ - dst_len -= sb_len; \ - } while ((void)0, 0) - - const char* type_string = argon2_type2string(type, 0); - int validation_result = validate_inputs(ctx); - - if (!type_string) { - return ARGON2_ENCODING_FAIL; - } - - if (validation_result != ARGON2_OK) { - return validation_result; - } - - - SS("$"); - SS(type_string); - - SS("$v="); - SX(ctx->version); - - SS("$m="); - SX(ctx->m_cost); - SS(",t="); - SX(ctx->t_cost); - SS(",p="); - SX(ctx->lanes); - - SS("$"); - SB(ctx->salt, ctx->saltlen); - - SS("$"); - SB(ctx->out, ctx->outlen); - return ARGON2_OK; - -#undef SS -#undef SX -#undef SB -} - -size_t b64len(uint32_t len) { - size_t olen = ((size_t)len / 3) << 2; - - switch (len % 3) { - case 2: - olen++; - /* fall through */ - case 1: - olen += 2; - break; - } - - return olen; -} - -size_t numlen(uint32_t num) { - size_t len = 1; - while (num >= 10) { - ++len; - num = num / 10; - } - return len; -} - diff --git a/algo/argon2/argon2d/argon2d/encoding.h b/algo/argon2/argon2d/argon2d/encoding.h deleted file mode 100644 index 7e83ec92..00000000 --- a/algo/argon2/argon2d/argon2d/encoding.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef ENCODING_H -#define ENCODING_H -#include "argon2.h" - -#define ARGON2_MAX_DECODED_LANES UINT32_C(255) -#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8) -#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12) - -/* -* encode an Argon2 hash string into the provided buffer. 'dst_len' -* contains the size, in characters, of the 'dst' buffer; if 'dst_len' -* is less than the number of required characters (including the -* terminating 0), then this function returns ARGON2_ENCODING_ERROR. -* -* on success, ARGON2_OK is returned. -*/ -int encode_string(char *dst, size_t dst_len, argon2_context *ctx, - argon2_type type); - -/* -* Decodes an Argon2 hash string into the provided structure 'ctx'. -* The only fields that must be set prior to this call are ctx.saltlen and -* ctx.outlen (which must be the maximal salt and out length values that are -* allowed), ctx.salt and ctx.out (which must be buffers of the specified -* length), and ctx.pwd and ctx.pwdlen which must hold a valid password. -* -* Invalid input string causes an error. On success, the ctx is valid and all -* fields have been initialized. -* -* Returned value is ARGON2_OK on success, other ARGON2_ codes on error. -*/ -int decode_string(argon2_context *ctx, const char *str, argon2_type type); - -/* Returns the length of the encoded byte stream with length len */ -size_t b64len(uint32_t len); - -/* Returns the length of the encoded number num */ -size_t numlen(uint32_t num); - -#endif diff --git a/algo/argon2/argon2d/argon2d/opt.c b/algo/argon2/argon2d/argon2d/opt.c deleted file mode 100644 index 5164a1e9..00000000 --- a/algo/argon2/argon2d/argon2d/opt.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#include -#include -#include - -#include "argon2.h" -#include "core.h" -#include "simd-utils.h" -#include "../blake2/blake2.h" -#include "../blake2/blamka-round-opt.h" - -/* - * Function fills a new memory block and optionally XORs the old block over the new one. - * Memory must be initialized. - * @param state Pointer to the just produced block. Content will be updated(!) - * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block - * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) - * @pre all block pointers must be valid - */ - -#if defined(__AVX512F__) - -static inline __m512i blamka( __m512i x, __m512i y ) -{ - __m512i xy = _mm512_mul_epu32( x, y ); - return _mm512_add_epi64( _mm512_add_epi64( x, y ), - _mm512_add_epi64( xy, xy ) ); -} - -static void fill_block( __m512i *state, const block *ref_block, - block *next_block, int with_xor ) -{ - __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK]; - unsigned int i; - - if ( with_xor ) - { - for ( i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++ ) - { - state[i] = _mm512_xor_si512( state[i], - _mm512_load_si512( (const __m512i*)ref_block->v + i ) ); - block_XY[i] = _mm512_xor_si512( state[i], - _mm512_load_si512( (const __m512i*)next_block->v + i ) ); - } - } - else - { - for ( i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++ ) - block_XY[i] = state[i] = _mm512_xor_si512( state[i], - _mm512_load_si512( (const __m512i*)ref_block->v + i ) ); - } - - BLAKE2_ROUND_1( state[ 0], state[ 1], state[ 2], state[ 3], - state[ 4], state[ 5], state[ 6], state[ 7] ); - BLAKE2_ROUND_1( state[ 8], state[ 9], state[10], state[11], - state[12], state[13], state[14], state[15] ); - - BLAKE2_ROUND_2( state[ 0], state[ 2], state[ 4], state[ 6], - state[ 8], state[10], state[12], state[14] ); - BLAKE2_ROUND_2( state[ 1], state[ 3], state[ 5], state[ 7], - state[ 9], state[11], state[13], state[15] ); - - for ( i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++ ) - { - state[i] = _mm512_xor_si512( state[i], block_XY[i] ); - _mm512_store_si512( (__m512i*)next_block->v + i, state[i] ); - } -} - -#elif defined(__AVX2__) - -static void fill_block(__m256i *state, const block *ref_block, - block *next_block, int with_xor) { - __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - state[i] = _mm256_xor_si256( - state[i], _mm256_load_si256((const __m256i *)ref_block->v + i)); - block_XY[i] = _mm256_xor_si256( - state[i], _mm256_load_si256((const __m256i *)next_block->v + i)); - } - } else { - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - block_XY[i] = state[i] = _mm256_xor_si256( - state[i], _mm256_load_si256((const __m256i *)ref_block->v + i)); - } - } - - BLAKE2_ROUND_1( state[ 0], state[ 4], state[ 1], state[ 5], - state[ 2], state[ 6], state[ 3], state[ 7] ); - BLAKE2_ROUND_1( state[ 8], state[12], state[ 9], state[13], - state[10], state[14], state[11], state[15] ); - BLAKE2_ROUND_1( state[16], state[20], state[17], state[21], - state[18], state[22], state[19], state[23] ); - BLAKE2_ROUND_1( state[24], state[28], state[25], state[29], - state[26], state[30], state[27], state[31] ); - - BLAKE2_ROUND_2( state[ 0], state[ 4], state[ 8], state[12], - state[16], state[20], state[24], state[28] ); - BLAKE2_ROUND_2( state[ 1], state[ 5], state[ 9], state[13], - state[17], state[21], state[25], state[29] ); - BLAKE2_ROUND_2( state[ 2], state[ 6], state[10], state[14], - state[18], state[22], state[26], state[30] ); - BLAKE2_ROUND_2( state[ 3], state[ 7], state[11], state[15], - state[19], state[23], state[27], state[31] ); - - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - state[i] = _mm256_xor_si256(state[i], block_XY[i]); - _mm256_store_si256((__m256i *)next_block->v + i, state[i]); - } -} - -#else // SSE2 - -static void fill_block(__m128i *state, const block *ref_block, - block *next_block, int with_xor) { - __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - state[i] = _mm_xor_si128( - state[i], _mm_load_si128((const __m128i *)ref_block->v + i)); - block_XY[i] = _mm_xor_si128( - state[i], _mm_load_si128((const __m128i *)next_block->v + i)); - } - } else { - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - block_XY[i] = state[i] = _mm_xor_si128( - state[i], _mm_load_si128((const __m128i *)ref_block->v + i)); - } - } - - BLAKE2_ROUND( state[ 0], state[ 1], state[ 2], state[ 3], - state[ 4], state[ 5], state[ 6], state[ 7] ); - BLAKE2_ROUND( state[ 8], state[ 9], state[10], state[11], - state[12], state[13], state[14], state[15] ); - BLAKE2_ROUND( state[16], state[17], state[18], state[19], - state[20], state[21], state[22], state[23] ); - BLAKE2_ROUND( state[24], state[25], state[26], state[27], - state[28], state[29], state[30], state[31] ); - BLAKE2_ROUND( state[32], state[33], state[34], state[35], - state[36], state[37], state[38], state[39] ); - BLAKE2_ROUND( state[40], state[41], state[42], state[43], - state[44], state[45], state[46], state[47] ); - BLAKE2_ROUND( state[48], state[49], state[50], state[51], - state[52], state[53], state[54], state[55] ); - BLAKE2_ROUND( state[56], state[57], state[58], state[59], - state[60], state[61], state[62], state[63] ); - - BLAKE2_ROUND( state[ 0], state[ 8], state[16], state[24], - state[32], state[40], state[48], state[56] ); - BLAKE2_ROUND( state[ 1], state[ 9], state[17], state[25], - state[33], state[41], state[49], state[57] ); - BLAKE2_ROUND( state[ 2], state[10], state[18], state[26], - state[34], state[42], state[50], state[58] ); - BLAKE2_ROUND( state[ 3], state[11], state[19], state[27], - state[35], state[43], state[51], state[59] ); - BLAKE2_ROUND( state[ 4], state[12], state[20], state[28], - state[36], state[44], state[52], state[60] ); - BLAKE2_ROUND( state[ 5], state[13], state[21], state[29], - state[37], state[45], state[53], state[61] ); - BLAKE2_ROUND( state[ 6], state[14], state[22], state[30], - state[38], state[46], state[54], state[62] ); - BLAKE2_ROUND( state[ 7], state[15], state[23], state[31], - state[39], state[47], state[55], state[63] ); - - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - state[i] = _mm_xor_si128(state[i], block_XY[i]); - _mm_store_si128((__m128i *)next_block->v + i, state[i]); - } -} - -#endif - -#if 0 -static void next_addresses(block *address_block, block *input_block) { - /*Temporary zero-initialized blocks*/ -#if defined(__AVX512F__) - __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK]; - __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK]; -#elif defined(__AVX2__) - __m256i zero_block[ARGON2_HWORDS_IN_BLOCK]; - __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK]; -#else - __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; - __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK]; -#endif - - memset(zero_block, 0, sizeof(zero_block)); - memset(zero2_block, 0, sizeof(zero2_block)); - - /*Increasing index counter*/ - input_block->v[6]++; - - /*First iteration of G*/ - fill_block(zero_block, input_block, address_block, 0); - - /*Second iteration of G*/ - fill_block(zero2_block, address_block, address_block, 0); -} -#endif - -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position) { - block *ref_block = NULL, *curr_block = NULL; -// block address_block, input_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; -#if defined(__AVX512F__) - __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK]; -#elif defined(__AVX2__) - __m256i state[ARGON2_HWORDS_IN_BLOCK]; -#else - __m128i state[ARGON2_OWORDS_IN_BLOCK]; -#endif -// int data_independent_addressing; - - if (instance == NULL) { - return; - } - - // data_independent_addressing = - // (instance->type == Argon2_i) || - // (instance->type == Argon2_id && (position.pass == 0) && - // (position.slice < ARGON2_SYNC_POINTS / 2)); - - // if (data_independent_addressing) { - // init_block_value(&input_block, 0); - - // input_block.v[0] = position.pass; - // input_block.v[1] = position.lane; - // input_block.v[2] = position.slice; - // input_block.v[3] = instance->memory_blocks; - // input_block.v[4] = instance->passes; - // input_block.v[5] = instance->type; - // } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - - /* Don't forget to generate the first block of addresses: */ -// if (data_independent_addressing) { -// next_addresses(&address_block, &input_block); -// } - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ -// if (data_independent_addressing) { -// if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { -// next_addresses(&address_block, &input_block); -// } -// pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; -// } else { - pseudo_rand = instance->memory[prev_offset].v[0]; -// } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - if (ARGON2_VERSION_10 == instance->version) { - /* version 1.2.1 and earlier: overwrite, not XOR */ - fill_block(state, ref_block, curr_block, 0); - } else { - if(0 == position.pass) { - fill_block(state, ref_block, curr_block, 0); - } else { - fill_block(state, ref_block, curr_block, 1); - } - } - } -} diff --git a/algo/argon2/argon2d/blake2/blake2-impl.h b/algo/argon2/argon2d/blake2/blake2-impl.h deleted file mode 100644 index 241f0beb..00000000 --- a/algo/argon2/argon2d/blake2/blake2-impl.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef PORTABLE_BLAKE2_IMPL_H -#define PORTABLE_BLAKE2_IMPL_H - -#include -#include - -#if defined(_MSC_VER) -#define BLAKE2_INLINE __inline -#elif defined(__GNUC__) || defined(__clang__) -#define BLAKE2_INLINE __inline__ -#else -#define BLAKE2_INLINE -#endif - -/* Argon2 Team - Begin Code */ -/* - Not an exhaustive list, but should cover the majority of modern platforms - Additionally, the code will always be correct---this is only a performance - tweak. -*/ -#if (defined(__BYTE_ORDER__) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ - defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ - defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ - defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ - defined(_M_ARM) -#define NATIVE_LITTLE_ENDIAN -#endif -/* Argon2 Team - End Code */ - -static BLAKE2_INLINE uint32_t load32(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - uint32_t w; - memcpy(&w, src, sizeof w); - return w; -#else - const uint8_t *p = (const uint8_t *)src; - uint32_t w = *p++; - w |= (uint32_t)(*p++) << 8; - w |= (uint32_t)(*p++) << 16; - w |= (uint32_t)(*p++) << 24; - return w; -#endif -} - -static BLAKE2_INLINE uint64_t load64(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - uint64_t w; - memcpy(&w, src, sizeof w); - return w; -#else - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - w |= (uint64_t)(*p++) << 48; - w |= (uint64_t)(*p++) << 56; - return w; -#endif -} - -static BLAKE2_INLINE void store32(void *dst, uint32_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy(dst, &w, sizeof w); -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -static BLAKE2_INLINE void store64(void *dst, uint64_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy(dst, &w, sizeof w); -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -static BLAKE2_INLINE uint64_t load48(const void *src) { - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - return w; -} - -static BLAKE2_INLINE void store48(void *dst, uint64_t w) { - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -} - -static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { - return (w >> c) | (w << (32 - c)); -} - -static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { - return (w >> c) | (w << (64 - c)); -} - -void clear_internal_memory(void *v, size_t n); - -#endif diff --git a/algo/argon2/argon2d/blake2/blake2.h b/algo/argon2/argon2d/blake2/blake2.h deleted file mode 100644 index a452f33e..00000000 --- a/algo/argon2/argon2d/blake2/blake2.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef PORTABLE_BLAKE2_H -#define PORTABLE_BLAKE2_H - -#include -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -enum blake2b_constant { - BLAKE2B_BLOCKBYTES = 128, - BLAKE2B_OUTBYTES = 64, - BLAKE2B_KEYBYTES = 64, - BLAKE2B_SALTBYTES = 16, - BLAKE2B_PERSONALBYTES = 16 -}; - -#pragma pack(push, 1) -typedef struct __blake2b_param { - uint8_t digest_length; /* 1 */ - uint8_t key_length; /* 2 */ - uint8_t fanout; /* 3 */ - uint8_t depth; /* 4 */ - uint32_t leaf_length; /* 8 */ - uint64_t node_offset; /* 16 */ - uint8_t node_depth; /* 17 */ - uint8_t inner_length; /* 18 */ - uint8_t reserved[14]; /* 32 */ - uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ - uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ -} blake2b_param; -#pragma pack(pop) - -typedef struct __blake2b_state { - uint64_t h[8]; - uint64_t t[2]; - uint64_t f[2]; - uint8_t buf[BLAKE2B_BLOCKBYTES]; - unsigned buflen; - unsigned outlen; - uint8_t last_node; -} blake2b_state; - -/* Ensure param structs have not been wrongly padded */ -/* Poor man's static_assert */ -enum { - blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), - blake2_size_check_2 = - 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) -}; - -/* Streaming API */ -int blake2b_init(blake2b_state *S, size_t outlen); -int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, - size_t keylen); -int blake2b_init_param(blake2b_state *S, const blake2b_param *P); -int blake2b_update(blake2b_state *S, const void *in, size_t inlen); -int blake2b_final(blake2b_state *S, void *out, size_t outlen); - -/* Simple API */ -int blake2b(void *out, size_t outlen, const void *in, size_t inlen, - const void *key, size_t keylen); - -/* Argon2 Team - Begin Code */ -int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); -/* Argon2 Team - End Code */ - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/algo/argon2/argon2d/blake2/blake2b.c b/algo/argon2/argon2d/blake2/blake2b.c deleted file mode 100644 index ca05df59..00000000 --- a/algo/argon2/argon2d/blake2/blake2b.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#include -#include -#include - -#include "blake2.h" -#include "blake2-impl.h" - -static const uint64_t blake2b_IV[8] = { - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)}; - -static const unsigned int blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, -}; - -static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { - S->f[1] = (uint64_t)-1; -} - -static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { - if (S->last_node) { - blake2b_set_lastnode(S); - } - S->f[0] = (uint64_t)-1; -} - -static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, - uint64_t inc) { - S->t[0] += inc; - S->t[1] += (S->t[0] < inc); -} - -static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { - clear_internal_memory(S, sizeof(*S)); /* wipe */ - blake2b_set_lastblock(S); /* invalidate for further use */ -} - -static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { - memset(S, 0, sizeof(*S)); - memcpy(S->h, blake2b_IV, sizeof(S->h)); -} - -int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { - const unsigned char *p = (const unsigned char *)P; - unsigned int i; - - if (NULL == P || NULL == S) { - return -1; - } - - blake2b_init0(S); - /* IV XOR Parameter Block */ - for (i = 0; i < 8; ++i) { - S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); - } - S->outlen = P->digest_length; - return 0; -} - -/* Sequential blake2b initialization */ -int blake2b_init(blake2b_state *S, size_t outlen) { - blake2b_param P; - - if (S == NULL) { - return -1; - } - - if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { - blake2b_invalidate_state(S); - return -1; - } - - /* Setup Parameter Block for unkeyed BLAKE2 */ - P.digest_length = (uint8_t)outlen; - P.key_length = 0; - P.fanout = 1; - P.depth = 1; - P.leaf_length = 0; - P.node_offset = 0; - P.node_depth = 0; - P.inner_length = 0; - memset(P.reserved, 0, sizeof(P.reserved)); - memset(P.salt, 0, sizeof(P.salt)); - memset(P.personal, 0, sizeof(P.personal)); - - return blake2b_init_param(S, &P); -} - -int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, - size_t keylen) { - blake2b_param P; - - if (S == NULL) { - return -1; - } - - if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { - blake2b_invalidate_state(S); - return -1; - } - - if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) { - blake2b_invalidate_state(S); - return -1; - } - - /* Setup Parameter Block for keyed BLAKE2 */ - P.digest_length = (uint8_t)outlen; - P.key_length = (uint8_t)keylen; - P.fanout = 1; - P.depth = 1; - P.leaf_length = 0; - P.node_offset = 0; - P.node_depth = 0; - P.inner_length = 0; - memset(P.reserved, 0, sizeof(P.reserved)); - memset(P.salt, 0, sizeof(P.salt)); - memset(P.personal, 0, sizeof(P.personal)); - - if (blake2b_init_param(S, &P) < 0) { - blake2b_invalidate_state(S); - return -1; - } - - { - uint8_t block[BLAKE2B_BLOCKBYTES]; - memset(block, 0, BLAKE2B_BLOCKBYTES); - memcpy(block, key, keylen); - blake2b_update(S, block, BLAKE2B_BLOCKBYTES); - /* Burn the key from stack */ - clear_internal_memory(block, BLAKE2B_BLOCKBYTES); - } - return 0; -} - -static void blake2b_compress(blake2b_state *S, const uint8_t *block) { - uint64_t m[16]; - uint64_t v[16]; - unsigned int i, r; - - for (i = 0; i < 16; ++i) { - m[i] = load64(block + i * sizeof(m[i])); - } - - for (i = 0; i < 8; ++i) { - v[i] = S->h[i]; - } - - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ S->t[0]; - v[13] = blake2b_IV[5] ^ S->t[1]; - v[14] = blake2b_IV[6] ^ S->f[0]; - v[15] = blake2b_IV[7] ^ S->f[1]; - -#define G(r, i, a, b, c, d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define ROUND(r) \ - do { \ - G(r, 0, v[0], v[4], v[8], v[12]); \ - G(r, 1, v[1], v[5], v[9], v[13]); \ - G(r, 2, v[2], v[6], v[10], v[14]); \ - G(r, 3, v[3], v[7], v[11], v[15]); \ - G(r, 4, v[0], v[5], v[10], v[15]); \ - G(r, 5, v[1], v[6], v[11], v[12]); \ - G(r, 6, v[2], v[7], v[8], v[13]); \ - G(r, 7, v[3], v[4], v[9], v[14]); \ - } while ((void)0, 0) - - for (r = 0; r < 12; ++r) { - ROUND(r); - } - - for (i = 0; i < 8; ++i) { - S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - } - -#undef G -#undef ROUND -} - -int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { - const uint8_t *pin = (const uint8_t *)in; - - if (inlen == 0) { - return 0; - } - - /* Sanity check */ - if (S == NULL || in == NULL) { - return -1; - } - - /* Is this a reused state? */ - if (S->f[0] != 0) { - return -1; - } - - if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { - /* Complete current block */ - size_t left = S->buflen; - size_t fill = BLAKE2B_BLOCKBYTES - left; - memcpy(&S->buf[left], pin, fill); - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, S->buf); - S->buflen = 0; - inlen -= fill; - pin += fill; - /* Avoid buffer copies when possible */ - while (inlen > BLAKE2B_BLOCKBYTES) { - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, pin); - inlen -= BLAKE2B_BLOCKBYTES; - pin += BLAKE2B_BLOCKBYTES; - } - } - memcpy(&S->buf[S->buflen], pin, inlen); - S->buflen += (unsigned int)inlen; - return 0; -} - -int blake2b_final(blake2b_state *S, void *out, size_t outlen) { - uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; - unsigned int i; - - /* Sanity checks */ - if (S == NULL || out == NULL || outlen < S->outlen) { - return -1; - } - - /* Is this a reused state? */ - if (S->f[0] != 0) { - return -1; - } - - blake2b_increment_counter(S, S->buflen); - blake2b_set_lastblock(S); - memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ - blake2b_compress(S, S->buf); - - for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ - store64(buffer + sizeof(S->h[i]) * i, S->h[i]); - } - - memcpy(out, buffer, S->outlen); - clear_internal_memory(buffer, sizeof(buffer)); - clear_internal_memory(S->buf, sizeof(S->buf)); - clear_internal_memory(S->h, sizeof(S->h)); - return 0; -} - -int blake2b(void *out, size_t outlen, const void *in, size_t inlen, - const void *key, size_t keylen) { - blake2b_state S; - int ret = -1; - - /* Verify parameters */ - if (NULL == in && inlen > 0) { - goto fail; - } - - if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) { - goto fail; - } - - if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) { - goto fail; - } - - if (keylen > 0) { - if (blake2b_init_key(&S, outlen, key, keylen) < 0) { - goto fail; - } - } else { - if (blake2b_init(&S, outlen) < 0) { - goto fail; - } - } - - if (blake2b_update(&S, in, inlen) < 0) { - goto fail; - } - ret = blake2b_final(&S, out, outlen); - -fail: - clear_internal_memory(&S, sizeof(S)); - return ret; -} - -/* Argon2 Team - Begin Code */ -int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { - uint8_t *out = (uint8_t *)pout; - blake2b_state blake_state; - uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; - int ret = -1; - - if (outlen > UINT32_MAX) { - goto fail; - } - - /* Ensure little-endian byte order! */ - store32(outlen_bytes, (uint32_t)outlen); - -#define TRY(statement) \ - do { \ - ret = statement; \ - if (ret < 0) { \ - goto fail; \ - } \ - } while ((void)0, 0) - - if (outlen <= BLAKE2B_OUTBYTES) { - TRY(blake2b_init(&blake_state, outlen)); - TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); - TRY(blake2b_update(&blake_state, in, inlen)); - TRY(blake2b_final(&blake_state, out, outlen)); - } else { - uint32_t toproduce; - uint8_t out_buffer[BLAKE2B_OUTBYTES]; - uint8_t in_buffer[BLAKE2B_OUTBYTES]; - TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES)); - TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); - TRY(blake2b_update(&blake_state, in, inlen)); - TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES)); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; - - while (toproduce > BLAKE2B_OUTBYTES) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, - BLAKE2B_OUTBYTES, NULL, 0)); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce -= BLAKE2B_OUTBYTES / 2; - } - - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL, - 0)); - memcpy(out, out_buffer, toproduce); - } -fail: - clear_internal_memory(&blake_state, sizeof(blake_state)); - return ret; -#undef TRY -} -/* Argon2 Team - End Code */ diff --git a/algo/argon2/argon2d/blake2/blamka-round-opt.h b/algo/argon2/argon2d/blake2/blamka-round-opt.h deleted file mode 100644 index 4cb8bdad..00000000 --- a/algo/argon2/argon2d/blake2/blamka-round-opt.h +++ /dev/null @@ -1,465 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef BLAKE_ROUND_MKA_OPT_H -#define BLAKE_ROUND_MKA_OPT_H - -#include "blake2-impl.h" - -#include -#if defined(__SSSE3__) -#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ -#endif - -#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__)) -#include -#endif - -#include "simd-utils.h" - -#if !defined(__AVX512F__) -#if !defined(__AVX2__) -#if !defined(__XOP__) -#if defined(__SSSE3__) -#define r16 \ - (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) -#define r24 \ - (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) \ - ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ - : (-(c) == 24) \ - ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) \ - ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) \ - ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_slli_epi64((x), 64 - (-(c)))) -#else /* defined(__SSE2__) */ -#define _mm_roti_epi64(r, c) \ - _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) -#endif -#else -#endif - -static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { - const __m128i z = _mm_mul_epu32(x, y); - return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -32); \ - D1 = _mm_roti_epi64(D1, -32); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -24); \ - B1 = _mm_roti_epi64(B1, -24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -16); \ - D1 = _mm_roti_epi64(D1, -16); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -63); \ - B1 = _mm_roti_epi64(B1, -63); \ - } while ((void)0, 0) - -#if defined(__SSSE3__) -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ - __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D1, D0, 8); \ - t1 = _mm_alignr_epi8(D0, D1, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ - __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D0, D1, 8); \ - t1 = _mm_alignr_epi8(D1, D0, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) -#else /* SSE2 */ -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = D0; \ - __m128i t1 = B0; \ - D0 = C0; \ - C0 = C1; \ - C1 = D0; \ - D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ - D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ - B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ - B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0, t1; \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - t0 = B0; \ - t1 = D0; \ - B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ - B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ - D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ - D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) -#endif - -#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) -#else /* __AVX2__ */ - -#include - -#define rotr32( x ) mm256_ror_64( x, 32 ) -#define rotr24( x ) mm256_ror_64( x, 24 ) -#define rotr16( x ) mm256_ror_64( x, 16 ) -#define rotr63( x ) mm256_rol_64( x, 1 ) - -//#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)) -//#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) -//#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) -//#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) - -#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i ml0, ml1; \ - ml0 = _mm256_mul_epu32(A0, B0); \ - ml1 = _mm256_mul_epu32(A1, B1); \ - ml0 = _mm256_add_epi64(ml0, ml0); \ - ml1 = _mm256_add_epi64(ml1, ml1); \ - A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml0)); \ - A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml1)); \ - D0 = _mm256_xor_si256(D0, A0); \ - D1 = _mm256_xor_si256(D1, A1); \ - D0 = rotr32(D0); \ - D1 = rotr32(D1); \ - ml0 = _mm256_mul_epu32(C0, D0); \ - ml1 = _mm256_mul_epu32(C1, D1); \ - ml0 = _mm256_add_epi64(ml0, ml0); \ - ml1 = _mm256_add_epi64(ml1, ml1); \ - C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml0)); \ - C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml1)); \ - B0 = _mm256_xor_si256(B0, C0); \ - B1 = _mm256_xor_si256(B1, C1); \ - B0 = rotr24(B0); \ - B1 = rotr24(B1); \ - } while((void)0, 0); - -#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i ml0, ml1; \ - ml0 = _mm256_mul_epu32(A0, B0); \ - ml1 = _mm256_mul_epu32(A1, B1); \ - ml0 = _mm256_add_epi64(ml0, ml0); \ - ml1 = _mm256_add_epi64(ml1, ml1); \ - A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml0)); \ - A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml1)); \ - D0 = _mm256_xor_si256(D0, A0); \ - D1 = _mm256_xor_si256(D1, A1); \ - D0 = rotr16(D0); \ - D1 = rotr16(D1); \ - ml0 = _mm256_mul_epu32(C0, D0); \ - ml1 = _mm256_mul_epu32(C1, D1); \ - ml0 = _mm256_add_epi64(ml0, ml0); \ - ml1 = _mm256_add_epi64(ml1, ml1); \ - C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml0)); \ - C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml1)); \ - B0 = _mm256_xor_si256(B0, C0); \ - B1 = _mm256_xor_si256(B1, C1); \ - B0 = rotr63(B0); \ - B1 = rotr63(B1); \ - } while((void)0, 0); - -#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ - C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ - \ - B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ - C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ - D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ - } while((void)0, 0); - -#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ - __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ - B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - tmp1 = C0; \ - B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - C0 = C1; \ - tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ - C1 = tmp1; \ - tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ - D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - } while(0); - -#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ - C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ - \ - B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ - C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ - D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ - } while((void)0, 0); - -#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ - __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ - B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - tmp1 = C0; \ - B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - C0 = C1; \ - tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \ - C1 = tmp1; \ - tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \ - D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - } while((void)0, 0); - -#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \ - do{ \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - } while((void)0, 0); - -#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do{ \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - } while((void)0, 0); - -#endif /* __AVX2__ */ - -#else /* __AVX512F__ */ - -#include - -static inline __m512i muladd(__m512i x, __m512i y) -{ - __m512i z = _mm512_mul_epu32(x, y); - return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = muladd(A0, B0); \ - A1 = muladd(A1, B1); \ -\ - D0 = _mm512_xor_si512(D0, A0); \ - D1 = _mm512_xor_si512(D1, A1); \ -\ - D0 = _mm512_ror_epi64(D0, 32); \ - D1 = _mm512_ror_epi64(D1, 32); \ -\ - C0 = muladd(C0, D0); \ - C1 = muladd(C1, D1); \ -\ - B0 = _mm512_xor_si512(B0, C0); \ - B1 = _mm512_xor_si512(B1, C1); \ -\ - B0 = _mm512_ror_epi64(B0, 24); \ - B1 = _mm512_ror_epi64(B1, 24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = muladd(A0, B0); \ - A1 = muladd(A1, B1); \ -\ - D0 = _mm512_xor_si512(D0, A0); \ - D1 = _mm512_xor_si512(D1, A1); \ -\ - D0 = _mm512_ror_epi64(D0, 16); \ - D1 = _mm512_ror_epi64(D1, 16); \ -\ - C0 = muladd(C0, D0); \ - C1 = muladd(C1, D1); \ -\ - B0 = _mm512_xor_si512(B0, C0); \ - B1 = _mm512_xor_si512(B1, C1); \ -\ - B0 = _mm512_ror_epi64(B0, 63); \ - B1 = _mm512_ror_epi64(B1, 63); \ - } while ((void)0, 0) - -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ - B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ -\ - C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ -\ - D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ - D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ - B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ -\ - C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ -\ - D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ - D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#define SWAP_HALVES(A0, A1) \ - do { \ - __m512i t; \ - t = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \ - A1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \ - A0 = t; \ - } while((void)0, 0) - -#define SWAP_QUARTERS(A0, A1) \ - do { \ - SWAP_HALVES(A0, A1); \ - A0 = _mm512_shuffle_i64x2( A0, A0, 0xd8 ); \ - A1 = _mm512_shuffle_i64x2( A1, A1, 0xd8 ); \ - } while((void)0, 0) - -#define UNSWAP_QUARTERS(A0, A1) \ - do { \ - A0 = _mm512_shuffle_i64x2( A0, A0, 0xd8 ); \ - A1 = _mm512_shuffle_i64x2( A1, A1, 0xd8 ); \ - SWAP_HALVES(A0, A1); \ - } while((void)0, 0) - -#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \ - do { \ - SWAP_HALVES(A0, B0); \ - SWAP_HALVES(C0, D0); \ - SWAP_HALVES(A1, B1); \ - SWAP_HALVES(C1, D1); \ - BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ - SWAP_HALVES(A0, B0); \ - SWAP_HALVES(C0, D0); \ - SWAP_HALVES(A1, B1); \ - SWAP_HALVES(C1, D1); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - SWAP_QUARTERS(A0, A1); \ - SWAP_QUARTERS(B0, B1); \ - SWAP_QUARTERS(C0, C1); \ - SWAP_QUARTERS(D0, D1); \ - BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ - UNSWAP_QUARTERS(A0, A1); \ - UNSWAP_QUARTERS(B0, B1); \ - UNSWAP_QUARTERS(C0, C1); \ - UNSWAP_QUARTERS(D0, D1); \ - } while ((void)0, 0) - -#endif /* __AVX512F__ */ -#endif /* BLAKE_ROUND_MKA_OPT_H */ diff --git a/algo/argon2/argon2d/blake2/blamka-round-ref.h b/algo/argon2/argon2d/blake2/blamka-round-ref.h deleted file mode 100644 index b8f2cf47..00000000 --- a/algo/argon2/argon2d/blake2/blamka-round-ref.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Argon2 reference source code package - reference C implementations - * - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves - * - * You may use this work under the terms of a Creative Commons CC0 1.0 - * License/Waiver or the Apache Public License 2.0, at your option. The terms of - * these licenses can be found at: - * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - * - * You should have received a copy of both of these licenses along with this - * software. If not, they may be obtained at the above URLs. - */ - -#ifndef BLAKE_ROUND_MKA_H -#define BLAKE_ROUND_MKA_H - -#include "blake2.h" -#include "blake2-impl.h" - -/* designed by the Lyra PHC team */ -static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { - const uint64_t m = UINT64_C(0xFFFFFFFF); - const uint64_t xy = (x & m) * (y & m); - return x + y + 2 * xy; -} - -#define G(a, b, c, d) \ - do { \ - a = fBlaMka(a, b); \ - d = rotr64(d ^ a, 32); \ - c = fBlaMka(c, d); \ - b = rotr64(b ^ c, 24); \ - a = fBlaMka(a, b); \ - d = rotr64(d ^ a, 16); \ - c = fBlaMka(c, d); \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \ - v12, v13, v14, v15) \ - do { \ - G(v0, v4, v8, v12); \ - G(v1, v5, v9, v13); \ - G(v2, v6, v10, v14); \ - G(v3, v7, v11, v15); \ - G(v0, v5, v10, v15); \ - G(v1, v6, v11, v12); \ - G(v2, v7, v8, v13); \ - G(v3, v4, v9, v14); \ - } while ((void)0, 0) - -#endif diff --git a/algo/hodl/aes.c b/algo/hodl/aes.c deleted file mode 100644 index 5be2af39..00000000 --- a/algo/hodl/aes.c +++ /dev/null @@ -1,182 +0,0 @@ -#include -#include -#include "wolf-aes.h" -#include "miner.h" - -#if defined(__AES__) - -static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2) -{ - __m128i tmp4; - *tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF); - tmp4 = _mm_slli_si128(*tmp1, 0x04); - *tmp1 = _mm_xor_si128(*tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp1 = _mm_xor_si128(*tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp1 = _mm_xor_si128(*tmp1, tmp4); - *tmp1 = _mm_xor_si128(*tmp1, *tmp2); -} - -static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3) -{ - __m128i tmp2, tmp4; - - tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00); - tmp2 = _mm_shuffle_epi32(tmp4, 0xAA); - tmp4 = _mm_slli_si128(*tmp3, 0x04); - *tmp3 = _mm_xor_si128(*tmp3, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp3 = _mm_xor_si128(*tmp3, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - *tmp3 = _mm_xor_si128(*tmp3, tmp4); - *tmp3 = _mm_xor_si128(*tmp3, tmp2); -} - -// Special thanks to Intel for helping me -// with ExpandAESKey256() and its subroutines -void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf) -{ - __m128i tmp1, tmp2, tmp3; - - tmp1 = keys[0] = KeyBuf[0]; - tmp3 = keys[1] = KeyBuf[1]; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[2] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[3] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[4] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[5] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[6] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[7] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[8] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[9] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[10] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[11] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[12] = tmp1; - ExpandAESKey256_sub2(&tmp1, &tmp3); - keys[13] = tmp3; - - tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40); - ExpandAESKey256_sub1(&tmp1, &tmp2); - keys[14] = tmp1; -} - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - -#define AESENC(i,j) \ - State[j] = _mm_aesenc_si128(State[j], ExpandedKey[j][i]); - -#define AESENC_N(i) \ - AESENC(i,0) \ - AESENC(i,1) \ - AESENC(i,2) \ - AESENC(i,3) \ - AESENC(i,4) \ - AESENC(i,5) \ - AESENC(i,6) \ - AESENC(i,7) \ - - -static inline void AES256Core(__m128i* State, __m128i ExpandedKey[][16]) -{ - const uint32_t N = AES_PARALLEL_N; - - for(int j=0; j> 8) & 0xff) | (((x) & 0xff) << 8))) - -static __inline unsigned short int -__bswap_16 (unsigned short int __bsx) -{ - return __bswap_constant_16 (__bsx); -} - -// LE -# define htobe16(x) __bswap_16 (x) -# define htole16(x) (x) -# define be16toh(x) __bswap_16 (x) -# define le16toh(x) (x) - -// BE -//# define htole16(x) __bswap_16 (x) -//# define htobe16(x) (x) -//# define le16toh(x) __bswap_16 (x) -//# define be16toh(x) (x) - -#define __bswap_constant_32(x) \ - ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ - (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) - -static __inline unsigned int -__bswap_32 (unsigned int __bsx) -{ - return __builtin_bswap32 (__bsx); -} - -// LE -# define htobe32(x) __bswap_32 (x) -# define htole32(x) (x) -# define be32toh(x) __bswap_32 (x) -# define le32toh(x) (x) - -// BE -//# define htole32(x) __bswap_32 (x) -//# define htobe32(x) (x) -//# define le32toh(x) __bswap_32 (x) -//# define be32toh(x) (x) - -# define __bswap_constant_64(x) \ - ((((x) & 0xff00000000000000ull) >> 56) \ - | (((x) & 0x00ff000000000000ull) >> 40) \ - | (((x) & 0x0000ff0000000000ull) >> 24) \ - | (((x) & 0x000000ff00000000ull) >> 8) \ - | (((x) & 0x00000000ff000000ull) << 8) \ - | (((x) & 0x0000000000ff0000ull) << 24) \ - | (((x) & 0x000000000000ff00ull) << 40) \ - | (((x) & 0x00000000000000ffull) << 56)) - -static __inline uint64_t -__bswap_64 (uint64_t __bsx) -{ - return __bswap_constant_64 (__bsx); -} - -// LE -# define htobe64(x) __bswap_64 (x) -# define htole64(x) (x) -# define be64toh(x) __bswap_64 (x) -# define le64toh(x) (x) - -// BE -//# define htole64(x) __bswap_64 (x) -//# define htobe64(x) (x) -//# define le64toh(x) __bswap_64 (x) -//# define be64toh(x) (x) - -#endif \ No newline at end of file diff --git a/algo/hodl/hodl-gate.c b/algo/hodl/hodl-gate.c deleted file mode 100644 index ca94fd3e..00000000 --- a/algo/hodl/hodl-gate.c +++ /dev/null @@ -1,184 +0,0 @@ -#include -#include - -#include "hodl-gate.h" -#include "hodl-wolf.h" - -#define HODL_NSTARTLOC_INDEX 20 -#define HODL_NFINALCALC_INDEX 21 - -static struct work hodl_work; - -pthread_barrier_t hodl_barrier; - -// All references to this buffer are local to this file, so no args -// need to be passed. -unsigned char *hodl_scratchbuf = NULL; - -void hodl_le_build_stratum_request( char* req, struct work* work, - struct stratum_ctx *sctx ) -{ - uint32_t ntime, nonce, nstartloc, nfinalcalc; - char ntimestr[9], noncestr[9], nstartlocstr[9], nfinalcalcstr[9]; - unsigned char *xnonce2str; - - le32enc( &ntime, work->data[ algo_gate.ntime_index ] ); - le32enc( &nonce, work->data[ algo_gate.nonce_index ] ); - bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) ); - bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) ); - xnonce2str = abin2hex(work->xnonce2, work->xnonce2_len ); - le32enc( &nstartloc, work->data[ HODL_NSTARTLOC_INDEX ] ); - le32enc( &nfinalcalc, work->data[ HODL_NFINALCALC_INDEX ] ); - bin2hex( nstartlocstr, (char*)(&nstartloc), sizeof(uint32_t) ); - bin2hex( nfinalcalcstr, (char*)(&nfinalcalc), sizeof(uint32_t) ); - sprintf( req, "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}", - rpc_user, work->job_id, xnonce2str, ntimestr, noncestr, - nstartlocstr, nfinalcalcstr ); - free( xnonce2str ); -} - -char* hodl_malloc_txs_request( struct work *work ) -{ - char* req; - json_t *val; - char data_str[2 * sizeof(work->data) + 1]; - int i; - - for ( i = 0; i < ARRAY_SIZE(work->data); i++ ) - be32enc( work->data + i, work->data[i] ); - - bin2hex( data_str, (unsigned char *)work->data, 88 ); - if ( work->workid ) - { - char *params; - val = json_object(); - json_object_set_new( val, "workid", json_string( work->workid ) ); - params = json_dumps( val, 0 ); - json_decref( val ); - req = malloc( 128 + 2*88 + strlen( work->txs ) + strlen( params ) ); - sprintf( req, - "{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n", - data_str, work->txs, params); - free( params ); - } - else - { - req = malloc( 128 + 2*88 + strlen(work->txs)); - sprintf( req, - "{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n", - data_str, work->txs); - } - return req; -} - -void hodl_build_block_header( struct work* g_work, uint32_t version, - uint32_t *prevhash, uint32_t *merkle_tree, - uint32_t ntime, uint32_t nbits ) -{ - int i; - - memset( g_work->data, 0, sizeof(g_work->data) ); - g_work->data[0] = version; - - if ( have_stratum ) - for ( i = 0; i < 8; i++ ) - g_work->data[ 1+i ] = le32dec( prevhash + i ); - else - for (i = 0; i < 8; i++) - g_work->data[ 8-i ] = le32dec( prevhash + i ); - - for ( i = 0; i < 8; i++ ) - g_work->data[ 9+i ] = be32dec( merkle_tree + i ); - - g_work->data[ algo_gate.ntime_index ] = ntime; - g_work->data[ algo_gate.nbits_index ] = nbits; - g_work->data[22] = 0x80000000; - g_work->data[31] = 0x00000280; -} - -// called only by thread 0, saves a backup of g_work -void hodl_get_new_work( struct work* work, struct work* g_work) -{ -// pthread_rwlock_rdlock( &g_work_lock ); - - work_free( &hodl_work ); - work_copy( &hodl_work, g_work ); - hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999; - -// pthread_rwlock_unlock( &g_work_lock ); -} - -json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url ) -{ - json_t *val; - char *req = NULL; - - if ( have_gbt ) - { - req = malloc( strlen( gbt_lp_req ) + strlen( lp_id ) + 1 ); - sprintf( req, gbt_lp_req, lp_id ); - } - val = json_rpc_call( curl, lp_url, rpc_userpass, - req ? req : getwork_req, err, JSON_RPC_LONGPOLL ); - free( req ); - return val; -} - -// called by every thread, copies the backup to each thread's work. -void hodl_resync_threads( int thr_id, struct work* work ) -{ - int nonce_index = algo_gate.nonce_index; - pthread_barrier_wait( &hodl_barrier ); - if ( memcmp( work->data, hodl_work.data, algo_gate.work_cmp_size ) ) - { - work_free( work ); - work_copy( work, &hodl_work ); - } - work->data[ nonce_index ] = swab32( hodl_work.data[ nonce_index ] ); - work_restart[thr_id].restart = 0; -} - -bool hodl_do_this_thread( int thr_id ) -{ - return ( thr_id == 0 ); -} - -int hodl_scanhash( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ -#if defined(__AES__) - GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id ); - pthread_barrier_wait( &hodl_barrier ); - return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr ); -#endif - return false; -} - -bool register_hodl_algo( algo_gate_t* gate ) -{ -#if !defined(__AES__) - applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version."); - return false; -#endif - - if ( GARBAGE_SIZE % opt_n_threads ) - applog( LOG_WARNING,"WARNING: Thread count must be power of 2. Miner may crash or produce invalid hash!" ); - - pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads ); - gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT; - gate->scanhash = (void*)&hodl_scanhash; - gate->get_new_work = (void*)&hodl_get_new_work; - gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call; - gate->build_stratum_request = (void*)&hodl_le_build_stratum_request; - gate->malloc_txs_request = (void*)&hodl_malloc_txs_request; - gate->build_block_header = (void*)&hodl_build_block_header; - gate->resync_threads = (void*)&hodl_resync_threads; - gate->do_this_thread = (void*)&hodl_do_this_thread; - gate->work_cmp_size = 76; - hodl_scratchbuf = (unsigned char*)_mm_malloc( 1 << 30, 64 ); - allow_getwork = false; - opt_target_factor = 8388608.0; - return ( hodl_scratchbuf != NULL ); -} - - diff --git a/algo/hodl/hodl-gate.h b/algo/hodl/hodl-gate.h deleted file mode 100644 index 9a8ecf75..00000000 --- a/algo/hodl/hodl-gate.h +++ /dev/null @@ -1,6 +0,0 @@ -#include "algo-gate-api.h" - -extern unsigned char *hodl_scratchbuf; - -bool register_hodl_algo ( algo_gate_t* gate ); - diff --git a/algo/hodl/hodl-wolf.c b/algo/hodl/hodl-wolf.c deleted file mode 100644 index 7ce79da8..00000000 --- a/algo/hodl/hodl-wolf.c +++ /dev/null @@ -1,225 +0,0 @@ -#include -#include -#include -#include -#include "sha512-avx.h" -#include "wolf-aes.h" -#include "hodl-gate.h" -#include "hodl-wolf.h" -#include "miner.h" -#include "algo/sha/sha256d.h" - -#if defined(__AES__) - -void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount, - void *MidHash ) -{ - const int Chunk = TOTAL_CHUNKS / ThreadCount; - const uint32_t StartChunk = ThreadID * Chunk; - const uint32_t EndChunk = StartChunk + Chunk; - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - uint64_t* TempBufs[ SHA512_PARALLEL_N ] ; - uint64_t* desination[ SHA512_PARALLEL_N ]; - - for ( int i=0; i < SHA512_PARALLEL_N; ++i ) - { - TempBufs[i] = (uint64_t*)malloc( 32 ); - memcpy( TempBufs[i], MidHash, 32 ); - } - - for ( uint32_t i = StartChunk; i < EndChunk; i += SHA512_PARALLEL_N ) - { - for ( int j = 0; j < SHA512_PARALLEL_N; ++j ) - { - ( (uint32_t*)TempBufs[j] )[0] = i + j; - desination[j] = (uint64_t*)( (uint8_t *)Garbage + ( (i+j) - * GARBAGE_CHUNK_SIZE ) ); - } - sha512Compute32b_parallel( TempBufs, desination ); - } - - for ( int i = 0; i < SHA512_PARALLEL_N; ++i ) - free( TempBufs[i] ); -#else - uint32_t TempBuf[8]; - memcpy( TempBuf, MidHash, 32 ); - - for ( uint32_t i = StartChunk; i < EndChunk; ++i ) - { - TempBuf[0] = i; - SHA512( ( uint8_t *)TempBuf, 32, - ( (uint8_t *)Garbage ) + ( i * GARBAGE_CHUNK_SIZE ) ); - } -#endif -} - -/* -void Rev256(uint32_t *Dest, const uint32_t *Src) -{ - for(int i = 0; i < 8; ++i) Dest[i] = swab32(Src[i]); -} -*/ - -int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ -#if defined(__SSE4_2__) -//#ifdef __AVX__ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - int threadNumber = mythr->id; - CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; - CacheEntry Cache[AES_PARALLEL_N] __attribute__ ((aligned (64))); - __m128i* data[AES_PARALLEL_N]; - const __m128i* next[AES_PARALLEL_N]; - uint32_t CollisionCount = 0; - - for ( int n=0; n> 2) - 1] & (COMPARE_SIZE - 1); //% COMPARE_SIZE; - next[n] = Garbage[nextLocation].dqwords; - - __m128i last[2]; - last[0] = _mm_xor_si128(Cache[n].dqwords[254], next[n][254]); - last[1] = _mm_xor_si128(Cache[n].dqwords[255], next[n][255]); - - // Key is last 32b of Cache - // IV is last 16b of Cache - ExpandAESKey256(ExpKey[n], last); - ivs[n] = last[1]; - } - AES256CBC(data, next, ExpKey, ivs); - } - - for(int n=0; n> 2) - 1] & (COMPARE_SIZE - 1)) < 1000) - { - uint32_t BlockHdr[22], FinalPoW[8]; - - swab32_array( BlockHdr, pdata, 20 ); - - BlockHdr[20] = k + n; - BlockHdr[21] = Cache[n].dwords[(GARBAGE_SLICE_SIZE >> 2) - 2]; - - sha256d( (uint8_t *)FinalPoW, (uint8_t *)BlockHdr, 88 ); - CollisionCount++; - if( FinalPoW[7] <= ptarget[7] ) - { - pdata[20] = swab32( BlockHdr[20] ); - pdata[21] = swab32( BlockHdr[21] ); - *hashes_done = CollisionCount; - submit_solution( work, FinalPoW, mythr ); - return(0); - } - } - } - - *hashes_done = CollisionCount; - return(0); - - -#else // no AVX - - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t BlockHdr[22], FinalPoW[8]; - CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; - CacheEntry Cache; - uint32_t CollisionCount = 0; - int threadNumber = mythr->id; - - swab32_array( BlockHdr, pdata, 20 ); - // Search for pattern in psuedorandom data - int searchNumber = COMPARE_SIZE / opt_n_threads; - int startLoc = threadNumber * searchNumber; - - if ( opt_debug ) - applog( LOG_DEBUG,"Hash target= %08lx", ptarget[7] ); - - for(int32_t k = startLoc; k < startLoc + searchNumber && !work_restart[threadNumber].restart; k++) - { - // copy data to first l2 cache - memcpy(Cache.dwords, Garbage + k, GARBAGE_SLICE_SIZE); - for(int j = 0; j < AES_ITERATIONS; j++) - { - CacheEntry TmpXOR; - __m128i ExpKey[16]; - - // use last 4 bytes of first cache as next location - uint32_t nextLocation = Cache.dwords[(GARBAGE_SLICE_SIZE >> 2) - - 1] & (COMPARE_SIZE - 1); //% COMPARE_SIZE; - - // Copy data from indicated location to second l2 cache - - memcpy(&TmpXOR, Garbage + nextLocation, GARBAGE_SLICE_SIZE); - //XOR location data into second cache - for( int i = 0; i < (GARBAGE_SLICE_SIZE >> 4); ++i ) - TmpXOR.dqwords[i] = _mm_xor_si128( Cache.dqwords[i], - TmpXOR.dqwords[i] ); - // Key is last 32b of TmpXOR - // IV is last 16b of TmpXOR - - ExpandAESKey256( ExpKey, TmpXOR.dqwords + - (GARBAGE_SLICE_SIZE / sizeof(__m128i)) - 2 ); - AES256CBC( Cache.dqwords, TmpXOR.dqwords, ExpKey, - TmpXOR.dqwords[ (GARBAGE_SLICE_SIZE / sizeof(__m128i)) - - 1 ], 256 ); } - // use last X bits as solution - if( ( Cache.dwords[ (GARBAGE_SLICE_SIZE >> 2) - 1 ] - & (COMPARE_SIZE - 1) ) < 1000 ) - { - BlockHdr[20] = k; - BlockHdr[21] = Cache.dwords[ (GARBAGE_SLICE_SIZE >> 2) - 2 ]; - sha256d( (uint8_t *)FinalPoW, (uint8_t *)BlockHdr, 88 ); - CollisionCount++; - if( FinalPoW[7] <= ptarget[7] ) - { - pdata[20] = swab32( BlockHdr[20] ); - pdata[21] = swab32( BlockHdr[21] ); - *hashes_done = CollisionCount; - submit_solution( work, FinalPoW, mythr ); - return(0); - } - } - } - - *hashes_done = CollisionCount; - return(0); - -#endif // AVX else - -} - -void GenRandomGarbage(CacheEntry *Garbage, uint32_t *pdata, int thr_id) -{ - uint32_t BlockHdr[20], MidHash[8]; - swab32_array( BlockHdr, pdata, 20 ); - sha256d((uint8_t *)MidHash, (uint8_t *)BlockHdr, 80); - GenerateGarbageCore(Garbage, thr_id, opt_n_threads, MidHash); -} - -#endif // AES - diff --git a/algo/hodl/hodl-wolf.h b/algo/hodl/hodl-wolf.h deleted file mode 100644 index 47c8fb87..00000000 --- a/algo/hodl/hodl-wolf.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __HODL_H -#define __HODL_H - -#include -#include -#include "miner.h" - -#define AES_ITERATIONS 15 - -#define GARBAGE_SIZE (1 << 30) -#define GARBAGE_CHUNK_SIZE (1 << 6) -#define GARBAGE_SLICE_SIZE (1 << 12) -#define TOTAL_CHUNKS (1 << 24) // GARBAGE_SIZE / GARBAGE_CHUNK_SIZE -#define COMPARE_SIZE (1 << 18) // GARBAGE_SIZE / GARBAGE_SLICE_SIZE - -typedef union _CacheEntry -{ - uint32_t dwords[GARBAGE_SLICE_SIZE >> 2] __attribute__((aligned(16))); - __m128i dqwords[GARBAGE_SLICE_SIZE >> 4] __attribute__((aligned(16))); -} CacheEntry; - -int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); - -void GenRandomGarbage( CacheEntry *Garbage, uint32_t *pdata, int thr_id); - -#endif // __HODL_H diff --git a/algo/hodl/hodlminer.1 b/algo/hodl/hodlminer.1 deleted file mode 100644 index da855e61..00000000 --- a/algo/hodl/hodlminer.1 +++ /dev/null @@ -1,208 +0,0 @@ -.TH MINERD 1 "March 2016" "cpuminer 2.4.3" -.SH NAME -hodlminer \- CPU miner for Hodlcoin -.SH SYNOPSIS -.B hodlminer -[\fIOPTION\fR]... -.SH DESCRIPTION -.B hodlminer -is a multi-threaded CPU miner for Hodlcoin. -It supports the getwork and getblocktemplate (BIP 22) methods, -as well as the Stratum mining protocol. -.PP -In its normal mode of operation, \fBhodlminer\fR connects to a mining server -(specified with the \fB\-o\fR option), receives work from it and starts hashing. -As soon as a solution is found, it is submitted to the same mining server, -which can accept or reject it. -When using getwork or getblocktemplate, -\fBhodlminer\fR can take advantage of long polling, if the server supports it; -in any case, fresh work is fetched as needed. -When using the Stratum protocol this is not possible, -and the server is responsible for sending fresh work at least every minute; -if it fails to do so, -\fBhodlminer\fR may drop the connection and try reconnecting again. -.PP -By default, \fBhodlminer\fR writes all its messages to standard error. -On systems that have a syslog, the \fB\-\-syslog\fR option can be used -to write to it instead. -.PP -On start, the nice value of all miner threads is set to 19. -On Linux, the scheduling policy is also changed to SCHED_IDLE, -or to SCHED_BATCH if that fails. -On multiprocessor systems, \fBhodlminer\fR -automatically sets the CPU affinity of miner threads -if the number of threads is a multiple of the number of processors. -.SH EXAMPLES -To connect to the Hodlcoin mining pool that provides a Stratum server -at hodl.blockquarry.com on port 8332, authenticating as worker "user.worker" with password "x": -.PP -.nf -.RS -hodlminer \-o stratum+tcp://hodl.blockquarry.com:8332 \-u user.worker -p x -q -.RE -.fi -.PP -To mine to a local Hodlcoin instance running on port 18332, -authenticating with username "rpcuser" and password "rpcpass": -.PP -.nf -.RS -hodlminer \-a hodl \-o http://localhost:18332 \-O rpcuser:rpcpass \\ - \-\-coinbase\-addr=mpXwg4jMtRhuSpVq4xS3HFHmCmWp9NyGKt -.RE -.fi -.PP -.SH OPTIONS -.TP -\fB\-a\fR, \fB\-\-algo\fR=\fIALGORITHM\fR -Set the hashing algorithm to use. -Default is hodl. -Possible values are: -.RS 11 -.TP 10 -.B hodl -.TP -\fB\-\-benchmark\fR -Run in offline benchmark mode. -.TP -\fB\-B\fR, \fB\-\-background\fR -Run in the background as a daemon. -.TP -\fB\-\-cert\fR=\fIFILE\fR -Set an SSL certificate to use with the mining server. -Only supported when using the HTTPS protocol. -.TP -\fB\-\-coinbase\-addr\fR=\fIADDRESS\fR -Set a payout address for solo mining. -This is only used in getblocktemplate mode, -and only if the server does not provide a coinbase transaction. -.TP -\fB\-\-coinbase\-sig\fR=\fITEXT\fR -Set a string to be included in the coinbase (if allowed by the server). -This is only used in getblocktemplate mode. -.TP -\fB\-c\fR, \fB\-\-config\fR=\fIFILE\fR -Load options from a configuration file. -\fIFILE\fR must contain a JSON object -mapping long options to their arguments (as strings), -or to \fBtrue\fR if no argument is required. -Sample configuration file: - -.nf - { - "url": "stratum+tcp://hodl.blockquarry.com:8332", - "userpass": "foo:bar", - "retry-pause": "10", - "quiet": true - } -.fi -.TP -\fB\-D\fR, \fB\-\-debug\fR -Enable debug output. -.TP -\fB\-h\fR, \fB\-\-help\fR -Print a help message and exit. -.TP -\fB\-\-no\-gbt\fR -Do not use the getblocktemplate RPC method. -.TP -\fB\-\-no\-getwork\fR -Do not use the getwork RPC method. -.TP -\fB\-\-no\-longpoll\fR -Do not use long polling. -.TP -\fB\-\-no\-redirect\fR -Ignore requests from the server to switch to a different URL. -.TP -\fB\-\-no\-stratum\fR -Do not switch to Stratum, even if the server advertises support for it. -.TP -\fB\-o\fR, \fB\-\-url\fR=[\fISCHEME\fR://][\fIUSERNAME\fR[:\fIPASSWORD\fR]@]\fIHOST\fR:\fIPORT\fR[/\fIPATH\fR] -Set the URL of the mining server to connect to. -Supported schemes are \fBhttp\fR, \fBhttps\fR, \fBstratum+tcp\fR -and \fBstratum+tcps\fR. -If no scheme is specified, http is assumed. -Specifying a \fIPATH\fR is only supported for HTTP and HTTPS. -Specifying credentials has the same effect as using the \fB\-O\fR option. - -By default, on HTTP and HTTPS, -the miner tries to use the getblocktemplate RPC method, -and falls back to using getwork if getblocktemplate is unavailable. -This behavior can be modified by using the \fB\-\-no\-gbt\fR -and \fB\-\-no\-getwork\fR options. -.TP -\fB\-O\fR, \fB\-\-userpass\fR=\fIUSERNAME\fR:\fIPASSWORD\fR -Set the credentials to use for connecting to the mining server. -Any value previously set with \fB\-u\fR or \fB\-p\fR is discarded. -.TP -\fB\-p\fR, \fB\-\-pass\fR=\fIPASSWORD\fR -Set the password to use for connecting to the mining server. -Any password previously set with \fB\-O\fR is discarded. -.TP -\fB\-P\fR, \fB\-\-protocol\-dump\fR -Enable output of all protocol-level activities. -.TP -\fB\-q\fR, \fB\-\-quiet\fR -Disable per-thread hashmeter output. -.TP -\fB\-r\fR, \fB\-\-retries\fR=\fIN\fR -Set the maximum number of times to retry if a network call fails. -If not specified, the miner will retry indefinitely. -.TP -\fB\-R\fR, \fB\-\-retry\-pause\fR=\fISECONDS\fR -Set how long to wait between retries. Default is 30 seconds. -.TP -\fB\-s\fR, \fB\-\-scantime\fR=\fISECONDS\fR -Set an upper bound on the time the miner can go without fetching fresh work. -This setting has no effect in Stratum mode or when long polling is activated. -Default is 5 seconds. -.TP -\fB\-S\fR, \fB\-\-syslog\fR -Log to the syslog facility instead of standard error. -.TP -\fB\-t\fR, \fB\-\-threads\fR=\fIN\fR -Set the number of miner threads. -If not specified, the miner will try to detect the number of available processors -and use that. -.TP -\fB\-T\fR, \fB\-\-timeout\fR=\fISECONDS\fR -Set a timeout for long polling. -.TP -\fB\-u\fR, \fB\-\-user\fR=\fIUSERNAME\fR -Set the username to use for connecting to the mining server. -Any username previously set with \fB\-O\fR is discarded. -.TP -\fB\-V\fR, \fB\-\-version\fR -Display version information and quit. -.TP -\fB\-x\fR, \fB\-\-proxy\fR=[\fISCHEME\fR://][\fIUSERNAME\fR:\fIPASSWORD\fR@]\fIHOST\fR:\fIPORT\fR -Connect to the mining server through a proxy. -Supported schemes are: \fBhttp\fR, \fBsocks4\fR, \fBsocks5\fR. -Since libcurl 7.18.0, the following are also supported: -\fBsocks4a\fR, \fBsocks5h\fR (SOCKS5 with remote name resolving). -If no scheme is specified, the proxy is treated as an HTTP proxy. -.SH ENVIRONMENT -The following environment variables can be specified in lower case or upper case; -the lower-case version has precedence. \fBhttp_proxy\fR is an exception -as it is only available in lower case. -.PP -.RS -.TP -\fBhttp_proxy\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR -Sets the proxy server to use for HTTP. -.TP -\fBHTTPS_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR -Sets the proxy server to use for HTTPS. -.TP -\fBALL_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR -Sets the proxy server to use if no protocol-specific proxy is set. -.RE -.PP -Using an environment variable to set the proxy has the same effect as -using the \fB\-x\fR option. -.SH AUTHOR -Most of the code in the current version of minerd was written by -Pooler with contributions from others. - -The original minerd was written by Jeff Garzik . diff --git a/algo/hodl/sha512-avx.h b/algo/hodl/sha512-avx.h deleted file mode 100644 index 6fbb5bf7..00000000 --- a/algo/hodl/sha512-avx.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _SHA512_H -#define _SHA512_H - -#include -#include "emmintrin.h" - -//SHA-512 block size -#define SHA512_BLOCK_SIZE 128 -//SHA-512 digest size -#define SHA512_DIGEST_SIZE 64 - -/* -#ifndef __AVX2__ -#ifndef __AVX__ -#error "Either AVX or AVX2 supported needed" -#endif // __AVX__ -#endif // __AVX2__ -*/ - -typedef struct -{ -#ifdef __AVX2__ - __m256i h[8]; - __m256i w[80]; -#elif defined(__SSE4_2__) -//#elif defined(__AVX__) - __m128i h[8]; - __m128i w[80]; -#else - int dummy; -#endif -} Sha512Context; - -#ifdef __AVX2__ -#define SHA512_PARALLEL_N 8 -#elif defined(__SSE4_2__) -//#elif defined(__AVX__) -#define SHA512_PARALLEL_N 4 -#else -#define SHA512_PARALLEL_N 1 // dummy value -#endif - -//SHA-512 related functions -void sha512Compute32b_parallel( - uint64_t *data[SHA512_PARALLEL_N], - uint64_t *digest[SHA512_PARALLEL_N]); - -void sha512ProcessBlock(Sha512Context contexti[2] ); - -#endif diff --git a/algo/hodl/sha512_avx.c b/algo/hodl/sha512_avx.c deleted file mode 100644 index 1c7c0892..00000000 --- a/algo/hodl/sha512_avx.c +++ /dev/null @@ -1,235 +0,0 @@ -#ifndef __AVX2__ - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - -//Dependencies -#include -#include - -#ifdef __FreeBSD__ -#include -#endif - -#if defined(__CYGWIN__) -#include -#endif - -#include "tmmintrin.h" -#include "smmintrin.h" - -#include "sha512-avx.h" -#if ((defined(_WIN64) || defined(__WINDOWS__))) -#include "hodl-endian.h" -#endif - -//SHA-512 auxiliary functions -#define Ch(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define SIGMA1(x) (ROR64(x, 28) ^ ROR64(x, 34) ^ ROR64(x, 39)) -#define SIGMA2(x) (ROR64(x, 14) ^ ROR64(x, 18) ^ ROR64(x, 41)) -#define SIGMA3(x) (ROR64(x, 1) ^ ROR64(x, 8) ^ SHR64(x, 7)) -#define SIGMA4(x) (ROR64(x, 19) ^ ROR64(x, 61) ^ SHR64(x, 6)) - -//Rotate right operation -#define ROR64(a, n) _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(a, 64 - n)) - -//Shift right operation -#define SHR64(a, n) _mm_srli_epi64(a, n) - -__m128i mm_htobe_epi64(__m128i a) { - __m128i mask = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7); - return _mm_shuffle_epi8(a, mask); -} - -__m128i mm_betoh_epi64(__m128i a) { - return mm_htobe_epi64(a); -} - -//SHA-512 padding -static const uint8_t padding[128] = -{ - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -//SHA-512 constants -static const uint64_t k[80] = -{ - 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, - 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, - 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, - 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694, - 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, - 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, - 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, - 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70, - 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, - 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B, - 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30, - 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8, - 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, - 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, - 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC, - 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B, - 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, - 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B, - 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, - 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817 -}; - - -void sha512Compute32b_parallel(uint64_t *data[SHA512_PARALLEL_N], uint64_t *digest[SHA512_PARALLEL_N]) { - Sha512Context context[2]; - context[0].h[0] = _mm_set1_epi64x(0x6A09E667F3BCC908); - context[0].h[1] = _mm_set1_epi64x(0xBB67AE8584CAA73B); - context[0].h[2] = _mm_set1_epi64x(0x3C6EF372FE94F82B); - context[0].h[3] = _mm_set1_epi64x(0xA54FF53A5F1D36F1); - context[0].h[4] = _mm_set1_epi64x(0x510E527FADE682D1); - context[0].h[5] = _mm_set1_epi64x(0x9B05688C2B3E6C1F); - context[0].h[6] = _mm_set1_epi64x(0x1F83D9ABFB41BD6B); - context[0].h[7] = _mm_set1_epi64x(0x5BE0CD19137E2179); - - context[1].h[0] = _mm_set1_epi64x(0x6A09E667F3BCC908); - context[1].h[1] = _mm_set1_epi64x(0xBB67AE8584CAA73B); - context[1].h[2] = _mm_set1_epi64x(0x3C6EF372FE94F82B); - context[1].h[3] = _mm_set1_epi64x(0xA54FF53A5F1D36F1); - context[1].h[4] = _mm_set1_epi64x(0x510E527FADE682D1); - context[1].h[5] = _mm_set1_epi64x(0x9B05688C2B3E6C1F); - context[1].h[6] = _mm_set1_epi64x(0x1F83D9ABFB41BD6B); - context[1].h[7] = _mm_set1_epi64x(0x5BE0CD19137E2179); - - for(int i=0; i<4; ++i) { - context[0].w[i] = _mm_set_epi64x ( data[1][i], data[0][i] ); - context[1].w[i] = _mm_set_epi64x ( data[3][i], data[2][i] ); - } - for(int i=0; i<10; ++i) { - context[0].w[i+4] = _mm_set1_epi64x( ((uint64_t*)padding)[i] ); - context[1].w[i+4] = _mm_set1_epi64x( ((uint64_t*)padding)[i] ); - } - - //Length of the original message (before padding) - uint64_t totalSize = 32 * 8; - - //Append the length of the original message - context[0].w[14] = _mm_set1_epi64x(0); - context[0].w[15] = _mm_set1_epi64x(htobe64(totalSize)); - - context[1].w[14] = _mm_set1_epi64x(0); - context[1].w[15] = _mm_set1_epi64x(htobe64(totalSize)); - - //Calculate the message digest - sha512ProcessBlock(context); - - //Convert from host byte order to big-endian byte order - for (int i = 0; i < 8; i++) { - context[0].h[i] = mm_htobe_epi64(context[0].h[i]); - context[1].h[i] = mm_htobe_epi64(context[1].h[i]); - } - - //Copy the resulting digest - for(int i=0; i<8; ++i) { - digest[0][i] = _mm_extract_epi64(context[0].h[i], 0); - digest[1][i] = _mm_extract_epi64(context[0].h[i], 1); - digest[2][i] = _mm_extract_epi64(context[1].h[i], 0); - digest[3][i] = _mm_extract_epi64(context[1].h[i], 1); - } -} - -#define blk0(n, i) (block[n][i] = mm_betoh_epi64(block[n][i])) -#define blk(n, i) (block[n][i] = block[n][i - 16] + SIGMA3(block[n][i - 15]) + \ - SIGMA4(block[n][i - 2]) + block[n][i - 7]) - -#define ROUND512(a,b,c,d,e,f,g,h) \ - T0 += (h[0]) + SIGMA2(e[0]) + Ch((e[0]), (f[0]), (g[0])) + k[i]; \ - T1 += (h[1]) + SIGMA2(e[1]) + Ch((e[1]), (f[1]), (g[1])) + k[i]; \ - (d[0]) += T0; \ - (d[1]) += T1; \ - (h[0]) = T0 + SIGMA1(a[0]) + Maj((a[0]), (b[0]), (c[0])); \ - (h[1]) = T1 + SIGMA1(a[1]) + Maj((a[1]), (b[1]), (c[1])); \ - i++ - -#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ - T0 = blk0(0, i); \ - T1 = blk0(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define ROUND512_16_TO_80(a,b,c,d,e,f,g,h) \ - T0 = blk(0, i); \ - T1 = blk(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define R512_0 \ - ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \ - ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \ - ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \ - ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \ - ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \ - ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \ - ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \ - ROUND512_0_TO_15(b, c, d, e, f, g, h, a) - -#define R512_16 \ - ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \ - ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \ - ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \ - ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \ - ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \ - ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \ - ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \ - ROUND512_16_TO_80(b, c, d, e, f, g, h, a) - -#define INIT(x,n) \ - x[0] = context[0].h[n]; \ - x[1] = context[1].h[n]; \ - -void sha512ProcessBlock(Sha512Context context[2]) -{ - __m128i* block[2]; - block[0] = context[0].w; - block[1] = context[1].w; - - __m128i T0, T1; - __m128i a[2], b[2], c[2], d[2], e[2], f[2], g[2], h[2]; - INIT(a, 0) - INIT(b, 1) - INIT(c, 2) - INIT(d, 3) - INIT(e, 4) - INIT(f, 5) - INIT(g, 6) - INIT(h, 7) - - int i = 0; - R512_0; R512_0; - for(int j=0; j<8; ++j) { - R512_16; - } - - context[0].h[0] += a[0]; - context[0].h[1] += b[0]; - context[0].h[2] += c[0]; - context[0].h[3] += d[0]; - context[0].h[4] += e[0]; - context[0].h[5] += f[0]; - context[0].h[6] += g[0]; - context[0].h[7] += h[0]; - - context[1].h[0] += a[1]; - context[1].h[1] += b[1]; - context[1].h[2] += c[1]; - context[1].h[3] += d[1]; - context[1].h[4] += e[1]; - context[1].h[5] += f[1]; - context[1].h[6] += g[1]; - context[1].h[7] += h[1]; -} - -#endif // __AVX__ -#endif // __AVX2__ diff --git a/algo/hodl/sha512_avx2.c b/algo/hodl/sha512_avx2.c deleted file mode 100644 index 58e421c7..00000000 --- a/algo/hodl/sha512_avx2.c +++ /dev/null @@ -1,241 +0,0 @@ -#ifdef __AVX2__ - -//Dependencies -#include -#include - -#ifdef __FreeBSD__ -#include -#endif - -#if defined(__CYGWIN__) -#include -#endif - -#include "tmmintrin.h" -#include "smmintrin.h" -#include "immintrin.h" - -#include "sha512-avx.h" -#if ((defined(_WIN64) || defined(__WINDOWS__))) -#include "hodl-endian.h" -#endif - -//SHA-512 auxiliary functions -#define Ch(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define SIGMA1(x) (ROR64(x, 28) ^ ROR64(x, 34) ^ ROR64(x, 39)) -#define SIGMA2(x) (ROR64(x, 14) ^ ROR64(x, 18) ^ ROR64(x, 41)) -#define SIGMA3(x) (ROR64(x, 1) ^ ROR64(x, 8) ^ SHR64(x, 7)) -#define SIGMA4(x) (ROR64(x, 19) ^ ROR64(x, 61) ^ SHR64(x, 6)) - -//Rotate right operation -#define ROR64(a, n) _mm256_or_si256(_mm256_srli_epi64(a, n), _mm256_slli_epi64(a, 64 - n)) - -//Shift right operation -#define SHR64(a, n) _mm256_srli_epi64(a, n) - -__m256i mm256_htobe_epi64(__m256i a) { - __m256i mask = _mm256_set_epi8( - 24,25,26,27,28,29,30,31, - 16,17,18,19,20,21,22,23, - 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7); - return _mm256_shuffle_epi8(a, mask); -} - -__m256i mm256_betoh_epi64(__m256i a) { - return mm256_htobe_epi64(a); -} - -//SHA-512 padding -static const uint8_t padding[128] = -{ - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -//SHA-512 constants -static const uint64_t k[80] = -{ - 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, - 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, - 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, - 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694, - 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, - 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, - 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, - 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70, - 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, - 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B, - 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30, - 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8, - 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, - 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, - 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC, - 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B, - 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, - 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B, - 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, - 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817 -}; - - -void sha512Compute32b_parallel(uint64_t *data[SHA512_PARALLEL_N], uint64_t *digest[SHA512_PARALLEL_N]) { - Sha512Context context[2]; - context[0].h[0] = _mm256_set1_epi64x(0x6A09E667F3BCC908); - context[0].h[1] = _mm256_set1_epi64x(0xBB67AE8584CAA73B); - context[0].h[2] = _mm256_set1_epi64x(0x3C6EF372FE94F82B); - context[0].h[3] = _mm256_set1_epi64x(0xA54FF53A5F1D36F1); - context[0].h[4] = _mm256_set1_epi64x(0x510E527FADE682D1); - context[0].h[5] = _mm256_set1_epi64x(0x9B05688C2B3E6C1F); - context[0].h[6] = _mm256_set1_epi64x(0x1F83D9ABFB41BD6B); - context[0].h[7] = _mm256_set1_epi64x(0x5BE0CD19137E2179); - - context[1].h[0] = _mm256_set1_epi64x(0x6A09E667F3BCC908); - context[1].h[1] = _mm256_set1_epi64x(0xBB67AE8584CAA73B); - context[1].h[2] = _mm256_set1_epi64x(0x3C6EF372FE94F82B); - context[1].h[3] = _mm256_set1_epi64x(0xA54FF53A5F1D36F1); - context[1].h[4] = _mm256_set1_epi64x(0x510E527FADE682D1); - context[1].h[5] = _mm256_set1_epi64x(0x9B05688C2B3E6C1F); - context[1].h[6] = _mm256_set1_epi64x(0x1F83D9ABFB41BD6B); - context[1].h[7] = _mm256_set1_epi64x(0x5BE0CD19137E2179); - - for(int i=0; i<4; ++i) { - context[0].w[i] = _mm256_set_epi64x ( data[3][i], data[2][i], data[1][i], data[0][i] ); - context[1].w[i] = _mm256_set_epi64x ( data[7][i], data[6][i], data[5][i], data[4][i] ); - } - for(int i=0; i<10; ++i) { - context[0].w[i+4] = _mm256_set1_epi64x( ((uint64_t*)padding)[i] ); - context[1].w[i+4] = _mm256_set1_epi64x( ((uint64_t*)padding)[i] ); - } - - //Length of the original message (before padding) - uint64_t totalSize = 32 * 8; - - //Append the length of the original message - context[0].w[14] = _mm256_set1_epi64x(0); - context[0].w[15] = _mm256_set1_epi64x(htobe64(totalSize)); - - context[1].w[14] = _mm256_set1_epi64x(0); - context[1].w[15] = _mm256_set1_epi64x(htobe64(totalSize)); - - //Calculate the message digest - sha512ProcessBlock(context); - - //Convert from host byte order to big-endian byte order - for (int i = 0; i < 8; i++) { - context[0].h[i] = mm256_htobe_epi64(context[0].h[i]); - context[1].h[i] = mm256_htobe_epi64(context[1].h[i]); - } - - //Copy the resulting digest - for(int i=0; i<8; ++i) { - digest[0][i] = _mm256_extract_epi64(context[0].h[i], 0); - digest[1][i] = _mm256_extract_epi64(context[0].h[i], 1); - digest[2][i] = _mm256_extract_epi64(context[0].h[i], 2); - digest[3][i] = _mm256_extract_epi64(context[0].h[i], 3); - - digest[4][i] = _mm256_extract_epi64(context[1].h[i], 0); - digest[5][i] = _mm256_extract_epi64(context[1].h[i], 1); - digest[6][i] = _mm256_extract_epi64(context[1].h[i], 2); - digest[7][i] = _mm256_extract_epi64(context[1].h[i], 3); - } -} - -#define blk0(n, i) (block[n][i] = mm256_betoh_epi64(block[n][i])) -#define blk(n, i) (block[n][i] = block[n][i - 16] + SIGMA3(block[n][i - 15]) + \ - SIGMA4(block[n][i - 2]) + block[n][i - 7]) - -#define ROUND512(a,b,c,d,e,f,g,h) \ - T0 += (h[0]) + SIGMA2(e[0]) + Ch((e[0]), (f[0]), (g[0])) + k[i]; \ - T1 += (h[1]) + SIGMA2(e[1]) + Ch((e[1]), (f[1]), (g[1])) + k[i]; \ - (d[0]) += T0; \ - (d[1]) += T1; \ - (h[0]) = T0 + SIGMA1(a[0]) + Maj((a[0]), (b[0]), (c[0])); \ - (h[1]) = T1 + SIGMA1(a[1]) + Maj((a[1]), (b[1]), (c[1])); \ - i++ - -#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ - T0 = blk0(0, i); \ - T1 = blk0(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define ROUND512_16_TO_80(a,b,c,d,e,f,g,h) \ - T0 = blk(0, i); \ - T1 = blk(1, i); \ - ROUND512(a,b,c,d,e,f,g,h) - -#define R512_0 \ - ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \ - ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \ - ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \ - ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \ - ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \ - ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \ - ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \ - ROUND512_0_TO_15(b, c, d, e, f, g, h, a) - -#define R512_16 \ - ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \ - ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \ - ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \ - ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \ - ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \ - ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \ - ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \ - ROUND512_16_TO_80(b, c, d, e, f, g, h, a) - -#define INIT(x,n) \ - x[0] = context[0].h[n]; \ - x[1] = context[1].h[n]; \ - -void sha512ProcessBlock(Sha512Context context[2]) -{ - __m256i* block[2]; - block[0] = context[0].w; - block[1] = context[1].w; - - __m256i T0, T1; - __m256i a[2], b[2], c[2], d[2], e[2], f[2], g[2], h[2]; - INIT(a, 0) - INIT(b, 1) - INIT(c, 2) - INIT(d, 3) - INIT(e, 4) - INIT(f, 5) - INIT(g, 6) - INIT(h, 7) - - int i = 0; - R512_0; R512_0; - for(int j=0; j<8; ++j) { - R512_16; - } - - context[0].h[0] += a[0]; - context[0].h[1] += b[0]; - context[0].h[2] += c[0]; - context[0].h[3] += d[0]; - context[0].h[4] += e[0]; - context[0].h[5] += f[0]; - context[0].h[6] += g[0]; - context[0].h[7] += h[0]; - - context[1].h[0] += a[1]; - context[1].h[1] += b[1]; - context[1].h[2] += c[1]; - context[1].h[3] += d[1]; - context[1].h[4] += e[1]; - context[1].h[5] += f[1]; - context[1].h[6] += g[1]; - context[1].h[7] += h[1]; -} - -#endif // __AVX2__ diff --git a/algo/hodl/wolf-aes.h b/algo/hodl/wolf-aes.h deleted file mode 100644 index b33407f2..00000000 --- a/algo/hodl/wolf-aes.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __WOLF_AES_H -#define __WOLF_AES_H - -#include -#include - -void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf); - -#if defined(__SSE4_2__) -//#ifdef __AVX__ - -#define AES_PARALLEL_N 8 -#define BLOCK_COUNT 256 - -void AES256CBC( __m128i** data, const __m128i** next, __m128i ExpandedKey[][16], - __m128i* IV ); - -#else - -void AES256CBC( __m128i *Ciphertext, const __m128i *Plaintext, - const __m128i *ExpandedKey, __m128i IV, uint32_t BlockCount ); - -#endif - -#endif // __WOLF_AES_H diff --git a/algo/m7m/m7m.c b/algo/m7m/m7m.c deleted file mode 100644 index 2bf4a11f..00000000 --- a/algo/m7m/m7m.c +++ /dev/null @@ -1,312 +0,0 @@ -#include "cpuminer-config.h" -#include "algo-gate-api.h" - -#include -#include -#include -#include -#include -#include -#include "algo/keccak/sph_keccak.h" -#include "algo/haval/sph-haval.h" -#include "algo/tiger/sph_tiger.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include "algo/ripemd/sph_ripemd.h" -#include "algo/sha/sph_sha2.h" -#include "algo/sha/sha256-hash.h" - -#define EPSa DBL_EPSILON -#define EPS1 DBL_EPSILON -#define EPS2 3.0e-11 - -inline double exp_n( double xt ) -{ - if ( xt < -700.0 ) - return 0; - else if ( xt > 700.0 ) - return 1e200; - else if ( xt > -0.8e-8 && xt < 0.8e-8 ) - return ( 1.0 + xt ); - else - return exp( xt ); -} - -inline double exp_n2( double x1, double x2 ) -{ - double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, - p5 = 37., p6 = 700.; - double xt = x1 - x2; - if ( xt < p1+1.e-200 ) - return 1.; - else if ( xt > p1 && xt < p2 + 1.e-200 ) - return ( 1. - exp(xt) ); - else if ( xt > p2 && xt < p3 + 1.e-200 ) - return ( 1. / ( 1. + exp(xt) ) ); - else if ( xt > p3 && xt < p4 ) - return ( 1. / (2. + xt) ); - else if ( xt > p4 - 1.e-200 && xt < p5 ) - return ( exp(-xt) / ( 1. + exp(-xt) ) ); - else if ( xt > p5 - 1.e-200 && xt < p6 ) - return ( exp(-xt) ); - else if ( xt > p6 - 1.e-200 ) - return 0.; -} - -double swit2_( double wvnmb ) -{ - return pow( ( 5.55243 * ( exp_n( -0.3 * wvnmb / 15.762 ) - - exp_n( -0.6 * wvnmb / 15.762 ) ) ) * wvnmb, 0.5 ) - / 1034.66 * pow( sin( wvnmb / 65. ), 2. ); -} - -double GaussianQuad_N2( const double x1, const double x2 ) -{ - double s = 0.0; - double x[6], w[6]; - //gauleg(a2, b2, x, w); - - double z1, z, xm, xl, pp, p3, p2, p1; - xm = 0.5 * ( x2 + x1 ); - xl = 0.5 * ( x2 - x1 ); - for( int i = 1; i <= 3; i++ ) - { - z = (i == 2) ? 0.540641 : ( (i == 1) ? 0.909632 : -0.0 ); - do - { - p1 = ( ( 3.0 * z * z ) - 1 ) / 2; - p2 = p1; - p1 = ( ( 5.0 * z * p2 ) - ( 2.0 * z ) ) / 3; - p3 = p2; - p2 = p1; - p1 = ( ( 7.0 * z * p2 ) - ( 3.0 * p3 ) ) / 4; - p3 = p2; - p2 = p1; - p1 = ( ( 9.0 * z * p2 ) - ( 4.0 * p3 ) ) / 5; - pp = 5 * ( z * p1 - p2 ) / ( z * z - 1.0 ); - z1 = z; - z = z1 - p1 / pp; - } while ( fabs( z - z1 ) > 3.0e-11 ); - - x[i] = xm - xl * z; - x[ 5+1-i ] = xm + xl * z; - w[i] = 2.0 * xl / ( ( 1.0 - z * z ) * pp * pp ); - w[ 5+1-i ] = w [i]; - } - - for( int j = 1; j <= 5; j++ ) s += w[j] * swit2_( x[j] ); - - return s; -} - -uint32_t sw2_( int nnounce ) -{ - double wmax = ( ( sqrt( (double)(nnounce) ) * ( 1.+EPSa ) ) / 450+100 ); - return ( (uint32_t)( GaussianQuad_N2( 0., wmax ) * ( 1.+EPSa ) * 1.e6 ) ); -} - -typedef struct { - sha256_context sha256; - sph_sha512_context sha512; - sph_keccak512_context keccak; - sph_whirlpool_context whirlpool; - sph_haval256_5_context haval; - sph_tiger_context tiger; - sph_ripemd160_context ripemd; -} m7m_ctx_holder; - -m7m_ctx_holder m7m_ctx; - -void init_m7m_ctx() -{ - sha256_ctx_init( &m7m_ctx.sha256 ); - sph_sha512_init( &m7m_ctx.sha512 ); - sph_keccak512_init( &m7m_ctx.keccak ); - sph_whirlpool_init( &m7m_ctx.whirlpool ); - sph_haval256_5_init( &m7m_ctx.haval ); - sph_tiger_init( &m7m_ctx.tiger ); - sph_ripemd160_init( &m7m_ctx.ripemd ); -} - -#define BITS_PER_DIGIT 3.32192809488736234787 -#define EPS (DBL_EPSILON) - -#define NM7M 5 -#define SW_DIVS 5 -#define M7_MIDSTATE_LEN 76 -int scanhash_m7m_hash( struct work* work, uint64_t max_nonce, - unsigned long *hashes_done, struct thr_info *mythr ) -{ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t data[32] __attribute__((aligned(64))); - uint32_t *data_p64 = data + (M7_MIDSTATE_LEN / sizeof(data[0])); - uint32_t hash[8] __attribute__((aligned(64))); - uint8_t bhash[7][64] __attribute__((aligned(64))); - uint32_t n = pdata[19] - 1; - int thr_id = mythr->id; - uint32_t usw_, mpzscale; - const uint32_t first_nonce = pdata[19]; - char data_str[161], hash_str[65], target_str[65]; - uint8_t bdata[8192] __attribute__ ((aligned (64))); - int i, digits; - int bytes; - size_t p = sizeof(unsigned long), a = 64/p, b = 32/p; - - m7m_ctx_holder ctx1, ctx2 __attribute__ ((aligned (64))); - memcpy( &ctx1, &m7m_ctx, sizeof(m7m_ctx) ); - - memcpy(data, pdata, 80); - - sha256_update( &ctx1.sha256, data, M7_MIDSTATE_LEN ); - sph_sha512( &ctx1.sha512, data, M7_MIDSTATE_LEN ); - sph_keccak512( &ctx1.keccak, data, M7_MIDSTATE_LEN ); - sph_whirlpool( &ctx1.whirlpool, data, M7_MIDSTATE_LEN ); - sph_haval256_5( &ctx1.haval, data, M7_MIDSTATE_LEN ); - sph_tiger( &ctx1.tiger, data, M7_MIDSTATE_LEN ); - sph_ripemd160( &ctx1.ripemd, data, M7_MIDSTATE_LEN ); - - mpz_t magipi, magisw, product, bns0, bns1; - mpf_t magifpi, magifpi0, mpt1, mpt2, mptmp, mpten; - - mpz_inits(magipi, magisw, bns0, bns1, NULL); - mpz_init2(product, 512); - - mp_bitcnt_t prec0 = (long int)((int)((sqrt((double)(INT_MAX))*(1.+EPS))/9000+75)*BITS_PER_DIGIT+16); - mpf_set_default_prec(prec0); - - mpf_init(magifpi); - mpf_init(magifpi0); - mpf_init(mpt1); - mpf_init(mpt2); - mpf_init(mptmp); - mpf_init_set_ui(mpten, 10); - mpf_set_str(mpt2, "0.8e3b1a9b359805c2e54c6415037f2e336893b6457f7754f6b4ae045eb6c5f2bedb26a114030846be7", 16); - mpf_set_str(magifpi0, "0.b7bfc6837e20bdb22653f1fc419f6bc33ca80eb65b7b0246f7f3b65689560aea1a2f2fd95f254d68c", 16); - - do { - data[19] = ++n; - memset(bhash, 0, 7 * 64); - - memcpy( &ctx2, &ctx1, sizeof(m7m_ctx) ); - - sha256_update( &ctx2.sha256, data_p64, 80 - M7_MIDSTATE_LEN ); - sha256_final( &ctx2.sha256, bhash[0] ); - - sph_sha512( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN ); - sph_sha512_close( &ctx2.sha512, bhash[1] ); - - sph_keccak512( &ctx2.keccak, data_p64, 80 - M7_MIDSTATE_LEN ); - sph_keccak512_close( &ctx2.keccak, (void*)(bhash[2]) ); - - sph_whirlpool( &ctx2.whirlpool, data_p64, 80 - M7_MIDSTATE_LEN ); - sph_whirlpool_close( &ctx2.whirlpool, (void*)(bhash[3]) ); - - sph_haval256_5( &ctx2.haval, data_p64, 80 - M7_MIDSTATE_LEN ); - sph_haval256_5_close( &ctx2.haval, (void*)(bhash[4])) ; - - sph_tiger( &ctx2.tiger, data_p64, 80 - M7_MIDSTATE_LEN ); - sph_tiger_close( &ctx2.tiger, (void*)(bhash[5]) ); - - sph_ripemd160( &ctx2.ripemd, data_p64, 80 - M7_MIDSTATE_LEN ); - sph_ripemd160_close( &ctx2.ripemd, (void*)(bhash[6]) ); - - mpz_import(bns0, a, -1, p, -1, 0, bhash[0]); - mpz_set(bns1, bns0); - mpz_set(product, bns0); - for ( i=1; i < 7; i++ ) - { - mpz_import(bns0, a, -1, p, -1, 0, bhash[i]); - mpz_add(bns1, bns1, bns0); - mpz_mul(product, product, bns0); - } - mpz_mul(product, product, bns1); - - mpz_mul(product, product, product); - bytes = mpz_sizeinbase(product, 256); - mpz_export((void *)bdata, NULL, -1, 1, 0, 0, product); - - sha256_full( hash, bdata, bytes ); - - digits=(int)((sqrt((double)(n/2))*(1.+EPS))/9000+75); - mp_bitcnt_t prec = (long int)(digits*BITS_PER_DIGIT+16); - mpf_set_prec_raw(magifpi, prec); - mpf_set_prec_raw(mptmp, prec); - mpf_set_prec_raw(mpt1, prec); - mpf_set_prec_raw(mpt2, prec); - - usw_ = sw2_(n/2); - mpzscale = 1; - mpz_set_ui(magisw, usw_); - - for ( i = 0; i < 5; i++ ) - { - mpf_set_d(mpt1, 0.25*mpzscale); - mpf_sub(mpt1, mpt1, mpt2); - mpf_abs(mpt1, mpt1); - mpf_div(magifpi, magifpi0, mpt1); - mpf_pow_ui(mptmp, mpten, digits >> 1); - mpf_mul(magifpi, magifpi, mptmp); - mpz_set_f(magipi, magifpi); - mpz_add(magipi,magipi,magisw); - mpz_add(product,product,magipi); - mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash)); - mpz_add(bns1, bns1, bns0); - mpz_mul(product,product,bns1); - mpz_cdiv_q (product, product, bns0); - - bytes = mpz_sizeinbase(product, 256); - mpzscale=bytes; - mpz_export(bdata, NULL, -1, 1, 0, 0, product); - - sha256_full( hash, bdata, bytes ); - } - - if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget ) - && !opt_benchmark ) ) - { - if ( opt_debug ) - { - bin2hex( hash_str, (unsigned char *)hash, 32 ); - bin2hex( target_str, (unsigned char *)ptarget, 32 ); - bin2hex( data_str, (unsigned char *)data, 80 ); - applog( LOG_DEBUG, "DEBUG: [%d thread] Found share!\ndata %s\nhash %s\ntarget %s", - thr_id, data_str, hash_str, target_str ); - } - pdata[19] = data[19]; - submit_solution( work, hash, mythr ); - } - } while ( n < max_nonce && !work_restart[thr_id].restart ); - - pdata[19] = n; - - mpf_set_prec_raw( magifpi, prec0 ); - mpf_set_prec_raw( magifpi0, prec0 ); - mpf_set_prec_raw( mptmp, prec0 ); - mpf_set_prec_raw( mpt1, prec0 ); - mpf_set_prec_raw( mpt2, prec0 ); - mpf_clear( magifpi ); - mpf_clear( magifpi0 ); - mpf_clear( mpten ); - mpf_clear( mptmp ); - mpf_clear( mpt1 ); - mpf_clear( mpt2 ); - mpz_clears( magipi, magisw, product, bns0, bns1, NULL ); - - *hashes_done = n - first_nonce + 1; - return 0; -} - -bool register_m7m_algo( algo_gate_t *gate ) -{ - gate->optimizations = SHA_OPT; - init_m7m_ctx(); - gate->scanhash = (void*)&scanhash_m7m_hash; - gate->build_stratum_request = (void*)&std_be_build_stratum_request; - gate->work_decode = (void*)&std_be_work_decode; - gate->submit_getwork_result = (void*)&std_be_submit_getwork_result; - gate->set_work_data_endian = (void*)&set_work_data_big_endian; - opt_target_factor = 65536.0; - return true; -} - - diff --git a/algo/m7m/magimath.cpp b/algo/m7m/magimath.cpp deleted file mode 100644 index c8c64e23..00000000 --- a/algo/m7m/magimath.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2014 The Magi developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include -#include -#include -#include -#include -#include -#include - -#include "magimath.h" - -#define EPS1 (std::numeric_limits::epsilon()) -#define EPS2 3.0e-11 - -static void gauleg(double x1, double x2, double x[], double w[], const int n) -{ - int m,j,i; - double z1, z, xm, xl, pp, p3, p2, p1; - m=(n+1)/2; - xm=0.5*(x2+x1); - xl=0.5*(x2-x1); - for (i=1;i<=m;i++) { - z=cos(3.141592654*(i-0.25)/(n+0.5)); - do { - p1=1.0; - p2=0.0; - for (j=1;j<=n;j++) { - p3=p2; - p2=p1; - p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j; - } - pp=n*(z*p1-p2)/(z*z-1.0); - z1=z; - z=z1-p1/pp; - } while (fabs(z-z1) > EPS2); - x[i]=xm-xl*z; - x[n+1-i]=xm+xl*z; - w[i]=2.0*xl/((1.0-z*z)*pp*pp); - w[n+1-i]=w[i]; - } -} - -static double GaussianQuad_N(double func(const double), const double a2, const double b2, const int NptGQ) -{ - double s=0.0; -#ifdef _MSC_VER -#define SW_DIVS 23 - double x[SW_DIVS+1], w[SW_DIVS+1]; -#else - double x[NptGQ+1], w[NptGQ+1]; -#endif - - gauleg(a2, b2, x, w, NptGQ); - - for (int j=1; j<=NptGQ; j++) { - s += w[j]*func(x[j]); - } - - return s; -} - -static double swit_(double wvnmb) -{ - return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5) - / 1034.66 * pow(sin(wvnmb/65.), 2.); -} - -uint32_t sw_(int nnounce, int divs) -{ - double wmax = ((sqrt((double)(nnounce))*(1.+EPS1))/450+100); - return ((uint32_t)(GaussianQuad_N(swit_, 0., wmax, divs)*(1.+EPS1)*1.e6)); -} diff --git a/algo/m7m/magimath.h b/algo/m7m/magimath.h deleted file mode 100644 index b57eb806..00000000 --- a/algo/m7m/magimath.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2014 The Magi developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. -#ifndef MAGI_MATH_H -#define MAGI_MATH_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -uint32_t sw_(int nnounce, int divs); - -#ifdef __cplusplus -} -#endif - - -inline double exp_n(double xt) -{ - double p1 = -700.0, p3 = -0.8e-8, p4 = 0.8e-8, p6 = 700.0; - if(xt < p1) - return 0; - else if(xt > p6) - return 1e200; - else if(xt > p3 && xt < p4) - return (1.0 + xt); - else - return exp(xt); -} - -// 1 / (1 + exp(x1-x2)) -inline double exp_n2(double x1, double x2) -{ - double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.; - double xt = x1 - x2; - if (xt < p1+1.e-200) - return 1.; - else if (xt > p1 && xt < p2 + 1.e-200) - return ( 1. - exp(xt) ); - else if (xt > p2 && xt < p3 + 1.e-200) - return ( 1. / (1. + exp(xt)) ); - else if (xt > p3 && xt < p4) - return ( 1. / (2. + xt) ); - else if (xt > p4 - 1.e-200 && xt < p5) - return ( exp(-xt) / (1. + exp(-xt)) ); - else if (xt > p5 - 1.e-200 && xt < p6) - return ( exp(-xt) ); - else //if (xt > p6 - 1.e-200) - return 0.; -} - -#endif