fluencelabs
diff --git a/‎src/assembly_generator_x86.cpp
-6 b/‎src/assembly_generator_x86.cpp
-6
diff --git a/‎src/configuration.h
-6 b/‎src/configuration.h
-6
diff --git a/‎src/dataset.cpp
-7 b/‎src/dataset.cpp
-7
diff --git a/‎src/instruction.hpp
+21-24 b/‎src/instruction.hpp
+21-24
diff --git a/‎src/instructions_portable.cpp
+10-10 b/‎src/instructions_portable.cpp
+10-10
diff --git a/‎src/jit_compiler_x86.cpp
+9-89 b/‎src/jit_compiler_x86.cpp
+9-89
@@ -34,8 +34,6 @@ namespace randomx {
 	static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
 	static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
 
-	static const char* fsumInstr[4] = { "paddb", "paddw", "paddd", "paddq" };
-
 	static const char* regA4 = "xmm12";
 	static const char* dblMin = "xmm13";
 	static const char* absMask = "xmm14";
@@ -58,7 +56,6 @@ namespace randomx {
 			instr.src %= RegistersCount;
 			instr.dst %= RegistersCount;
 			generateCode(instr, i);
-			//asmCode << std::endl;
 		}
 	}
 
@@ -494,7 +491,6 @@ namespace randomx {
 	//2 uOPs
 	void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
 		if (instr.src != instr.dst) {
-			//std::swap(registerUsage[instr.dst], registerUsage[instr.src]);
 			registerUsage[instr.dst] = i;
 			registerUsage[instr.src] = i;
 			asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
@@ -516,7 +512,6 @@ namespace randomx {
 		instr.dst %= 4;
 		instr.src %= 4;
 		asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
-		//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
 		traceflt(instr);
 	}
 
@@ -534,7 +529,6 @@ namespace randomx {
 		instr.dst %= 4;
 		instr.src %= 4;
 		asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
-		//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
 		traceflt(instr);
 	}
 
 
@@ -40,12 +40,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 //Dataset size in bytes. Must be a power of 2.
 #define RANDOMX_DATASET_SIZE       (2ULL * 1024 * 1024 * 1024)
 
-//Number of blocks per epoch
-#define RANDOMX_EPOCH_BLOCKS       2048
-
-//Number of blocks between the seed block and the start of new epoch
-#define RANDOMX_EPOCH_LAG          64
-
 //Number of instructions in a RandomX program
 #define RANDOMX_PROGRAM_SIZE       256
 
 
@@ -40,13 +40,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include "argon2.h"
 #include "argon2_core.h"
 
-#if defined(__SSE2__)
-#include <wmmintrin.h>
-#define PREFETCHNTA(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
-#else
-#define PREFETCH(memory)
-#endif
-
 randomx_dataset::~randomx_dataset() {
 
 }
 
@@ -43,30 +43,27 @@ namespace randomx {
 		constexpr int ISMULH_R = 10;
 		constexpr int ISMULH_M = 11;
 		constexpr int IMUL_RCP = 12;
-		//constexpr int ISDIV_C = 13;
-		constexpr int INEG_R = 14;
-		constexpr int IXOR_R = 15;
-		constexpr int IXOR_M = 16;
-		constexpr int IROR_R = 17;
-		constexpr int IROL_R = 18;
-		constexpr int ISWAP_R = 19;
-		constexpr int FSWAP_R = 20;
-		constexpr int FADD_R = 21;
-		constexpr int FADD_M = 22;
-		constexpr int FSUB_R = 23;
-		constexpr int FSUB_M = 24;
-		constexpr int FSCAL_R = 25;
-		constexpr int FMUL_R = 26;
-		constexpr int FMUL_M = 27;
-		constexpr int FDIV_R = 28;
-		constexpr int FDIV_M = 29;
-		constexpr int FSQRT_R = 30;
-		constexpr int COND_R = 31;
-		constexpr int COND_M = 32;
-		constexpr int CFROUND = 33;
-		constexpr int ISTORE = 34;
-		constexpr int FSTORE = 35;
-		constexpr int NOP = 36;
+		constexpr int INEG_R = 13;
+		constexpr int IXOR_R = 14;
+		constexpr int IXOR_M = 15;
+		constexpr int IROR_R = 16;
+		constexpr int IROL_R = 17;
+		constexpr int ISWAP_R = 18;
+		constexpr int FSWAP_R = 19;
+		constexpr int FADD_R = 20;
+		constexpr int FADD_M = 21;
+		constexpr int FSUB_R = 22;
+		constexpr int FSUB_M = 23;
+		constexpr int FSCAL_R = 24;
+		constexpr int FMUL_R = 25;
+		constexpr int FDIV_M = 26;
+		constexpr int FSQRT_R = 27;
+		constexpr int COND_R = 28;
+		constexpr int COND_M = 29;
+		constexpr int CFROUND = 30;
+		constexpr int ISTORE = 31;
+		constexpr int FSTORE = 32;
+		constexpr int NOP = 33;
 	}
 
 	class Instruction {
 
@@ -73,14 +73,14 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 		#define HAVE_SMULH
 	#endif
 
-	static void setRoundMode__(uint32_t mode) {
+	static void setRoundMode_(uint32_t mode) {
 		_controlfp(mode, _MCW_RC);
 	}
 	#define HAVE_SETROUNDMODE_IMPL
 #endif
 
 #ifndef HAVE_SETROUNDMODE_IMPL
-	static void setRoundMode__(uint32_t mode) {
+	static void setRoundMode_(uint32_t mode) {
 		fesetround(mode);
 	}
 #endif
@@ -135,7 +135,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 
 #if defined(__has_builtin)
 #if __has_builtin(__builtin_sub_overflow)
-	static inline bool subOverflow__(uint32_t a, uint32_t b) {
+	static inline bool subOverflow_(uint32_t a, uint32_t b) {
 		int32_t temp;
 		return __builtin_sub_overflow(unsigned32ToSigned2sCompl(a), unsigned32ToSigned2sCompl(b), &temp);
 	}
@@ -144,7 +144,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #endif
 
 #ifndef HAVE_SUB_OVERFLOW
-	static inline bool subOverflow__(uint32_t a, uint32_t b) {
+	static inline bool subOverflow_(uint32_t a, uint32_t b) {
 		auto c = unsigned32ToSigned2sCompl(a - b);
 		return (c < unsigned32ToSigned2sCompl(a)) != (unsigned32ToSigned2sCompl(b) > 0);
 	}
@@ -166,16 +166,16 @@ static inline double FlushNaN(double x) {
 void setRoundMode(uint32_t rcflag) {
 	switch (rcflag & 3) {
 		case RoundDown:
-			setRoundMode__(FE_DOWNWARD);
+			setRoundMode_(FE_DOWNWARD);
 			break;
 		case RoundUp:
-			setRoundMode__(FE_UPWARD);
+			setRoundMode_(FE_UPWARD);
 			break;
 		case RoundToZero:
-			setRoundMode__(FE_TOWARDZERO);
+			setRoundMode_(FE_TOWARDZERO);
 			break;
 		case RoundToNearest:
-			setRoundMode__(FE_TONEAREST);
+			setRoundMode_(FE_TONEAREST);
 			break;
 		default:
 			UNREACHABLE;
@@ -194,9 +194,9 @@ bool condition(uint32_t type, uint32_t value, uint32_t imm32) {
 		case 3:
 			return unsigned32ToSigned2sCompl(value - imm32) >= 0;
 		case 4:
-			return subOverflow__(value, imm32);
+			return subOverflow_(value, imm32);
 		case 5:
-			return !subOverflow__(value, imm32);
+			return !subOverflow_(value, imm32);
 		case 6:
 			return unsigned32ToSigned2sCompl(value) < unsigned32ToSigned2sCompl(imm32);
 		case 7:
 
@@ -103,13 +103,11 @@ namespace randomx {
 	; xmm11 -> "a3"
 	; xmm12 -> temporary
 	; xmm13 -> mantissa mask    = 0x000fffffffffffff000fffffffffffff
-	; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
+	; xmm14 -> exponent 2**-240 = 0x30f00000000xxxxx30f00000000xxxxx
 	; xmm15 -> scale mask       = 0x81f000000000000081f0000000000000
 
 	*/
 
-#define NOP_TEST true
-
 	const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
 	const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
 	const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
@@ -254,18 +252,10 @@ namespace randomx {
 
 	void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
 		generateProgramPrologue(prog, pcfg);
-		//if (superscalar) {
 		emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
 		emitByte(CALL);
 		emit32(superScalarHashOffset - (codePos + 4));
 		emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
-		/*}
-		else {
-			memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
-			codePos += readDatasetLightSize;
-			emitByte(CALL);
-			emit32(readDatasetLightSubOffset - (codePos + 4));
-		}*/
 		generateProgramEpilogue(prog);
 	}
 
@@ -483,10 +473,6 @@ namespace randomx {
 			emitByte(0xc0 + instr.dst);
 			emit32(instr.getImm32());
 		}*/
-		if (false && NOP_TEST) {
-			emit(NOP4);
-			return;
-		}
 		emit(REX_LEA);
 		if (instr.dst == RegisterNeedsDisplacement)
 			emitByte(0xac);
@@ -527,18 +513,10 @@ namespace randomx {
 	void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		if (instr.src != instr.dst) {
-			if (false && NOP_TEST) {
-				emit(NOP3);
-				return;
-			}
 			emit(REX_SUB_RR);
 			emitByte(0xc0 + 8 * instr.dst + instr.src);
 		}
 		else {
-			if (false && NOP_TEST) {
-				emit(NOP7);
-				return;
-			}
 			emit(REX_81);
 			emitByte(0xe8 + instr.dst);
 			emit32(instr.getImm32());
@@ -571,18 +549,10 @@ namespace randomx {
 	void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		if (instr.src != instr.dst) {
-			if (false && NOP_TEST) {
-				emit(NOP4);
-				return;
-			}
 			emit(REX_IMUL_RR);
 			emitByte(0xc0 + 8 * instr.dst + instr.src);
 		}
 		else {
-			if (false && NOP_TEST) {
-				emit(NOP7);
-				return;
-			}
 			emit(REX_IMUL_RRI);
 			emitByte(0xc0 + 9 * instr.dst);
 			emit32(instr.getImm32());
@@ -606,12 +576,6 @@ namespace randomx {
 
 	void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
-		if (false && NOP_TEST) {
-			emit(NOP3);
-			emit(NOP3);
-			emit(NOP3);
-			return;
-		}
 		emit(REX_MOV_RR64);
 		emitByte(0xc0 + instr.dst);
 		emit(REX_MUL_R);
@@ -641,12 +605,6 @@ namespace randomx {
 
 	void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
-		if (false && NOP_TEST) {
-			emit(NOP3);
-			emit(NOP3);
-			emit(NOP3);
-			return;
-		}
 		emit(REX_MOV_RR64);
 		emitByte(0xc0 + instr.dst);
 		emit(REX_MUL_R);
@@ -676,13 +634,6 @@ namespace randomx {
 
 	void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
 		if (instr.getImm32() != 0) {
-			if (false && NOP_TEST) {
-				emitByte(0x66);
-				emitByte(0x66);
-				emit(NOP8);
-				emit(NOP4);
-				return;
-			}
 			registerUsage[instr.dst] = i;
 			emit(MOV_RAX_I);
 			emit64(randomx_reciprocal(instr.getImm32()));
@@ -704,18 +655,10 @@ namespace randomx {
 	void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		if (instr.src != instr.dst) {
-			if (false && NOP_TEST) {
-				emit(NOP3);
-				return;
-			}
 			emit(REX_XOR_RR);
 			emitByte(0xc0 + 8 * instr.dst + instr.src);
 		}
 		else {
-			if (false && NOP_TEST) {
-				emit(NOP7);
-				return;
-			}
 			emit(REX_XOR_RI);
 			emitByte(0xf0 + instr.dst);
 			emit32(instr.getImm32());
@@ -740,21 +683,12 @@ namespace randomx {
 	void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		if (instr.src != instr.dst) {
-			if (false && NOP_TEST) {
-				emit(NOP3);
-				emit(NOP3);
-				return;
-			}
 			emit(REX_MOV_RR);
 			emitByte(0xc8 + instr.src);
 			emit(REX_ROT_CL);
 			emitByte(0xc8 + instr.dst);
 		}
 		else {
-			if (false && NOP_TEST) {
-				emit(NOP4);
-				return;
-			}
 			emit(REX_ROT_I8);
 			emitByte(0xc8 + instr.dst);
 			emitByte(instr.getImm32() & 63);
@@ -949,21 +883,14 @@ namespace randomx {
 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
 		int reg = getConditionRegister();
 		int target = registerUsage[reg] + 1;
-		if (false && NOP_TEST) {
-			emit(NOP7);
-			emit(NOP7);
-			emit(NOP6);
-		}
-		else {
-			emit(REX_ADD_I);
-			emitByte(0xc0 + reg);
-			emit32(1 << shift);
-			emit(REX_TEST);
-			emitByte(0xc0 + reg);
-			emit32(conditionMask);
-			emit(JZ);
-			emit32(instructionOffsets[target] - (codePos + 4));
-		}
+		emit(REX_ADD_I);
+		emitByte(0xc0 + reg);
+		emit32(1 << shift);
+		emit(REX_TEST);
+		emitByte(0xc0 + reg);
+		emit32(conditionMask);
+		emit(JZ);
+		emit32(instructionOffsets[target] - (codePos + 4));
 		for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 			registerUsage[j] = i;
 		}
@@ -973,13 +900,6 @@ namespace randomx {
 #ifdef RANDOMX_JUMP
 		handleCondition(instr, i);
 #endif
-		if (false && NOP_TEST) {
-			emit(NOP3);
-			emit(NOP7);
-			emit(NOP3);
-			emit(NOP3);
-			return;
-		}
 		emit(XOR_ECX_ECX);
 		emit(REX_CMP_R32I);
 		emitByte(0xf8 + instr.src);