Skip to content

Commit 018c1a5

Browse files
committed
Big endian bug fixes
1 parent 1c3666a commit 018c1a5

File tree

7 files changed

+47
-43
lines changed

7 files changed

+47
-43
lines changed

Diff for: src/blake2/endian.h

+11 -3
Original file line number | Diff line number | Diff line change
@@ -41,11 +41,15 @@ static FORCE_INLINE uint32_t load32(const void *src) {
4141
#endif
4242
}
4343

44-
static FORCE_INLINE uint64_t load64(const void *src) {
45-
#if defined(NATIVE_LITTLE_ENDIAN)
44+
static FORCE_INLINE uint64_t load64_native(const void *src) {
4645
uint64_t w;
4746
memcpy(&w, src, sizeof w);
4847
return w;
48+
}
49+
50+
static FORCE_INLINE uint64_t load64(const void *src) {
51+
#if defined(NATIVE_LITTLE_ENDIAN)
52+
return load64_native(src);
4953
#else
5054
const uint8_t *p = (const uint8_t *)src;
5155
uint64_t w = *p++;
@@ -75,9 +79,13 @@ static FORCE_INLINE void store32(void *dst, uint32_t w) {
7579
#endif
7680
}
7781

82+
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
83+
memcpy(dst, &w, sizeof w);
84+
}
85+
7886
static FORCE_INLINE void store64(void *dst, uint64_t w) {
7987
#if defined(NATIVE_LITTLE_ENDIAN)
80-
memcpy(dst, &w, sizeof w);
88+
store64_native(dst, w);
8189
#else
8290
uint8_t *p = (uint8_t *)dst;
8391
*p++ = (uint8_t)w;

Diff for: src/dataset.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ namespace randomx {
192192
executeSuperscalar(rl, prog, &cache->reciprocalCache);
193193

194194
for (unsigned q = 0; q < 8; ++q)
195-
rl[q] ^= load64(mixBlock + 8 * q);
195+
rl[q] ^= load64_native(mixBlock + 8 * q);
196196

197197
registerValue = rl[prog.getAddressRegister()];
198198
}

Diff for: src/intrin_portable.h

+2 -1
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,8 @@ inline __m128i _mm_slli_si128(__m128i _A, int _Imm) {
295295

296296
inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
297297
__m128i x;
298-
x.u64[0] = load64(mem_addr);
298+
x.u32[0] = load32((uint8_t*)mem_addr + 0);
299+
x.u32[1] = load32((uint8_t*)mem_addr + 4);
299300
return x;
300301
}
301302

Diff for: src/superscalar.cpp

+17 -25
Original file line number | Diff line number | Diff line change
@@ -573,36 +573,28 @@ namespace randomx {
573573
constexpr int LOOK_FORWARD_CYCLES = 4;
574574
constexpr int MAX_THROWAWAY_COUNT = 256;
575575

576-
#ifndef _DEBUG
577-
constexpr bool TRACE = false;
578-
constexpr bool INFO = false;
579-
#else
580-
constexpr bool TRACE = true;
581-
constexpr bool INFO = true;
582-
#endif
583-
584576
template<bool commit>
585577
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
586578
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
587579
//port P1 (multiplication) by instructions that can go to any port.
588580
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
589581
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
590582
if (commit) {
591-
if (TRACE) std::cout << "; P5 at cycle " << cycle << std::endl;
583+
if (trace) std::cout << "; P5 at cycle " << cycle << std::endl;
592584
portBusy[cycle][2] = uop;
593585
}
594586
return cycle;
595587
}
596588
if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) {
597589
if (commit) {
598-
if (TRACE) std::cout << "; P0 at cycle " << cycle << std::endl;
590+
if (trace) std::cout << "; P0 at cycle " << cycle << std::endl;
599591
portBusy[cycle][0] = uop;
600592
}
601593
return cycle;
602594
}
603595
if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) {
604596
if (commit) {
605-
if (TRACE) std::cout << "; P1 at cycle " << cycle << std::endl;
597+
if (trace) std::cout << "; P1 at cycle " << cycle << std::endl;
606598
portBusy[cycle][1] = uop;
607599
}
608600
return cycle;
@@ -621,7 +613,7 @@ namespace randomx {
621613
//move instructions are eliminated and don't need an execution unit
622614
if (mop.isEliminated()) {
623615
if (commit)
624-
if (TRACE) std::cout << "; (eliminated)" << std::endl;
616+
if (trace) std::cout << "; (eliminated)" << std::endl;
625617
return cycle;
626618
}
627619
else if (mop.isSimple()) {
@@ -677,7 +669,7 @@ namespace randomx {
677669

678670
//select a decode configuration
679671
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
680-
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
672+
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
681673

682674
int bufferIndex = 0;
683675

@@ -692,15 +684,15 @@ namespace randomx {
692684
//select an instruction so that the first macro-op fits into the current slot
693685
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
694686
macroOpIndex = 0;
695-
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
687+
if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
696688
}
697689
const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
698-
if (TRACE) std::cout << mop.getName() << " ";
690+
if (trace) std::cout << mop.getName() << " ";
699691

700692
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
701693
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
702694
if (scheduleCycle < 0) {
703-
if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
695+
if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
704696
//__debugbreak();
705697
portsSaturated = true;
706698
break;
@@ -711,7 +703,7 @@ namespace randomx {
711703
int forward;
712704
//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
713705
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
714-
if (TRACE) std::cout << "; src STALL at cycle " << cycle << std::endl;
706+
if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl;
715707
++scheduleCycle;
716708
++cycle;
717709
}
@@ -720,39 +712,39 @@ namespace randomx {
720712
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
721713
throwAwayCount++;
722714
macroOpIndex = currentInstruction.getInfo().getSize();
723-
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
715+
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
724716
//cycle = topCycle;
725717
continue;
726718
}
727719
//abort this decode buffer
728-
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
720+
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
729721
currentInstruction = SuperscalarInstruction::Null;
730722
break;
731723
}
732-
if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
724+
if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
733725
}
734726
//find a destination register that will be ready when this instruction executes
735727
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
736728
int forward;
737729
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
738-
if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
730+
if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl;
739731
++scheduleCycle;
740732
++cycle;
741733
}
742734
if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away
743735
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
744736
throwAwayCount++;
745737
macroOpIndex = currentInstruction.getInfo().getSize();
746-
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
738+
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
747739
//cycle = topCycle;
748740
continue;
749741
}
750742
//abort this decode buffer
751-
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
743+
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
752744
currentInstruction = SuperscalarInstruction::Null;
753745
break;
754746
}
755-
if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
747+
if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
756748
}
757749
throwAwayCount = 0;
758750

@@ -773,7 +765,7 @@ namespace randomx {
773765
ri.latency = retireCycle;
774766
ri.lastOpGroup = currentInstruction.getGroup();
775767
ri.lastOpPar = currentInstruction.getGroupPar();
776-
if (TRACE) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
768+
if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
777769
}
778770
codeSize += mop.getSize();
779771
bufferIndex++;

Diff for: src/tests/benchmark.cpp

+6 -11
Original file line number | Diff line number | Diff line change
@@ -37,14 +37,6 @@ const uint8_t blockTemplate_[] = {
3737
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
3838
};
3939

40-
constexpr char hexmap[] = "0123456789abcdef";
41-
void outputHex(std::ostream& os, const char* data, int length) {
42-
for (int i = 0; i < length; ++i) {
43-
os << hexmap[(data[i] & 0xF0) >> 4];
44-
os << hexmap[data[i] & 0x0F];
45-
}
46-
}
47-
4840
class AtomicHash {
4941
public:
5042
AtomicHash() {
@@ -101,19 +93,22 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
10193
int main(int argc, char** argv) {
10294
bool softAes, miningMode, verificationMode, help, largePages, jit;
10395
int noncesCount, threadCount, initThreadCount;
104-
int32_t seed;
96+
int32_t seedValue;
97+
char seed[4];
10598

10699
readOption("--softAes", argc, argv, softAes);
107100
readOption("--mine", argc, argv, miningMode);
108101
readOption("--verify", argc, argv, verificationMode);
109102
readIntOption("--threads", argc, argv, threadCount, 1);
110103
readIntOption("--nonces", argc, argv, noncesCount, 1000);
111104
readIntOption("--init", argc, argv, initThreadCount, 1);
112-
readIntOption("--seed", argc, argv, seed, 0);
105+
readIntOption("--seed", argc, argv, seedValue, 0);
113106
readOption("--largePages", argc, argv, largePages);
114107
readOption("--jit", argc, argv, jit);
115108
readOption("--help", argc, argv, help);
116109

110+
store32(&seed, seedValue);
111+
117112
std::cout << "RandomX benchmark" << std::endl;
118113

119114
if (help || (!miningMode && !verificationMode)) {
@@ -229,7 +224,7 @@ int main(int argc, char** argv) {
229224
double elapsed = sw.getElapsed();
230225
std::cout << "Calculated result: ";
231226
result.print(std::cout);
232-
if (noncesCount == 1000 && seed == 0)
227+
if (noncesCount == 1000 && seedValue == 0)
233228
std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl;
234229
if (!miningMode) {
235230
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;

Diff for: src/tests/utility.hpp

+8
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
2424
#include <iostream>
2525
#include <fstream>
2626

27+
constexpr char hexmap[] = "0123456789abcdef";
28+
inline void outputHex(std::ostream& os, const char* data, int length) {
29+
for (int i = 0; i < length; ++i) {
30+
os << hexmap[(data[i] & 0xF0) >> 4];
31+
os << hexmap[data[i] & 0x0F];
32+
}
33+
}
34+
2735
inline void dump(const char* buffer, uint64_t count, const char* name) {
2836
std::ofstream fout(name, std::ios::out | std::ios::binary);
2937
fout.write(buffer, count);

Diff for: src/vm_interpreted.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ namespace randomx {
114114
template<class Allocator, bool softAes>
115115
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
116116
auto& ibc = byteCode[ic];
117-
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
117+
if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
118118
switch (ibc.type)
119119
{
120120
case InstructionType::IADD_RS: {
@@ -270,7 +270,7 @@ namespace randomx {
270270
default:
271271
UNREACHABLE;
272272
}
273-
if (trace) {
273+
if (trace && ibc.type != InstructionType::NOP) {
274274
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
275275
print(*ibc.idst);
276276
else //if(ibc.type >= 20 && ibc.type <= 30)

0 commit comments

Comments
 (0)