Skip to content

Commit

Permalink
Merge pull request #44 from okuhara/nmakefile
Browse files Browse the repository at this point in the history
Add NMakefile for Windows self build and fix incompatibilities
  • Loading branch information
abulmo authored Dec 13, 2024
2 parents de4b1e0 + 84680d9 commit aa75ba9
Show file tree
Hide file tree
Showing 16 changed files with 171 additions and 50 deletions.
26 changes: 22 additions & 4 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,15 @@ jobs:
strategy:
fail-fast: false # We want results from all OSes even if one fails.
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
build_command: make build ARCH=x86-64-v3 COMP=gcc OS=linux
- os: windows-latest
build_command: make build ARCH=x86-64-v3 COMP=gcc OS=windows
- os: macos-latest
build_command: make build ARCH=armv8.5-a COMP=gcc OS=osx

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: build
run: |-
Expand All @@ -36,3 +34,23 @@ jobs:
with:
name: artifact_${{ runner.os }}
path: bin

# Windows job: builds the baseline x64 executable with MSVC + nmake using
# src/NMakefile, then uploads the bin directory as an artifact.
windows-build:
  runs-on: windows-latest
  timeout-minutes: 10

  steps:
    - uses: actions/checkout@v4

    - name: build
      # This step runs under cmd.exe (see "shell" below). cmd's mkdir has no
      # -p option: "mkdir -p bin" would create a stray directory named "-p"
      # next to "bin", so the directory is created with a guarded plain mkdir.
      # vcvars64.bat must be invoked with "call" so that control returns to
      # this script with the compiler environment set up.
      run: |-
        if not exist bin mkdir bin
        cd src
        call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
        nmake -fnmakefile vc-x64
      shell: cmd

    - uses: actions/upload-artifact@v4
      with:
        name: artifact_${{ runner.os }}
        path: bin
70 changes: 70 additions & 0 deletions src/NMakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# makefile
#
# Compilation options for Microsoft Visual C++ for Windows & nmake.
#
# x64-v4 x64 with sse2, avx, sse4.2 & popcount & avx2 & avx512 support
# x64-v3 x64 with sse2, avx, sse4.2 & popcount & avx2 support
# x64-v2 x64 with sse2, sse4.2 & popcount support
# x64 x64 with sse2 support
# a64 ARM v8

# Options shared by every cl and clang-cl target in this file:
#   /std:c17                    compile as ISO C17
#   /DUNICODE /utf-8            Unicode Windows API, UTF-8 source/execution charset
#   /D_CRT_SECURE_NO_DEPRECATE  silence CRT "insecure function" deprecation warnings
#   /I"..\include"              extra include directory
#   /O2 /fp:fast /GS-           optimize for speed, relaxed floating point,
#                               no buffer security checks
#   /D NDEBUG /MT               release build (assert disabled), static C runtime
VC_FLAGS = /std:c17 /DUNICODE /utf-8 /D_CRT_SECURE_NO_DEPRECATE /I"..\include" /O2 /fp:fast /GS- /D NDEBUG /MT

# x64-v4 build with Microsoft cl: AVX-512 code generation (/arch:AVX512).
# __POPCNT__ and __CRC32__ are defined by hand because the sources test these
# GCC-style feature macros (see bit.c and crc32c.c).
# NOTE: remove /vlen=256 when building with cl earlier than 14.42.
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-x64-v4:
	@if not exist ..\bin mkdir ..\bin
	cl $(VC_FLAGS) /arch:AVX512 /experimental:c11atomics /GL /vlen=256 /D__POPCNT__ /D__CRC32__ all.c ws2_32.lib /Fe..\bin\wEdax-x64-v4.exe /link /VERSION:4.6

# x64-v3 build with Microsoft cl: AVX2 code generation (/arch:AVX2).
# __POPCNT__ and __CRC32__ are defined by hand because the sources test these
# GCC-style feature macros (see bit.c and crc32c.c).
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-x64-v3:
	@if not exist ..\bin mkdir ..\bin
	cl $(VC_FLAGS) /arch:AVX2 /experimental:c11atomics /GL /D__POPCNT__ /D__CRC32__ all.c ws2_32.lib /Fe..\bin\wEdax-x64-v3.exe /link /VERSION:4.6

# x64-v2 build with Microsoft cl: no /arch option is passed; the feature
# macros __SSE2__, __POPCNT__ and __CRC32__ that the sources test
# (settings.h, bit.c, crc32c.c) are defined by hand instead.
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-x64-v2:
	@if not exist ..\bin mkdir ..\bin
	cl $(VC_FLAGS) /experimental:c11atomics /GL /D__SSE2__ /D__POPCNT__ /D__CRC32__ all.c ws2_32.lib /Fe..\bin\wEdax-x64-v2.exe /link /VERSION:4.6

# Baseline x64 build with Microsoft cl: only __SSE2__ is defined (tested by
# settings.h to pick the move generator); no popcount or CRC32 instructions.
# This is the target used by the GitHub Actions Windows job.
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-x64:
	@if not exist ..\bin mkdir ..\bin
	cl $(VC_FLAGS) /experimental:c11atomics /GL /D__SSE2__ all.c ws2_32.lib /Fe..\bin\wEdax-x64.exe /link /VERSION:4.6

# ARM64 build with Microsoft cl; __ARM_NEON is defined by hand because
# settings.h tests it to pick the NEON move generator.
# NOTE: presumably needs the ARM64 cross-compiler environment, i.e. run
# vcvarsamd64_arm64.bat (instead of vcvars64.bat) before nmake -- confirm.
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-a64:
	@if not exist ..\bin mkdir ..\bin
	cl $(VC_FLAGS) /experimental:c11atomics /GL /D__ARM_NEON all.c ws2_32.lib /Fe..\bin\wEdax-a64.exe /link /VERSION:4.6

# x64-v4 build with clang-cl: AVX-512 code generation (/arch:AVX512) with
# 256-bit vectors preferred (-mprefer-vector-width=256).
# /U__STDC_NO_THREADS__ undefines that macro for the compilation (presumably
# so the sources take their C11 <threads.h> code path -- confirm).
# The output directory ..\bin is created first, since /Fe does not create it.
clang-x64-v4:
	@if not exist ..\bin mkdir ..\bin
	clang-cl -mprefer-vector-width=256 $(VC_FLAGS) /U__STDC_NO_THREADS__ /arch:AVX512 all.c ws2_32.lib /Fe..\bin\wEdax-x64-v4.exe /link /VERSION:4.6

# x64-v3 build with clang-cl: AVX2 code generation (/arch:AVX2).
# /U__STDC_NO_THREADS__ undefines that macro for the compilation (presumably
# so the sources take their C11 <threads.h> code path -- confirm).
# The output directory ..\bin is created first, since /Fe does not create it.
clang-x64-v3:
	@if not exist ..\bin mkdir ..\bin
	clang-cl $(VC_FLAGS) /U__STDC_NO_THREADS__ /arch:AVX2 all.c ws2_32.lib /Fe..\bin\wEdax-x64-v3.exe /link /VERSION:4.6

# x64-v2 build with clang-cl: the instruction set is selected with
# -march=x86-64-v2.
# /U__STDC_NO_THREADS__ undefines that macro for the compilation (presumably
# so the sources take their C11 <threads.h> code path -- confirm).
# The output directory ..\bin is created first, since /Fe does not create it.
clang-x64-v2:
	@if not exist ..\bin mkdir ..\bin
	clang-cl -march=x86-64-v2 $(VC_FLAGS) /U__STDC_NO_THREADS__ all.c ws2_32.lib /Fe..\bin\wEdax-x64-v2.exe /link /VERSION:4.6

# Baseline x64 build with clang-cl: no architecture option is passed, so the
# compiler's default x64 instruction set is used.
# /U__STDC_NO_THREADS__ undefines that macro for the compilation (presumably
# so the sources take their C11 <threads.h> code path -- confirm).
# The output directory ..\bin is created first, since /Fe does not create it.
clang-x64:
	@if not exist ..\bin mkdir ..\bin
	clang-cl $(VC_FLAGS) /U__STDC_NO_THREADS__ all.c ws2_32.lib /Fe..\bin\wEdax-x64.exe /link /VERSION:4.6

# ARM64 build with clang-cl, cross-compiling via --target=aarch64-win32-msvc.
# __ARM_NEON is passed explicitly, matching the vc-a64 rule.
# /U__STDC_NO_THREADS__ undefines that macro for the compilation (presumably
# so the sources take their C11 <threads.h> code path -- confirm).
# The output directory ..\bin is created first, since /Fe does not create it.
clang-a64:
	@if not exist ..\bin mkdir ..\bin
	clang-cl --target=aarch64-win32-msvc $(VC_FLAGS) /U__STDC_NO_THREADS__ /D__ARM_NEON all.c ws2_32.lib /Fe..\bin\wEdax-a64.exe /link /VERSION:4.6

# Profile-guided (PGO) x64-v3 build with Microsoft cl:
#   1. build an instrumented executable (/ltcg:pgi);
#   2. run two training workloads from ..\bin (an endgame solve of
#      ..\problem\fforum-20-39.obf and a self-play session that writes a
#      throw-away book), then delete that book;
#   3. relink all.obj with the collected profile (/ltcg:pgo);
#   4. delete the profile data (*.pgc here, *.pgd in ..\bin).
# VCPROFILE_PATH=..\src is evaluated from ..\bin, where the training runs are
# started (presumably it directs the .pgc files into the source directory).
# The recipe relies on NMAKE keeping the effect of "set" and "cd" for the
# command lines that follow them.
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-pgo-x64-v3:
	@if not exist ..\bin mkdir ..\bin
	set VCPROFILE_PATH=..\src
	cl $(VC_FLAGS) /arch:AVX2 /experimental:c11atomics /GL /D__POPCNT__ /D__CRC32__ all.c ws2_32.lib /Fe..\bin\wEdax-x64-v3.exe /link /ltcg:pgi /VERSION:4.6
	cd ..\bin
	wEdax-x64-v3 -l 60 -solve ..\problem\fforum-20-39.obf
	wEdax-x64-v3 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
	del book.pgo book.pgo.store
	cd ..\src
	link all.obj ws2_32.lib /out:..\bin\wEdax-x64-v3.exe /ltcg:pgo /VERSION:4.6
	del *.pgc ..\bin\*.pgd

# Profile-guided (PGO) baseline x64 build with Microsoft cl:
#   1. build an instrumented executable (/ltcg:pgi);
#   2. run two training workloads from ..\bin, then delete the throw-away book;
#   3. relink all.obj with the collected profile (/ltcg:pgo);
#   4. delete the profile data (*.pgc here, *.pgd in ..\bin).
# The feature macros match the non-PGO vc-x64 rule (only __SSE2__): this
# target produces the same wEdax-x64.exe, which must not require popcount or
# CRC32 instructions. Previously it defined __POPCNT__/__CRC32__ and omitted
# __SSE2__, apparently copied from the v3 rule.
# The recipe relies on NMAKE keeping the effect of "set" and "cd" for the
# command lines that follow them.
# The output directory ..\bin is created first, since cl /Fe does not create it.
vc-pgo-x64:
	@if not exist ..\bin mkdir ..\bin
	set VCPROFILE_PATH=..\src
	cl $(VC_FLAGS) /experimental:c11atomics /GL /D__SSE2__ all.c ws2_32.lib /Fe..\bin\wEdax-x64.exe /link /ltcg:pgi /VERSION:4.6
	cd ..\bin
	wEdax-x64 -l 60 -solve ..\problem\fforum-20-39.obf
	wEdax-x64 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
	del book.pgo book.pgo.store
	cd ..\src
	link all.obj ws2_32.lib /out:..\bin\wEdax-x64.exe /ltcg:pgo /VERSION:4.6
	del *.pgc ..\bin\*.pgd

# Remove intermediate build and profiling files from the source directory.
# cmd's del takes /-style switches: "-f" would be treated as a file name, so
# /f (force read-only files) and /q (no confirmation prompt) are used instead.
# The leading "-" tells NMAKE to ignore del's exit status and "2>nul" hides
# the "Could Not Find" messages for patterns that match nothing.
clean:
	-del /f /q pgopti* *.dyn all.gc* *~ *.p* *.obj 2>nul

30 changes: 22 additions & 8 deletions src/bit.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ int bit_count_64(const uint64_t b)

return stdc_count_ones_ul(b); // C23 version

#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && defined(__POPCNT__)

return __popcnt64(b); // Microsoft Visual C/C++ version

Expand Down Expand Up @@ -120,7 +120,7 @@ int bit_count_32(const uint32_t b)

return stdc_count_ones_ui(b); // C23 version

#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && defined(__POPCNT__)

return __popcnt(b); // Microsoft Visual C/C++ version

Expand Down Expand Up @@ -150,14 +150,21 @@ int bit_leading_zeros_64(uint64_t b)

return stdc_leading_zeros_ul(b); // C23 version

#elif defined(_MSC_VER) && defined(__AVX2__)

return __lzcnt64(b); // Microsoft Visual C/C++ BMI1 version

#elif defined(_MSC_VER)

return __lzcnt64(b); // Microsoft Visual C/C++ version
unsigned long index;
if (_BitScanReverse64(&index, b))
return 63 - (int) index;
return 64;

#elif defined(__GNUC__)

// return b ? __builtin_clzl(b) : 64; // GNUC/CLANG version
return __builtin_clzl(b); // GNUC/CLANG version
// return b ? __builtin_clzll(b) : 64; // GNUC/CLANG version
return __builtin_clzll(b); // GNUC/CLANG version

#else

Expand All @@ -170,7 +177,7 @@ int bit_leading_zeros_64(uint64_t b)
c = b >> 4; if (c != 0) { n = n - 4; b = c; }
c = b >> 2; if (c != 0) { n = n - 2; b = c; }
c = b >> 1; if (c != 0) return n - 2;
return n - x;
return n - b;


#endif
Expand All @@ -188,9 +195,16 @@ int bit_leading_zeros_32(uint32_t b)

return stdc_leading_zeros_ui(b); // C23 version

#elif defined(_MSC_VER) && defined(__AVX2__)

return __lzcnt(b); // Microsoft Visual C/C++ BMI1 version

#elif defined(_MSC_VER)

return __lzcnt(b); // Microsoft Visual C/C++ version
unsigned long index;
if (_BitScanReverse(&index, b))
return 31 - (int) index;
return 32;

#elif defined(__GNUC__)

Expand All @@ -207,7 +221,7 @@ int bit_leading_zeros_32(uint32_t b)
c = b >> 4; if (c != 0) { n = n - 4; b = c; }
c = b >> 2; if (c != 0) { n = n - 2; b = c; }
c = b >> 1; if (c != 0) return n - 2;
return n - x;
return n - b;


#endif
Expand Down
4 changes: 4 additions & 0 deletions src/bit.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
#include <stdio.h>
#include <stdint.h>

#ifndef __has_builtin // Compatibility with non-clang compilers.
#define __has_builtin(x) 0
#endif

struct Random;

/* declaration */
Expand Down
6 changes: 3 additions & 3 deletions src/crc32c.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
#include "settings.h"

#if !USE_CRC32C || (!defined(__CRC32__) && !defined(__ARM_FEATURE_CRC32))
#define SOFT_CRC32C true
#define SOFT_CRC32C 1 /* true */
#else
#define SOFT_CRC32C false
#define SOFT_CRC32C 0 /* false */
#endif

#include <assert.h>
#ifdef __ARM_FEATURE_CRC32
#include <arm_acle.h>
#elif defined(__CRC32__)
#ifdef __MSC_VER
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
Expand Down
6 changes: 3 additions & 3 deletions src/flip_bitscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ static const uint64_t FLIPPED_5_V[18] = {
*/
#if __has_builtin(__builtin_subcll)
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
uint64_t flipmask, cy;
flipmask = __builtin_subcl(outflank, 1, 0, &cy);
return __builtin_addcl(flipmask, 0, cy, &cy);
unsigned long long flipmask, cy;
flipmask = __builtin_subcll(outflank, 1, 0, &cy);
return __builtin_addcll(flipmask, 0, cy, &cy);
}
#elif (defined(_M_X64) && (_MSC_VER >= 1800)) || (defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)))
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
Expand Down
8 changes: 4 additions & 4 deletions src/flip_carry_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,11 @@ static const uint64_t FLIPPED_5_U[137] = {
/*
* Set all bits below the sole outflank bit if outflank != 0
*/
#if __has_builtin(__builtin_subcl)
#if __has_builtin(__builtin_subcll)
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
uint64_t flipmask, cy;
flipmask = __builtin_subcl(outflank, 1, 0, &cy);
return __builtin_addcl(flipmask, 0, cy, &cy);
unsigned long long flipmask, cy;
flipmask = __builtin_subcll(outflank, 1, 0, &cy);
return __builtin_addcll(flipmask, 0, cy, &cy);
}
#elif (defined(_M_X64) && (_MSC_VER >= 1800)) || (defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)))
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
Expand Down
6 changes: 3 additions & 3 deletions src/flip_neon_bitscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ static const uint64_t FLIPPED_4_H[19] = { // ...cbahg
*/
#if __has_builtin(__builtin_subcll)
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
uint64_t flipmask, cy;
flipmask = __builtin_subcl(outflank, 1, 0, &cy);
return __builtin_addcl(flipmask, 0, cy, &cy);
unsigned long long flipmask, cy;
flipmask = __builtin_subcll(outflank, 1, 0, &cy);
return __builtin_addcll(flipmask, 0, cy, &cy);
}
#else
#define OutflankToFlipmask(outflank) ((outflank) - (uint32_t) ((outflank) != 0))
Expand Down
6 changes: 5 additions & 1 deletion src/flip_sve_lzcnt.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ uint64_t flip(const int pos, const uint64_t P, const uint64_t O)

uint64_t board_flip(const Board *board, const int x)
{
return flip(x, P, O);
return flip(x, board->player, board->opponent);
}

uint64x2_t mm_flip(uint64x2_t OP, int pos)
{
return vdupq_n_u64(Flip(pos, vgetq_lane_u64(OP, 0), vgetq_lane_u64(OP, 1)));
}
12 changes: 6 additions & 6 deletions src/game.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ struct Random;
typedef struct Game {
Board initial_board;
struct {
uint16_t year;
uint8_t month;
uint8_t day;
uint8_t hour;
uint8_t minute;
uint8_t second;
int16_t year;
int8_t month;
int8_t day;
int8_t hour;
int8_t minute;
int8_t second;
} date;
char name[2][32];
uint8_t move[60];
Expand Down
26 changes: 18 additions & 8 deletions src/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,18 +257,28 @@ static void data_new(HashData *data, const HashStore *store)
}

/**
* @brief Initialize a new hash table item.
* @brief Prefetch the hash entry.
*
* This implementation tries to be robust against concurrency. Data are first
* set up in a local thread-safe structure, before being copied into the
* hashtable entry. Then the hashcode of the entry is xored with the thread
* safe structure ; so that any corrupted entry won't be readable.
* The hash entry may not be in the CPU cache and take long to read, so
* prefetch it as soon as the hash code is available.
*
* @param hashtable Hash table to fetch from.
* @param hashcode Hash code.
*/
void hash_prefetch(HashTable *hashtable, const uint64_t hashcode) {
#if defined(__GNUC__)
Hash *hash = hashtable->hash + (hashcode & hashtable->hash_mask);
__builtin_prefetch(hash);
__builtin_prefetch(hash + HASH_N_WAY - 1);
#if defined(__GNUC__)
__builtin_prefetch(hash);
__builtin_prefetch(hash + HASH_N_WAY - 1);
#elif defined(__SSE2__)
_mm_prefetch((char const *) hash, _MM_HINT_T0);
_mm_prefetch((char const *)(hash + HASH_N_WAY - 1), _MM_HINT_T0);
#elif defined(__ARM_ACLE)
__pld(hash);
__pld(hash + HASH_N_WAY - 1);
#elif defined(_M_ARM64)
__prefetch(hash);
__prefetch(hash + HASH_N_WAY - 1);
#endif
}

Expand Down
4 changes: 2 additions & 2 deletions src/search.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ const Selectivity selectivity_table [] = {

/** threshold values to try stability cutoff during NWS search */
// TODO: better values may exist.
const uint8_t NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
const int8_t NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
99, 99, 99, 99, 6, 8, 10, 12,
#if USE_SOLID
8, 10, 20, 22, 24, 26, 28, 30,
Expand All @@ -122,7 +122,7 @@ const uint8_t NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...

/** threshold values to try stability cutoff during PVS search */
// TODO: better values may exist.
const uint8_t PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
const int8_t PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
99, 99, 99, 99, -2, 0, 2, 4,
6, 8, 12, 14, 16, 18, 20, 22,
24, 26, 28, 30, 32, 34, 36, 38,
Expand Down
4 changes: 2 additions & 2 deletions src/search.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ struct Node;
extern const uint8_t QUADRANT_ID[];
extern const Selectivity selectivity_table[];
extern const int NO_SELECTIVITY;
extern const uint8_t NWS_STABILITY_THRESHOLD[];
extern const uint8_t PVS_STABILITY_THRESHOLD[];
extern const int8_t NWS_STABILITY_THRESHOLD[];
extern const int8_t PVS_STABILITY_THRESHOLD[];
extern const uint8_t SQUARE_TYPE[];

/* function definition */
Expand Down
2 changes: 1 addition & 1 deletion src/settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
#define MOVE_GENERATOR MOVE_GENERATOR_AVX512CD
#elif defined __AVX2__
#define MOVE_GENERATOR MOVE_GENERATOR_AVX_ACEPCK
#elif defined __SSE__
#elif defined __SSE2__
#define MOVE_GENERATOR MOVE_GENERATOR_CARRY_64
#elif defined __ARM_NEON
#define MOVE_GENERATOR MOVE_GENERATOR_NEON_BITSCAN
Expand Down
Loading

0 comments on commit aa75ba9

Please sign in to comment.