Skip to content

Commit e94dfac

Browse files
committed
Implement ONNXRuntime backend
1 parent 43069b6 commit e94dfac

17 files changed

+1113
-33
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ out.txt
4949
# For clion IDE
5050
.idea
5151

52+
# For vscode
53+
.vscode
54+
5255
# For cmake
5356
CMakeCache.txt
5457
CMakeFiles/

cpp/CMakeLists.txt

Lines changed: 103 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,9 @@ if(NOT WIN32)
2222
set(ColorBoldRed "${ColorRed}${ColorBold}")
2323
endif()
2424

25-
#--------------------------- CMAKE VARIABLES (partly for Cmake GUI) ----------------------------------------------------
26-
27-
set(USE_BACKEND CACHE STRING "Neural net backend")
28-
string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
29-
set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA OPENCL EIGEN)
30-
31-
set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
32-
set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
33-
set(Boost_USE_STATIC_LIBS_ON 0 CACHE BOOL "Compile against boost statically instead of dynamically")
34-
set(USE_AVX2 0 CACHE BOOL "Compile with AVX2")
35-
set(USE_BIGGER_BOARDS_EXPENSIVE 0 CACHE BOOL "Allow boards up to size 29. Compiling with this will use more memory and slow down KataGo, even when playing on boards of size 19.")
36-
37-
#--------------------------- NEURAL NET BACKEND ------------------------------------------------------------------------
38-
39-
message(STATUS "Building 'katago' executable for GTP engine and other tools.")
40-
if(USE_BACKEND STREQUAL "CUDA")
41-
message(STATUS "-DUSE_BACKEND=CUDA, using CUDA backend.")
42-
25+
#--------------------------- CUDA MACRO -------------------------------------------------------------------------------
4326

27+
macro(CONFIGURE_CUDA)
4428
# Ensure dynamic cuda linking (Versions prior to 3.17)
4529
if (${CMAKE_VERSION} VERSION_LESS "3.17")
4630
set(CMAKE_CUDA_FLAGS "" CACHE STRING "")
@@ -145,6 +129,26 @@ if(USE_BACKEND STREQUAL "CUDA")
145129
"
146130
)
147131
endif()
132+
endmacro()
133+
134+
#--------------------------- CMAKE VARIABLES (partly for Cmake GUI) ----------------------------------------------------
135+
136+
set(USE_BACKEND CACHE STRING "Neural net backend")
137+
string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
138+
set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA OPENCL EIGEN ONNXRUNTIME)
139+
140+
set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
141+
set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
142+
set(Boost_USE_STATIC_LIBS_ON 0 CACHE BOOL "Compile against boost statically instead of dynamically")
143+
set(USE_AVX2 0 CACHE BOOL "Compile with AVX2")
144+
set(USE_BIGGER_BOARDS_EXPENSIVE 0 CACHE BOOL "Allow boards up to size 29. Compiling with this will use more memory and slow down KataGo, even when playing on boards of size 19.")
145+
146+
#--------------------------- NEURAL NET BACKEND ------------------------------------------------------------------------
147+
148+
message(STATUS "Building 'katago' executable for GTP engine and other tools.")
149+
if(USE_BACKEND STREQUAL "CUDA")
150+
message(STATUS "-DUSE_BACKEND=CUDA, using CUDA backend.")
151+
configure_cuda()
148152
elseif(USE_BACKEND STREQUAL "OPENCL")
149153
message(STATUS "-DUSE_BACKEND=OPENCL, using OpenCL backend.")
150154
set(NEURALNET_BACKEND_SOURCES
@@ -161,8 +165,28 @@ elseif(USE_BACKEND STREQUAL "EIGEN")
161165
set(NEURALNET_BACKEND_SOURCES
162166
neuralnet/eigenbackend.cpp
163167
)
168+
elseif(USE_BACKEND STREQUAL "ONNXRUNTIME")
169+
message(STATUS "-DUSE_BACKEND=ONNXRUNTIME, using ONNXRuntime backend.")
170+
set(ORT_CUDA 0 CACHE BOOL "Use CUDA execution provider for ONNXRuntime.")
171+
set(ORT_TENSORRT 0 CACHE BOOL "Use TensorRT execution provider for ONNXRuntime.")
172+
set(ORT_DIRECTML 0 CACHE BOOL "Use DirectML execution provider for ONNXRuntime.")
173+
set(ORT_MIGRAPHX 0 CACHE BOOL "Use MIGraphX execution provider for ONNXRuntime.")
174+
if(ORT_CUDA OR ORT_TENSORRT)
175+
configure_cuda()
176+
endif()
177+
if(ORT_MIGRAPHX)
178+
set(NEURALNET_BACKEND_SOURCES
179+
neuralnet/ortbackend.cpp
180+
neuralnet/openclhelpers.cpp
181+
)
182+
else()
183+
set(NEURALNET_BACKEND_SOURCES
184+
neuralnet/ortbackend.cpp
185+
)
186+
endif()
187+
164188
elseif(USE_BACKEND STREQUAL "")
165-
message(WARNING "${ColorBoldRed}WARNING: Using dummy neural net backend, intended for non-neural-net testing only, will fail on any code path requiring a neural net. To use neural net, specify -DUSE_BACKEND=CUDA or -DUSE_BACKEND=OPENCL or -DUSE_BACKEND=EIGEN to compile with the respective backend.${ColorReset}")
189+
message(WARNING "${ColorBoldRed}WARNING: Using dummy neural net backend, intended for non-neural-net testing only, will fail on any code path requiring a neural net. To use neural net, specify -DUSE_BACKEND=CUDA or -DUSE_BACKEND=OPENCL or -DUSE_BACKEND=ONNXRUNTIME or -DUSE_BACKEND=EIGEN to compile with the respective backend.${ColorReset}")
166190
set(NEURALNET_BACKEND_SOURCES neuralnet/dummybackend.cpp)
167191
else()
168192
message(FATAL_ERROR "Unrecognized backend: " ${USE_BACKEND})
@@ -327,6 +351,66 @@ elseif(USE_BACKEND STREQUAL "EIGEN")
327351
endif()
328352
endif()
329353
endif()
354+
elseif(USE_BACKEND STREQUAL "ONNXRUNTIME")
355+
target_compile_definitions(katago PRIVATE USE_ONNXRUNTIME_BACKEND)
356+
set(ORT_LIB_DIR CACHE STRING "ONNXRuntime library location")
357+
set(ORT_INCLUDE_DIR CACHE STRING "ONNXRuntime header files location")
358+
message(STATUS "ORT_LIB_DIR: " ${ORT_LIB_DIR})
359+
message(STATUS "ORT_INCLUDE_DIR: " ${ORT_INCLUDE_DIR})
360+
include_directories(${ORT_INCLUDE_DIR})
361+
if(EXISTS ${ORT_INCLUDE_DIR}/core/session)
362+
include_directories(${ORT_INCLUDE_DIR}/core/session)
363+
endif()
364+
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/cpu)
365+
include_directories(${ORT_INCLUDE_DIR}/core/providers/cpu)
366+
endif()
367+
find_library(ORT_LIBRARY NAMES onnxruntime PATHS ${ORT_LIB_DIR})
368+
if(NOT ORT_LIBRARY)
369+
message(FATAL_ERROR "Could not find onnxruntime")
370+
endif()
371+
target_link_libraries(katago ${ORT_LIBRARY})
372+
if(ORT_CUDA)
373+
target_compile_definitions(katago PRIVATE USE_ORT_CUDA)
374+
endif()
375+
if(ORT_TENSORRT)
376+
target_compile_definitions(katago PRIVATE USE_ORT_TENSORRT)
377+
set(TENSORRT_LIB_DIR CACHE STRING "TensorRT library location")
378+
set(TENSORRT_INCLUDE_DIR CACHE STRING "TensorRT header file location")
379+
include_directories(${TENSORRT_INCLUDE_DIR})
380+
find_library(TENSORRT_LIBRARY NAMES nvinfer PATHS ${TENSORRT_LIB_DIR})
381+
if(NOT TENSORRT_LIBRARY)
382+
message(FATAL_ERROR "Could not find nvinfer")
383+
endif()
384+
target_link_libraries(katago ${TENSORRT_LIBRARY})
385+
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/tensorrt)
386+
include_directories(${ORT_INCLUDE_DIR}/core/providers/tensorrt)
387+
endif()
388+
endif()
389+
if(ORT_CUDA OR ORT_TENSORRT)
390+
find_package(CUDA REQUIRED)
391+
find_path(CUDNN_INCLUDE_DIR cudnn.h HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES cuda/include include)
392+
if((NOT CUDNN_INCLUDE_DIR))
393+
message(ERROR "${ColorBoldRed} cudnn.h was NOT found, specify CUDNN_INCLUDE_DIR to indicate where it is. ${ColorReset}")
394+
endif()
395+
find_library(CUDNN_LIBRARY libcudnn.so PATHS /usr/local/cuda/lib64 /opt/cuda/lib64)
396+
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS} ${CUDNN_INCLUDE_DIR}) #SYSTEM is for suppressing some compiler warnings in thrust libraries
397+
target_link_libraries(katago ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_LIBRARIES})
398+
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/cuda)
399+
include_directories(${ORT_INCLUDE_DIR}/core/providers/cuda)
400+
endif()
401+
endif()
402+
if(ORT_DIRECTML)
403+
target_compile_definitions(katago PRIVATE USE_ORT_DIRECTML)
404+
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/directml)
405+
include_directories(${ORT_INCLUDE_DIR}/core/providers/directml)
406+
endif()
407+
endif()
408+
if(ORT_MIGRAPHX)
409+
target_compile_definitions(katago PRIVATE USE_ORT_MIGRAPHX)
410+
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/migraphx)
411+
include_directories(${ORT_INCLUDE_DIR}/core/providers/migraphx)
412+
endif()
413+
endif()
330414
endif()
331415

332416
if(USE_BIGGER_BOARDS_EXPENSIVE)

cpp/command/benchmark.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ int MainCmds::benchmark(int argc, const char* const* argv) {
209209
cout << "If you have a strong GPU capable of FP16 tensor cores (e.g. RTX2080), "
210210
<< "using the Cuda version of KataGo instead may give a mild performance boost." << endl;
211211
#endif
212+
#ifdef USE_ONNXRUNTIME_BACKEND
213+
cout << "You are currently using the ONNXRuntime version of KataGo with "
214+
<< nnEval->getOnnxRuntimeExecutionProvider() << " execution provider." << endl;
215+
#endif
212216
#ifdef USE_EIGEN_BACKEND
213217
cout << "You are currently using the Eigen (CPU) version of KataGo. Due to having no GPU, it may be slow." << endl;
214218
#endif
@@ -564,6 +568,7 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
564568
int64_t configMaxPlayouts = ((int64_t)1) << 50;
565569
double configMaxTime = 1e20;
566570
double configMaxPonderTime = -1.0;
571+
string configOnnxRuntimeExecutionProvider;
567572
vector<int> configDeviceIdxs;
568573
int configNNCacheSizePowerOfTwo = 20;
569574
int configNNMutexPoolSizePowerOfTwo = 16;
@@ -693,6 +698,41 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
693698
});
694699
}
695700

701+
#ifdef USE_ONNXRUNTIME_BACKEND
702+
cout << endl;
703+
cout << "=========================================================================" << endl;
704+
cout << "ONNXRUNTIME EXECUTION PROVIDER" << endl;
705+
706+
{
707+
vector<string> executionProviders;
708+
#ifdef USE_ORT_CUDA
709+
executionProviders.push_back("CUDA");
710+
#endif
711+
#ifdef USE_ORT_TENSORRT
712+
executionProviders.push_back("TensorRT");
713+
#endif
714+
#ifdef USE_ORT_DIRECTML
715+
executionProviders.push_back("DirectML");
716+
#endif
717+
#ifdef USE_ORT_MIGRAPHX
718+
executionProviders.push_back("MIGraphX");
719+
#endif
720+
721+
cout << endl;
722+
cout << "Available ONNXRuntime execution providers:" << endl;
723+
for(const auto provider: executionProviders) {
724+
cout << provider << " ";
725+
}
726+
cout << endl << endl;
727+
728+
string prompt = "Specify an execution provider for ONNXRuntime. Leave blank to use the first available provider.\n";
729+
promptAndParseInput(prompt, [&](const string& line) {
730+
if(line == "") configOnnxRuntimeExecutionProvider = executionProviders[0];
731+
else configOnnxRuntimeExecutionProvider = line;
732+
});
733+
}
734+
#endif
735+
696736
cout << endl;
697737
cout << "=========================================================================" << endl;
698738
cout << "GPUS AND RAM" << endl;
@@ -701,7 +741,11 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
701741
{
702742
cout << endl;
703743
cout << "Finding available GPU-like devices..." << endl;
744+
#ifndef USE_ONNXRUNTIME_BACKEND
704745
NeuralNet::printDevices();
746+
#else
747+
NeuralNet::printDevices(configOnnxRuntimeExecutionProvider);
748+
#endif
705749
cout << endl;
706750

707751
string prompt =
@@ -789,6 +833,7 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
789833
configMaxPlayouts,
790834
configMaxTime,
791835
configMaxPonderTime,
836+
configOnnxRuntimeExecutionProvider,
792837
configDeviceIdxs,
793838
configNNCacheSizePowerOfTwo,
794839
configNNMutexPoolSizePowerOfTwo,

cpp/core/rand.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ void Rand::init()
248248
int* heapVal = new int[1];
249249
size_t stackAddr = (size_t)(&stackVal);
250250
size_t heapAddr = (size_t)(heapVal);
251-
delete heapVal;
251+
delete[] heapVal;
252252
s += "|";
253253
s += Global::uint64ToHexString((uint64_t)stackAddr);
254254
s += Global::uint64ToHexString((uint64_t)heapAddr);

cpp/neuralnet/cudabackend.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2584,6 +2584,8 @@ ComputeContext* NeuralNet::createComputeContext(
25842584
int nnXLen,
25852585
int nnYLen,
25862586
const string& openCLTunerFile,
2587+
const string& onnxOptModelFile,
2588+
const string& onnxRuntimeExecutionProvider,
25872589
const string& homeDataDirOverride,
25882590
bool openCLReTunePerBoardSize,
25892591
enabled_t useFP16Mode,
@@ -2593,6 +2595,8 @@ ComputeContext* NeuralNet::createComputeContext(
25932595
(void)gpuIdxs;
25942596
(void)logger;
25952597
(void)openCLTunerFile;
2598+
(void)onnxOptModelFile;
2599+
(void)onnxRuntimeExecutionProvider;
25962600
(void)homeDataDirOverride;
25972601
(void)openCLReTunePerBoardSize;
25982602
(void)loadedModel;

cpp/neuralnet/dummybackend.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ ComputeContext* NeuralNet::createComputeContext(
1919
int nnXLen,
2020
int nnYLen,
2121
const string& openCLTunerFile,
22+
const string& onnxOptModelFile,
23+
const string& onnxRuntimeExecutionProvider,
2224
const string& homeDataDirOverride,
2325
bool openCLReTunePerBoardSize,
2426
enabled_t useFP16Mode,
@@ -30,6 +32,8 @@ ComputeContext* NeuralNet::createComputeContext(
3032
(void)nnXLen;
3133
(void)nnYLen;
3234
(void)openCLTunerFile;
35+
(void)onnxOptModelFile;
36+
(void)onnxRuntimeExecutionProvider;
3337
(void)homeDataDirOverride;
3438
(void)openCLReTunePerBoardSize;
3539
(void)useFP16Mode;

cpp/neuralnet/eigenbackend.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,8 @@ ComputeContext* NeuralNet::createComputeContext(
14291429
int nnXLen,
14301430
int nnYLen,
14311431
const string& openCLTunerFile,
1432+
const string& onnxOptModelFile,
1433+
const string& onnxRuntimeExecutionProvider,
14321434
const string& homeDataDirOverride,
14331435
bool openCLReTunePerBoardSize,
14341436
enabled_t useFP16Mode,
@@ -1438,6 +1440,8 @@ ComputeContext* NeuralNet::createComputeContext(
14381440
(void)gpuIdxs;
14391441
(void)logger;
14401442
(void)openCLTunerFile;
1443+
(void)onnxOptModelFile;
1444+
(void)onnxRuntimeExecutionProvider;
14411445
(void)homeDataDirOverride;
14421446
(void)openCLReTunePerBoardSize;
14431447

cpp/neuralnet/nneval.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ NNEvaluator::NNEvaluator(
6666
int nnMutexPoolSizePowerofTwo,
6767
bool skipNeuralNet,
6868
const string& openCLTunerFile,
69+
const string& onnxOptModelFile,
70+
const string& onnxRuntimeExecutionProvider,
6971
const string& homeDataDirOverride,
7072
bool openCLReTunePerBoardSize,
7173
enabled_t useFP16Mode,
@@ -83,6 +85,7 @@ NNEvaluator::NNEvaluator(
8385
requireExactNNLen(rExactNNLen),
8486
policySize(NNPos::getPolicySize(xLen,yLen)),
8587
inputsUseNHWC(iUseNHWC),
88+
ortExecutionProvider(onnxRuntimeExecutionProvider),
8689
usingFP16Mode(useFP16Mode),
8790
usingNHWCMode(useNHWCMode),
8891
numThreads(numThr),
@@ -145,8 +148,8 @@ NNEvaluator::NNEvaluator(
145148
inputsVersion = NNModelVersion::getInputsVersion(modelVersion);
146149
computeContext = NeuralNet::createComputeContext(
147150
gpuIdxs,logger,nnXLen,nnYLen,
148-
openCLTunerFile,homeDataDirOverride,openCLReTunePerBoardSize,
149-
usingFP16Mode,usingNHWCMode,loadedModel
151+
openCLTunerFile,onnxOptModelFile,onnxRuntimeExecutionProvider,
152+
homeDataDirOverride,openCLReTunePerBoardSize,usingFP16Mode,usingNHWCMode,loadedModel
150153
);
151154
}
152155
else {
@@ -224,6 +227,9 @@ int NNEvaluator::getNNXLen() const {
224227
int NNEvaluator::getNNYLen() const {
225228
return nnYLen;
226229
}
230+
string NNEvaluator::getOnnxRuntimeExecutionProvider() const{
231+
return ortExecutionProvider;
232+
}
227233
enabled_t NNEvaluator::getUsingFP16Mode() const {
228234
return usingFP16Mode;
229235
}

cpp/neuralnet/nneval.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ class NNEvaluator {
8989
int nnMutexPoolSizePowerofTwo,
9090
bool debugSkipNeuralNet,
9191
const std::string& openCLTunerFile,
92+
const std::string& onnxOptModelFile,
93+
const std::string& onnxRuntimeExecutionProvider,
9294
const std::string& homeDataDirOverride,
9395
bool openCLReTunePerBoardSize,
9496
enabled_t useFP16Mode,
@@ -113,6 +115,7 @@ class NNEvaluator {
113115
int getNumServerThreads() const;
114116
int getNNXLen() const;
115117
int getNNYLen() const;
118+
std::string getOnnxRuntimeExecutionProvider() const;
116119
enabled_t getUsingFP16Mode() const;
117120
enabled_t getUsingNHWCMode() const;
118121

@@ -172,6 +175,7 @@ class NNEvaluator {
172175
const bool requireExactNNLen;
173176
const int policySize;
174177
const bool inputsUseNHWC;
178+
const std::string ortExecutionProvider;
175179
const enabled_t usingFP16Mode;
176180
const enabled_t usingNHWCMode;
177181
int numThreads;

cpp/neuralnet/nninterface.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ namespace NeuralNet {
3636

3737
//Print available backend devices
3838
void printDevices();
39+
void printDevices(const std::string& ortExecutionProvider);
40+
#if defined(USE_ORT_CUDA) || defined(USE_ORT_TENSORRT)
41+
void printCUDADevices();
42+
#endif
43+
#ifdef USE_ORT_DIRECTML
44+
void printDirectMLDevices();
45+
#endif
46+
#ifdef USE_ORT_MIGRAPHX
47+
void printOpenCLDevices();
48+
#endif
3949

4050
// Model I/O -----------------------------------------------------------------
4151

@@ -59,6 +69,8 @@ namespace NeuralNet {
5969
int nnXLen,
6070
int nnYLen,
6171
const std::string& openCLTunerFile,
72+
const std::string& onnxOptModelFile,
73+
const std::string& onnxRuntimeExecutionProvider,
6274
const std::string& homeDataDirOverride,
6375
bool openCLReTunePerBoardSize,
6476
enabled_t useFP16Mode,

0 commit comments

Comments (0)