
Commit d84f03c

Enable build for ROCm/HIPBLAS (#235)
1 parent 1171213 commit d84f03c

File tree

2 files changed: +31 -6 lines


Makefile

+18 -2
@@ -70,6 +70,10 @@ ifeq ($(UNAME_S),Haiku)
     CXXFLAGS += -pthread
 endif
 
+# GPGPU specific
+GGML_CUDA_OBJ_PATH=CMakeFiles/ggml.dir/ggml-cuda.cu.o
+
+
 # Architecture specific
 # TODO: probably these flags need to be tweaked on some architectures
 # feel free to update the Makefile for your architecture and send a pull request or issue
@@ -137,6 +141,18 @@ ifeq ($(BUILD_TYPE),cublas)
     EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
 endif
 
+ifeq ($(BUILD_TYPE),hipblas)
+    ROCM_HOME ?= "/opt/rocm"
+    CXX="$(ROCM_HOME)"/llvm/bin/clang++
+    CC="$(ROCM_HOME)"/llvm/bin/clang
+    EXTRA_LIBS=
+    GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
+    AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
+    CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+    EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
+    GGML_CUDA_OBJ_PATH=CMakeFiles/ggml-rocm.dir/ggml-cuda.cu.o
+endif
+
 ifeq ($(BUILD_TYPE),clblas)
     EXTRA_LIBS=
     CMAKE_ARGS+=-DLLAMA_CLBLAST=ON
@@ -183,10 +199,10 @@ llama.cpp/ggml-alloc.o:
 
 llama.cpp/ggml.o: prepare
     mkdir -p build
-    cd build && cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
+    cd build && CC="$(CC)" CXX="$(CXX)" cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
 
 llama.cpp/ggml-cuda.o: llama.cpp/ggml.o
-    cd build && cp -rf CMakeFiles/ggml.dir/ggml-cuda.cu.o ../llama.cpp/ggml-cuda.o
+    cd build && cp -rf "$(GGML_CUDA_OBJ_PATH)" ../llama.cpp/ggml-cuda.o
 
 llama.cpp/ggml-opencl.o: llama.cpp/ggml.o
     cd build && cp -rf CMakeFiles/ggml.dir/ggml-opencl.cpp.o ../llama.cpp/ggml-opencl.o
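
The hipblas branch above points CC/CXX at ROCm's bundled clang, passes -DLLAMA_HIPBLAS=ON together with the AMDGPU_TARGETS/GPU_TARGETS lists to CMake, and redirects GGML_CUDA_OBJ_PATH to the ggml-rocm object directory so the unchanged llama.cpp/ggml-cuda.o rule copies the HIP-compiled object instead. Because ROCM_HOME and GPU_TARGETS are set with ?=, they can be overridden at build time; a minimal sketch, assuming a ROCm install at /opt/rocm and a single gfx1030 card (both values are just the defaults listed in the diff and may need adjusting):

```
# Sketch only: narrow the offload targets to one of the default gfx entries
# and point ROCM_HOME at the install prefix; ?= lets the environment win.
BUILD_TYPE=hipblas ROCM_HOME=/opt/rocm GPU_TARGETS=gfx1030 make libbinding.a
```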

README.md

+13 -4
@@ -2,7 +2,7 @@
 
 [LLama.cpp](https://github.com/ggerganov/llama.cpp) golang bindings.
 
-The go-llama.cpp bindings are high level, as such most of the work is kept into the C/C++ code to avoid any extra computational cost, be more performant and lastly ease out maintenance, while keeping the usage as simple as possible.
+The go-llama.cpp bindings are high level, as such most of the work is kept into the C/C++ code to avoid any extra computational cost, be more performant and lastly ease out maintenance, while keeping the usage as simple as possible.
 
 Check out [this](https://about.sourcegraph.com/blog/go/gophercon-2018-adventures-in-cgo-performance) and [this](https://www.cockroachlabs.com/blog/the-cost-and-complexity-of-cgo/) write-ups which summarize the impact of a low-level interface which calls C functions from Go.
 
@@ -57,6 +57,15 @@ BUILD_TYPE=cublas make libbinding.a
 CGO_LDFLAGS="-lcublas -lcudart -L/usr/local/cuda/lib64/" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -t 14
 ```
 
+### ROCM
+
+To build with ROCM (HIPBLAS):
+
+```
+BUILD_TYPE=hipblas make libbinding.a
+CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ CGO_LDFLAGS="-O3 --hip-link --rtlib=compiler-rt -unwindlib=libgcc -lrocblas -lhipblas" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -ngl 64 -t 32
+```
+
 ### OpenCL
 
 ```
@@ -68,9 +77,9 @@ CGO_LDFLAGS="-lOpenCL -lclblast -L/usr/local/lib64/" LIBRARY_PATH=$PWD C_INCLUDE
 You should see something like this from the output when using the GPU:
 
 ```
-ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
-ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
-ggml_opencl: device FP16 support: true
+ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
+ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
+ggml_opencl: device FP16 support: true
 ```
 
 ## GPU offloading
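
The ROCM run line added to the README only offloads work if two things line up: the example passes -ngl 64 to move layers to the GPU, and the card's gfx ISA must be among the GPU_TARGETS baked in at build time. A quick way to check the latter, assuming a standard ROCm install that ships rocminfo under /opt/rocm/bin:

```
# Print the gfx ISA names the ROCm runtime reports for the installed GPUs;
# the reported name (e.g. gfx1030) should appear in GPU_TARGETS.
/opt/rocm/bin/rocminfo | grep -i gfx
```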
