
Commit d84f03c

Enable build for ROCm/HIPBLAS (#235)
1 parent 1171213 commit d84f03c

File tree

2 files changed: +31 -6 lines


Makefile

+18 -2
@@ -70,6 +70,10 @@ ifeq ($(UNAME_S),Haiku)
     CXXFLAGS += -pthread
 endif
 
+# GPGPU specific
+GGML_CUDA_OBJ_PATH=CMakeFiles/ggml.dir/ggml-cuda.cu.o
+
+
 # Architecture specific
 # TODO: probably these flags need to be tweaked on some architectures
 # feel free to update the Makefile for your architecture and send a pull request or issue
@@ -137,6 +141,18 @@ ifeq ($(BUILD_TYPE),cublas)
     EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
 endif
 
+ifeq ($(BUILD_TYPE),hipblas)
+    ROCM_HOME ?= "/opt/rocm"
+    CXX="$(ROCM_HOME)"/llvm/bin/clang++
+    CC="$(ROCM_HOME)"/llvm/bin/clang
+    EXTRA_LIBS=
+    GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
+    AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
+    CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+    EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
+    GGML_CUDA_OBJ_PATH=CMakeFiles/ggml-rocm.dir/ggml-cuda.cu.o
+endif
+
 ifeq ($(BUILD_TYPE),clblas)
     EXTRA_LIBS=
     CMAKE_ARGS+=-DLLAMA_CLBLAST=ON
@@ -183,10 +199,10 @@ llama.cpp/ggml-alloc.o:
 
 llama.cpp/ggml.o: prepare
     mkdir -p build
-    cd build && cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
+    cd build && CC="$(CC)" CXX="$(CXX)" cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
 
 llama.cpp/ggml-cuda.o: llama.cpp/ggml.o
-    cd build && cp -rf CMakeFiles/ggml.dir/ggml-cuda.cu.o ../llama.cpp/ggml-cuda.o
+    cd build && cp -rf "$(GGML_CUDA_OBJ_PATH)" ../llama.cpp/ggml-cuda.o
 
 llama.cpp/ggml-opencl.o: llama.cpp/ggml.o
     cd build && cp -rf CMakeFiles/ggml.dir/ggml-opencl.cpp.o ../llama.cpp/ggml-opencl.o
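
The hipblas branch above points CC/CXX at ROCm's bundled clang, passes -DLLAMA_HIPBLAS=ON together with the AMDGPU_TARGETS/GPU_TARGETS lists to CMake, and redirects GGML_CUDA_OBJ_PATH to the ggml-rocm object directory so the unchanged llama.cpp/ggml-cuda.o rule copies the HIP-compiled object instead. Because ROCM_HOME and GPU_TARGETS are set with ?=, they can be overridden at build time; a minimal sketch, assuming a ROCm install at /opt/rocm and a single gfx1030 card (both values are just the defaults listed in the diff and may need adjusting):

```
# Sketch only: narrow the offload targets to one of the default gfx entries
# and point ROCM_HOME at the install prefix; ?= lets the environment win.
BUILD_TYPE=hipblas ROCM_HOME=/opt/rocm GPU_TARGETS=gfx1030 make libbinding.a
```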

README.md

+13 -4
@@ -2,7 +2,7 @@
 
 [LLama.cpp](https://github.com/ggerganov/llama.cpp) golang bindings.
 
-The go-llama.cpp bindings are high level, as such most of the work is kept into the C/C++ code to avoid any extra computational cost, be more performant and lastly ease out maintenance, while keeping the usage as simple as possible.
+The go-llama.cpp bindings are high level, as such most of the work is kept into the C/C++ code to avoid any extra computational cost, be more performant and lastly ease out maintenance, while keeping the usage as simple as possible.
 
 Check out [this](https://about.sourcegraph.com/blog/go/gophercon-2018-adventures-in-cgo-performance) and [this](https://www.cockroachlabs.com/blog/the-cost-and-complexity-of-cgo/) write-ups which summarize the impact of a low-level interface which calls C functions from Go.
 
@@ -57,6 +57,15 @@ BUILD_TYPE=cublas make libbinding.a
 CGO_LDFLAGS="-lcublas -lcudart -L/usr/local/cuda/lib64/" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -t 14
 ```
 
+### ROCM
+
+To build with ROCM (HIPBLAS):
+
+```
+BUILD_TYPE=hipblas make libbinding.a
+CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ CGO_LDFLAGS="-O3 --hip-link --rtlib=compiler-rt -unwindlib=libgcc -lrocblas -lhipblas" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -ngl 64 -t 32
+```
+
 ### OpenCL
 
 ```
@@ -68,9 +77,9 @@ CGO_LDFLAGS="-lOpenCL -lclblast -L/usr/local/lib64/" LIBRARY_PATH=$PWD C_INCLUDE
 You should see something like this from the output when using the GPU:
 
 ```
-ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
-ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
-ggml_opencl: device FP16 support: true
+ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
+ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
+ggml_opencl: device FP16 support: true
 ```
 
 ## GPU offloading
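
The ROCM run line added to the README only offloads work if two things line up: the example passes -ngl 64 to move layers to the GPU, and the card's gfx ISA must be among the GPU_TARGETS baked in at build time. A quick way to check the latter, assuming a standard ROCm install that ships rocminfo under /opt/rocm/bin:

```
# Print the gfx ISA names the ROCm runtime reports for the installed GPUs;
# the reported name (e.g. gfx1030) should appear in GPU_TARGETS.
/opt/rocm/bin/rocminfo | grep -i gfx
```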
