@@ -398,6 +398,7 @@ ifdef LLAMA_CUBLAS
398
398
# Preprocessor flags: define GGML_USE_CUBLAS and add the include directories
# found under $(CUDA_PATH), including the per-architecture targets/ directory.
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
399
399
# Linker flags: the cuda, cublas, culibos, cudart and cublasLt libraries plus
# pthread/dl/rt, searched for in the $(CUDA_PATH) library directories,
# /usr/lib64 and /usr/lib/wsl/lib.
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
400
400
# Objects to build: ggml-cuda.o plus one .o for every .cu file that exists in
# ggml-cuda/ at the time the makefile is parsed.
OBJS += ggml-cuda.o
401
+ OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
401
402
# Append -use_fast_math to the flags collected in MK_NVCCFLAGS.
MK_NVCCFLAGS += -use_fast_math
402
403
ifdef LLAMA_FATAL_WARNINGS
403
404
MK_NVCCFLAGS += -Werror all-warnings
@@ -458,12 +459,23 @@ endif # LLAMA_CUDA_NO_PEER_COPY
458
459
# Optional override: when LLAMA_CUDA_CCBIN has a non-empty value, forward it to
# MK_NVCCFLAGS through -ccbin (nvcc's option for selecting the host compiler).
ifdef LLAMA_CUDA_CCBIN
459
460
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
460
461
endif
461
- ggml-cuda.o : ggml-cuda.cu ggml-cuda.h ggml-common.h
462
+
462
463
# NVCC_COMPILE is the canned recipe that compiles one CUDA source ($<) into its
# object file ($@); rules use it as $(NVCC_COMPILE).
#
# With JETSON_EOL_MODULE_DETECT set, the command carries a fixed list of include
# paths and defines together with -std=c++11 -O3 ahead of the shared flags.
# Otherwise only NVCCFLAGS and CPPFLAGS are used. In both variants
# CUDA_CXXFLAGS is handed over as a single quoted -Xcompiler argument.
#
# Variable references must be written without padding: $(NVCC ) names a
# different, undefined variable than $(NVCC) and silently expands to nothing.
ifdef JETSON_EOL_MODULE_DETECT
464
+ define NVCC_COMPILE
463
465
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
466
+ endef # NVCC_COMPILE
464
467
else
468
+ define NVCC_COMPILE
465
469
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
470
+ endef # NVCC_COMPILE
466
471
endif # JETSON_EOL_MODULE_DETECT
472
+
473
# Pattern rule: build ggml-cuda/<name>.o from ggml-cuda/<name>.cu with the
# NVCC_COMPILE recipe. The object is rebuilt when its source, the same-named
# .cuh header, ggml.h, ggml-common.h or ggml-cuda/common.cuh changes.
# The % stem has to touch its suffix; "% .o" would be two separate words and
# the rule would no longer match the object files.
+ ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
474
+ $(NVCC_COMPILE)
475
+
476
# Build ggml-cuda.o from ggml-cuda.cu with the NVCC_COMPILE recipe. Besides the
# listed ggml headers it depends on every .cuh header present in ggml-cuda/,
# so a change to any of them triggers a rebuild.
# The glob must be the single word ggml-cuda/*.cuh; with a space in it,
# $(wildcard) would match every file in the directory, objects included.
+ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
477
+ $(NVCC_COMPILE)
478
+
467
479
endif # LLAMA_CUBLAS
468
480
469
481
ifdef LLAMA_CLBLAST
@@ -510,7 +522,6 @@ ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
510
522
endif # LLAMA_VULKAN
511
523
512
524
ifdef LLAMA_HIPBLAS
513
-
514
525
ifeq ($(wildcard /opt/rocm),)
515
526
ROCM_PATH ?= /usr
516
527
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
@@ -539,8 +550,13 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
539
550
HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
540
551
endif # LLAMA_CUDA_NO_PEER_COPY
541
552
# Objects for this backend: ggml-cuda.o plus one .o for every .cu file that
# exists in ggml-cuda/ at the time the makefile is parsed.
OBJS += ggml-cuda.o
542
- ggml-cuda.o : ggml-cuda.cu ggml-cuda.h
553
+ OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
554
# Build ggml-cuda.o from ggml-cuda.cu with HIPCC, treating the source as HIP
# code (-x hip). Besides the listed ggml headers it depends on every .cuh
# header present in ggml-cuda/.
# The glob must be the single word ggml-cuda/*.cuh; with a space in it,
# $(wildcard) would match every file in the directory, objects included.
+ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
555
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
556
+
557
# Pattern rule: build ggml-cuda/<name>.o from ggml-cuda/<name>.cu with HIPCC,
# treating the source as HIP code (-x hip). The object is rebuilt when its
# source, the same-named .cuh header, ggml.h, ggml-common.h or
# ggml-cuda/common.cuh changes.
# The % stem has to touch its suffix; "% .o" would be two separate words and
# the rule would no longer match the object files.
+ ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
543
558
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
559
+
544
560
endif # LLAMA_HIPBLAS
545
561
546
562
ifdef LLAMA_METAL
@@ -687,6 +703,7 @@ libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
687
703
688
704
# Remove build products: objects, shared/static libraries and DLLs in the top
# directory and tests/, a few named binaries, the generated
# common/build-info.cpp, *.dot files, everything listed in COV_TARGETS,
# BUILD_TARGETS and TEST_TARGETS, the objects in ggml-cuda/, and any .o file
# under examples/ and pocs/.
# Every glob must be one word (*.o, not "* .o"): a bare * handed to rm -rf
# would wipe the entire working directory.
clean:
689
705
rm -vrf *.o tests/*.o *.so *.a *.dll benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
706
+ rm -vrf ggml-cuda/*.o
690
707
find examples pocs -type f -name "*.o" -delete
691
708
692
709
#
0 commit comments