Skip to content

Commit c0757e0

Browse files
committed
Support HIP
1 parent 104a8ca commit c0757e0

File tree

10 files changed

+82
-130
lines changed

10 files changed

+82
-130
lines changed

.github/workflows/cuda.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
name: cuda
22

3-
on: [push, pull_request]
3+
on:
4+
push:
5+
pull_request:
6+
schedule:
7+
- cron: "22 22 * * 6"
48

59
concurrency:
610
group: ${{ github.ref }}-${{ github.head_ref }}-cuda

.github/workflows/gcc.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
name: GCC
22

3-
on: [push, pull_request]
3+
on:
4+
push:
5+
pull_request:
6+
schedule:
7+
- cron: "22 22 * * 6"
48

59
concurrency:
610
group: ${{ github.ref }}-${{ github.head_ref }}-linux-gcc

Src/AMReX_Arena.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "AMReX_Arena.H"
2+
#include "AMReX_BLassert.H"
23
#include "AMReX_Gpu.H"
34

45
namespace amrex
@@ -8,11 +9,12 @@ void* allocate_host (std::size_t sz)
89
{
910
#if defined(AMREX_USE_CUDA)
1011
void* p;
11-
cudaHostAlloc(&p, sz, cudaHostAllocMapped);
12+
AMREX_CUDA_SAFE_CALL(cudaHostAlloc(&p, sz, cudaHostAllocMapped));
1213
return p;
1314
#elif defined(AMREX_USE_HIP)
1415
void* p;
15-
hipHostAlloc(&p, sz, hipHostAllocMapped | hipHostMallocNonCoherent);
16+
AMREX_HIP_SAFE_CALL(hipHostMalloc(&p, sz, hipHostMallocMapped |
17+
hipHostMallocNonCoherent));
1618
return p;
1719
#elif defined(AMREX_USE_SYCL)
1820
return sycl::malloc_host(...);
@@ -24,9 +26,9 @@ void* allocate_host (std::size_t sz)
2426
void free_host (void* pt)
2527
{
2628
#if defined(AMREX_USE_CUDA)
27-
cudaFreeHost(pt);
29+
AMREX_CUDA_SAFE_CALL(cudaFreeHost(pt));
2830
#elif defined(AMREX_USE_HIP)
29-
hipHostFree(pt);
31+
AMREX_HIP_SAFE_CALL(hipHostFree(pt));
3032
#elif defined(AMREX_USE_SYCL)
3133
sycl::free(...);
3234
#else
@@ -38,9 +40,9 @@ void* allocate_device (std::size_t sz)
3840
{
3941
void* p;
4042
#if defined(AMREX_USE_CUDA)
41-
cudaMalloc(&p, sz);
43+
AMREX_CUDA_SAFE_CALL(cudaMalloc(&p, sz));
4244
#elif defined(AMREX_USE_HIP)
43-
hipMalloc(&p, sz);
45+
AMREX_HIP_SAFE_CALL(hipMalloc(&p, sz));
4446
#elif defined(AMREX_USE_SYCL)
4547
p = sycl::malloc_device(...);
4648
#else
@@ -52,9 +54,9 @@ void* allocate_device (std::size_t sz)
5254
void free_device (void* pt)
5355
{
5456
#if defined(AMREX_USE_CUDA)
55-
cudaFree(pt);
57+
AMREX_CUDA_SAFE_CALL(cudaFree(pt));
5658
#elif defined(AMREX_USE_HIP)
57-
hipFree(pt);
59+
AMREX_HIP_SAFE_CALL(hipFree(pt));
5860
#elif defined(AMREX_USE_SYCL)
5961
sycl::free(...);
6062
#else

Src/AMReX_BLassert.H

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include "AMReX_Extension.H"
77
#include "AMReX_Gpu.H"
88
#include <cassert>
9+
#include <stdexcept>
10+
#include <string>
911

1012
namespace amrex {
1113

@@ -29,4 +31,29 @@ void Assert (const char* EX, const char* file, int line, const char* msg)
2931
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX,MSG) (EX)?((void)0):amrex::Assert( # EX , __FILE__, __LINE__ , # MSG)
3032
#define AMREX_ALWAYS_ASSERT(EX) (EX)?((void)0):amrex::Assert( # EX , __FILE__, __LINE__)
3133

32-
#endif /*BL_BL_ASSERT_H*/
34+
35+
#if defined (AMREX_USE_CUDA)
36+
37+
// AMREX_CUDA_SAFE_CALL(call): evaluate `call` exactly once and compare the
// resulting status against cudaSuccess.  On mismatch, throw std::runtime_error
// with the message
//     "CUDA error in file <__FILE__> line <__LINE__>: <cudaGetErrorString(status)>".
//
// The expansion is wrapped in do { ... } while (0) so that the macro followed
// by a semicolon is exactly one statement; a bare `{ ... };` would detach a
// following `else` when used in an unbraced if/else.  `call` is parenthesized
// so that any expression can be passed without precedence surprises.
#define AMREX_CUDA_SAFE_CALL(call) do {                                      \
    auto const amrex_i_err = (call);                                         \
    if (cudaSuccess != amrex_i_err) {                                        \
        std::string errStr(std::string("CUDA error in file ") + __FILE__     \
                           + " line " + std::to_string(__LINE__)             \
                           + ": " + cudaGetErrorString(amrex_i_err));        \
        throw std::runtime_error(errStr);                                    \
    }                                                                        \
} while (0)
45+
46+
#elif defined (AMREX_USE_HIP)
47+
48+
// AMREX_HIP_SAFE_CALL(call): evaluate `call` exactly once and compare the
// resulting status against hipSuccess.  On mismatch, throw std::runtime_error
// with the message
//     "HIP error in file <__FILE__> line <__LINE__>: <hipGetErrorString(status)>".
//
// The expansion is wrapped in do { ... } while (0) so that the macro followed
// by a semicolon is exactly one statement; a bare `{ ... };` would detach a
// following `else` when used in an unbraced if/else.  `call` is parenthesized
// so that any expression can be passed without precedence surprises.  The
// ": " separator before the error text matches the CUDA variant of this macro.
#define AMREX_HIP_SAFE_CALL(call) do {                                       \
    auto const amrex_i_err = (call);                                         \
    if (hipSuccess != amrex_i_err) {                                         \
        std::string errStr(std::string("HIP error in file ") + __FILE__      \
                           + " line " + std::to_string(__LINE__)             \
                           + ": " + hipGetErrorString(amrex_i_err));         \
        throw std::runtime_error(errStr);                                    \
    }                                                                        \
} while (0)
56+
57+
#endif
58+
59+
#endif

Src/AMReX_BLassert.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "AMReX_BLassert.H"
22

33
#include <cstdio>
4-
#include <stdexcept>
54

65
namespace amrex
76
{

Src/AMReX_Gpu.H

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
#if defined(AMREX_USE_GPU) && !defined(AMREX_USE_SYCL)
1010

11-
#if defined(AMREX_USE_HIP)
11+
#if defined(AMREX_USE_CUDA)
12+
#include <cuda_runtime.h>
13+
#elif defined(AMREX_USE_HIP)
1214
#include <hip/hip_runtime.h>
1315
#endif
1416

@@ -106,7 +108,7 @@ namespace Gpu {
106108
void streamSynchronize ();
107109
}
108110

109-
#if defined(AMREX_USE_CUDA)
111+
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
110112

111113
template <typename T, typename L, typename M=std::enable_if_t<std::is_integral_v<T>> >
112114
void ParallelFor (T n, L const& f)

Src/AMReX_Gpu.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include "AMReX_BLassert.H"
12
#include "AMReX_Gpu.H"
23

34
#ifdef AMREX_USE_GPU
@@ -25,9 +26,9 @@ void setStream (gpuStream_t a_stream)
2526
void streamSynchronize ()
2627
{
2728
#if defined(AMREX_USE_CUDA)
28-
cudaStreamSynchronize(gpu_stream);
29+
AMREX_CUDA_SAFE_CALL(cudaStreamSynchronize(gpu_stream));
2930
#elif defined(AMREX_USE_HIP)
30-
hipStreamSynchronize(gpu_stream);
31+
AMREX_HIP_SAFE_CALL(hipStreamSynchronize(gpu_stream));
3132
#elif defined(AMREX_USE_SYCL)
3233
static_assert(false);
3334
#else
@@ -38,9 +39,11 @@ void streamSynchronize ()
3839
void htod_memcpy (void* p_d, void const* p_h, std::size_t sz)
3940
{
4041
#if defined(AMREX_USE_CUDA)
41-
cudaMemcpyAsync(p_d, p_h, sz, cudaMemcpyHostToDevice, gpu_stream);
42+
AMREX_CUDA_SAFE_CALL(cudaMemcpyAsync(p_d, p_h, sz, cudaMemcpyHostToDevice,
43+
gpu_stream));
4244
#elif defined(AMREX_USE_HIP)
43-
hipMemcpyAsync(p_d, p_h, sz, hipMemcpyHostToDevice, gpu_stream);
45+
AMREX_HIP_SAFE_CALL(hipMemcpyAsync(p_d, p_h, sz, hipMemcpyHostToDevice,
46+
gpu_stream));
4447
#elif defined(AMREX_USE_SYCL)
4548
static_assert(false);
4649
#else

Tests/GPU/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ int main (int argc, char* argv[])
1111
{
1212
amrex::ignore_unused(argc, argv);
1313

14-
std::size_t N = 256*256*256;
14+
std::size_t N = 256*256*256*8;
1515
auto* p = (double*)allocate_device(N*sizeof(double));
1616

1717
Parser parser("epsilon/kp*2*x/w0**2*exp(-(x**2+y**2)/w0**2)*sin(k0*z)");

Tools/GNUMake/Make.defs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -439,13 +439,6 @@ else ifeq ($(USE_HIP),TRUE)
439439
AMD_ARCH = $(AMREX_AMD_ARCH)
440440
endif
441441

442-
# For AMD GPUs, the wavefront is 64 except for gfx10??.
443-
ifeq ($(findstring gfx10,$(AMD_ARCH)),)
444-
DEFINES += -DAMREX_AMDGCN_WAVEFRONT_SIZE=64
445-
else
446-
DEFINES += -DAMREX_AMDGCN_WAVEFRONT_SIZE=32
447-
endif
448-
449442
ifeq ($(HIP_SAVE_TEMPS),TRUE)
450443
ifeq ($(USE_GPU_RDC),TRUE)
451444
$(warning *** HIP_SAVE_TEMPS requires USE_GPU_RDC=FALSE to obtain the assembly files for AMD GPU kernels.)

Tools/GNUMake/comps/hip.mak

Lines changed: 22 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@ ifneq ($(NO_CONFIG_CHECKING),TRUE)
1212
endif
1313

1414
CXX = $(HIP_PATH)/bin/hipcc
15-
CC = $(HIP_PATH)/bin/hipcc
16-
FC = gfortran
17-
F90 = gfortran
1815

1916
ifdef CXXSTD
2017
CXXSTD := $(strip $(CXXSTD))
@@ -24,12 +21,6 @@ endif
2421

2522
# Generic flags, always used
2623
CXXFLAGS = -std=$(CXXSTD) -m64
27-
CFLAGS = -std=c11 -m64
28-
29-
FFLAGS = -ffixed-line-length-none -fno-range-check -fno-second-underscore
30-
F90FLAGS = -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none
31-
32-
FMODULES = -J$(fmoddir) -I $(fmoddir)
3324

3425
# rdc support
3526
ifeq ($(USE_GPU_RDC),TRUE)
@@ -44,110 +35,37 @@ HIPCC_FLAGS += -pthread
4435

4536
CXXFLAGS += $(HIPCC_FLAGS)
4637

47-
# add fopenmp targeting the gnu library
48-
ifeq ($(USE_OMP),TRUE)
49-
CXXFLAGS += -fopenmp=libgomp
50-
CFLAGS += -fopenmp=libgomp
51-
HIPCC_FLAGS += -fopenmp=libgomp
52-
endif
53-
54-
ifneq ($(BL_NO_FORT),TRUE)
55-
56-
# Taken straight from gnu
57-
# ask gfortran the name of the library to link in. First check for the
58-
# static version. If it returns only the name w/o a path, then it
59-
# was not found. In that case, ask for the shared-object version.
60-
gfortran_liba = $(shell $(F90) -print-file-name=libgfortran.a)
61-
gfortran_libso = $(shell $(F90) -print-file-name=libgfortran.so)
38+
# =============================================================================================
6239

63-
ifneq ($(gfortran_liba),libgfortran.a) # if found the full path is printed, thus `neq`.
64-
LIBRARY_LOCATIONS += $(dir $(gfortran_liba))
40+
ifeq ($(DEBUG),TRUE)
41+
CXXFLAGS += -g -O1
6542
else
66-
LIBRARY_LOCATIONS += $(dir $(gfortran_libso))
43+
CXXFLAGS += -gline-tables-only -fdebug-info-for-profiling -O3
6744
endif
6845

69-
override XTRALIBS += -lgfortran
70-
71-
quadmath_liba = $(shell $(F90) -print-file-name=libquadmath.a)
72-
quadmath_libso = $(shell $(F90) -print-file-name=libquadmath.so)
73-
74-
ifneq ($(quadmath_liba),libquadmath.a)
75-
override XTRALIBS += -lquadmath
76-
else ifneq ($(quadmath_libso),libquadmath.so)
77-
override XTRALIBS += -lquadmath
78-
endif
46+
ifeq ($(WARN_ALL),TRUE)
47+
warning_flags = -Wall -Wextra -Wunreachable-code -Wnull-dereference
48+
warning_flags += -Wfloat-conversion -Wextra-semi
7949

80-
endif # BL_NO_FORT
81-
82-
# =============================================================================================
83-
84-
ifeq ($(HIP_COMPILER),clang)
85-
86-
ifeq ($(DEBUG),TRUE)
87-
CXXFLAGS += -g -O1 -munsafe-fp-atomics
88-
CFLAGS += -g -O0
89-
90-
FFLAGS += -g -O0 -ggdb -fbounds-check -fbacktrace -Wuninitialized -Wunused -ffpe-trap=invalid,zero -finit-real=snan -finit-integer=2147483647 -ftrapv
91-
F90FLAGS += -g -O0 -ggdb -fbounds-check -fbacktrace -Wuninitialized -Wunused -ffpe-trap=invalid,zero -finit-real=snan -finit-integer=2147483647 -ftrapv
92-
93-
else # DEBUG=FALSE flags
94-
95-
CXXFLAGS += -gline-tables-only -fdebug-info-for-profiling -O3 -munsafe-fp-atomics
96-
CFLAGS += -gline-tables-only -fdebug-info-for-profiling -O3
97-
FFLAGS += -g1 -O3
98-
F90FLAGS += -g1 -O3
50+
warning_flags += -Wpedantic
9951

52+
ifneq ($(WARN_SHADOW),FALSE)
53+
warning_flags += -Wshadow
10054
endif
10155

102-
ifeq ($(WARN_ALL),TRUE)
103-
warning_flags = -Wall -Wextra -Wunreachable-code -Wnull-dereference
104-
warning_flags += -Wfloat-conversion -Wextra-semi
105-
106-
warning_flags += -Wpedantic
107-
108-
ifneq ($(WARN_SHADOW),FALSE)
109-
warning_flags += -Wshadow
110-
endif
111-
112-
CXXFLAGS += $(warning_flags) -Woverloaded-virtual
113-
CFLAGS += $(warning_flags)
114-
endif
115-
116-
ifeq ($(WARN_ERROR),TRUE)
117-
CXXFLAGS += -Werror -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments
118-
CFLAGS += -Werror
119-
endif
120-
121-
# Generic HIP info
122-
ROC_PATH=$(realpath $(dir $(HIP_PATH)))
123-
SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include $(HIP_PATH)/include
124-
125-
# rocRand
126-
SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/hiprand $(ROC_PATH)/include/rocrand
127-
LIBRARY_LOCATIONS += $(ROC_PATH)/lib
128-
LIBRARIES += -Wl,--rpath=$(ROC_PATH)/lib -lhiprand -lrocrand
129-
130-
# rocPrim - Header only
131-
SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/rocprim
132-
133-
# rocThrust - Header only
134-
# SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/rocthrust
135-
136-
# rocTracer
137-
ifeq ($(USE_ROCTX),TRUE)
138-
CXXFLAGS += -DAMREX_USE_ROCTX
139-
HIPCC_FLAGS += -DAMREX_USE_ROCTX
140-
LIBRARY_LOCATIONS += $(ROC_PATH)/lib
141-
LIBRARIES += -Wl,--rpath=$(ROC_PATH)/lib -lroctracer64 -lroctx64
142-
endif
56+
CXXFLAGS += $(warning_flags) -Woverloaded-virtual
57+
CFLAGS += $(warning_flags)
58+
endif
14359

144-
# hipcc passes a lot of unused arguments to clang
145-
LEGACY_DEPFLAGS += -Wno-unused-command-line-argument
60+
ifeq ($(WARN_ERROR),TRUE)
61+
CXXFLAGS += -Werror -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments
62+
CFLAGS += -Werror
63+
endif
14664

147-
# =============================================================================================
65+
# Generic HIP info
66+
ROC_PATH=$(realpath $(dir $(HIP_PATH)))
67+
SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include $(HIP_PATH)/include
14868

149-
else ifeq ($(HIP_COMPILER),nvcc)
150-
$(error HIP_COMPILER nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms.)
151-
endif
69+
# hipcc passes a lot of unused arguments to clang
70+
LEGACY_DEPFLAGS += -Wno-unused-command-line-argument
15271

153-
# =============================================================================================

0 commit comments

Comments
 (0)