Skip to content

Commit 29bd913

Browse files
authored
Merge pull request #102 from FluidNumerics/patch/hipcuda-language-support
Patch/hipcuda language support
2 parents 5f6d1e7 + d2c739f commit 29bd913

File tree

5 files changed

+138
-78
lines changed

5 files changed

+138
-78
lines changed

Diff for: .superci/armory.yml renamed to .superci/galapagos.mi210.yml

+13-14
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@ steps:
33
sbatch_options:
44
- "--account=fluidnumerics"
55
- "--gres=gpu:mi210:2"
6-
- "--ntasks=6"
7-
- "--cpus-per-task=2"
6+
- "--ntasks=2"
7+
- "--cpus-per-task=16"
88
- "--time=40:00"
9-
prerequisites:
10-
- "source /etc/profile.d/z11_lmod.sh"
9+
prerequisites: []
1110
modules:
1211
- cmake/3.31.2
1312
- gcc/12.4.0
@@ -17,36 +16,36 @@ steps:
1716
- feq-parse/2.2.2
1817
env:
1918
BUILD_DIR: ${WORKSPACE}/build
20-
PREFIX: ${WORKSPACE}/opt/self
19+
PREFIX: ${WORKSPACE}/install
2120
OUTDIR: ${WORKSPACE}/local
2221
GPU_ARCH: gfx90a
23-
BUILD_TYPE: coverage
22+
BUILD_TYPE: release
2423
ENABLE_GPU: ON
2524
ENABLE_DOUBLE_PRECISION: ON
2625
ENABLE_MULTITHREADING: OFF
26+
ENABLE_TESTING: ON
27+
ENABLE_EXAMPLES: ON
2728
NTHREADS: 4
28-
GCOV: gcov-12
29+
GCOV: gcov
2930
commands:
3031
- |
3132
set -e
3233
mkdir -p ${BUILD_DIR}
3334
mkdir -p ${OUTDIR}
34-
cd ${WORKSPACE}/build
35-
FC=gfortran \
36-
CXX=hipcc \
35+
cd ${BUILD_DIR}
3736
cmake -G Ninja \
38-
-DCMAKE_PREFIX_PATH=${ROCM_PATH} \
3937
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
4038
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
4139
-DSELF_ENABLE_GPU=${ENABLE_GPU} \
4240
-DSELF_ENABLE_MULTITHREADING=${ENABLE_MULTITHREADING} \
4341
-DSELF_MULTITHREADING_NTHREADS=${NTHREADS} \
4442
-DSELF_ENABLE_DOUBLE_PRECISION=${ENABLE_DOUBLE_PRECISION} \
45-
-DAMDGPU_TARGETS=${GPU_ARCH} \
43+
-DCMAKE_HIP_ARCHITECTURES=${GPU_ARCH} \
44+
-DSELF_ENABLE_EXAMPLES=${ENABLE_EXAMPLES} \
45+
-DSELF_ENABLE_TESTING=${ENABLE_TESTING} \
4646
../
4747
ninja
4848
49-
5049
# Initialize coverage
5150
if [ "$BUILD_TYPE" = "coverage" ]; then
5251
lcov --capture \
@@ -79,7 +78,7 @@ steps:
7978
--sha "${COMMIT_SHA}" \
8079
--branch "${BRANCH_NAME}" \
8180
--pr "${PR_NUMBER}" \
82-
--flag "armory-noether-gfx90a-test" \
81+
--flag "galapagos-noether-gfx90a-test" \
8382
--file "${WORKSPACE}/coverage.info"
8483
fi
8584

Diff for: .superci/galapagos.v100.yml

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
steps:
2+
- name : "Build on Noether"
3+
sbatch_options:
4+
- "--account=fluidnumerics"
5+
- "--gres=gpu:v100:2"
6+
- "--ntasks=2"
7+
- "--cpus-per-task=6"
8+
- "--time=40:00"
9+
prerequisites: []
10+
modules:
11+
- gcc/12.4.0
12+
- cmake/3.31.2
13+
- cuda/12.4.1
14+
- openmpi/5.0.6
15+
- hdf5/1.14.5
16+
- feq-parse/2.2.2
17+
env:
18+
BUILD_DIR: ${WORKSPACE}/build
19+
PREFIX: ${WORKSPACE}/install
20+
OUTDIR: ${WORKSPACE}/local
21+
GPU_ARCH: 70
22+
BUILD_TYPE: release
23+
ENABLE_GPU: ON
24+
ENABLE_DOUBLE_PRECISION: ON
25+
ENABLE_MULTITHREADING: OFF
26+
ENABLE_TESTING: ON
27+
ENABLE_EXAMPLES: ON
28+
NTHREADS: 4
29+
GCOV: gcov
30+
commands:
31+
- |
32+
set -e
33+
mkdir -p ${BUILD_DIR}
34+
mkdir -p ${OUTDIR}
35+
cd ${BUILD_DIR}
36+
cmake -G Ninja \
37+
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
38+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
39+
-DSELF_ENABLE_GPU=${ENABLE_GPU} \
40+
-DSELF_ENABLE_MULTITHREADING=${ENABLE_MULTITHREADING} \
41+
-DSELF_MULTITHREADING_NTHREADS=${NTHREADS} \
42+
-DSELF_ENABLE_DOUBLE_PRECISION=${ENABLE_DOUBLE_PRECISION} \
43+
-DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCH} \
44+
-DSELF_ENABLE_EXAMPLES=${ENABLE_EXAMPLES} \
45+
-DSELF_ENABLE_TESTING=${ENABLE_TESTING} \
46+
../
47+
ninja
48+
49+
50+
# Initialize coverage
51+
if [ "$BUILD_TYPE" = "coverage" ]; then
52+
lcov --capture \
53+
--initial \
54+
--directory ${BUILD_DIR}/src \
55+
--gcov=${GCOV} \
56+
--output-file ${WORKSPACE}/initial.info
57+
fi
58+
59+
60+
# Run ctests
61+
ctest --test-dir ${BUILD_DIR} --verbose
62+
63+
if [ "$BUILD_TYPE" = "coverage" ]; then
64+
# Compile coverage information
65+
lcov --capture \
66+
--directory ${BUILD_DIR}/src \
67+
--gcov=${GCOV} \
68+
--output-file ${WORKSPACE}/ctest-capture.info
69+
70+
lcov --add-tracefile ${WORKSPACE}/initial.info \
71+
--add-tracefile ${WORKSPACE}/ctest-capture.info \
72+
--gcov=${GCOV} \
73+
--output-file ${WORKSPACE}/coverage.info
74+
75+
# Generate summary
76+
lcov --summary ${WORKSPACE}/coverage.info
77+
78+
${HOME}/.local/bin/codecov-linux -t "${CODECOV_TOKEN}" \
79+
--sha "${COMMIT_SHA}" \
80+
--branch "${BRANCH_NAME}" \
81+
--pr "${PR_NUMBER}" \
82+
--flag "galapagos-oram-v100-test" \
83+
--file "${WORKSPACE}/coverage.info"
84+
fi
85+

Diff for: CMakeLists.txt

+32-51
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,9 @@ cmake_minimum_required(VERSION 3.21)
2828
cmake_policy(VERSION 3.21...3.27)
2929

3030
# C Language is needed in order to verify Fortran compiler is C-interoperable
31-
# CXX language is needed to properly find "hip" package
3231
project(SELF VERSION 1.0.0
3332
DESCRIPTION "Spectral Element Library in Fortran"
34-
LANGUAGES Fortran C CXX)
35-
33+
LANGUAGES Fortran C)
3634

3735
option(SELF_ENABLE_MULTITHREADING "Option to enable CPU multithreading for `do concurrent` loop blocks." OFF)
3836
option(SELF_ENABLE_TESTING "Option to enable build of tests. (Default On)" ON)
@@ -48,13 +46,6 @@ if(SELF_ENABLE_MULTITHREADING)
4846
set(SELF_MULITHREADING_NTHREADS "4" CACHE STRING "Number of threads to use for `do concurrent` loop blocks. This option is only used with GNU compilers. Other compilers use OMP_NUM_THREADS environment variable at runtime.")
4947
endif()
5048

51-
if(NOT DEFINED ROCM_PATH)
52-
if(NOT DEFINED ENV{ROCM_PATH})
53-
set(ROCM_PATH "/opt/rocm/" CACHE PATH "Path to which ROCm has been installed")
54-
else()
55-
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
56-
endif()
57-
endif()
5849

5950

6051
# Fortran compiler requirements
@@ -157,11 +148,7 @@ if(SELF_ENABLE_DOUBLE_PRECISION)
157148
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DDOUBLE_PRECISION")
158149
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DDOUBLE_PRECISION" )
159150

160-
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDOUBLE_PRECISION" )
161-
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDOUBLE_PRECISION" )
162-
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DDOUBLE_PRECISION")
163-
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DDOUBLE_PRECISION")
164-
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DDOUBLE_PRECISION" )
151+
165152
endif()
166153

167154
if(SELF_ENABLE_GPU)
@@ -172,12 +159,6 @@ if(SELF_ENABLE_GPU)
172159
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DENABLE_GPU")
173160
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DENABLE_GPU" )
174161

175-
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_GPU" )
176-
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DENABLE_GPU" )
177-
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DENABLE_GPU")
178-
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DENABLE_GPU")
179-
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DENABLE_GPU" )
180-
181162
# Check MPI for GPU awareness
182163
# Add SELF's cmake module directory to the search path
183164
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -193,49 +174,49 @@ if(SELF_ENABLE_GPU)
193174
if(hip_FOUND)
194175
if(MPI_HAS_QUERY_HIP_SUPPORT)
195176
find_package(hipblas REQUIRED)
196-
#message("-- HIP found. Enabling HIP language.")
197-
#enable_language(HIP)
177+
message("-- HIP found. Enabling HIP language.")
178+
enable_language(HIP)
198179
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_HIP" )
199180
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_HIP" )
200181
set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_HIP")
201182
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_HIP")
202183
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_HIP" )
203184

204-
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_HIP" )
205-
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAVE_HIP" )
206-
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DHAVE_HIP")
207-
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DHAVE_HIP")
208-
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DHAVE_HIP" )
209-
185+
if(SELF_ENABLE_DOUBLE_PRECISION)
186+
set( CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -DDOUBLE_PRECISION" )
187+
set( CMAKE_HIP_FLAGS_DEBUG "${CMAKE_HIP_FLAGS_DEBUG} -DDOUBLE_PRECISION" )
188+
set( CMAKE_HIP_FLAGS_COVERAGE "${CMAKE_HIP_FLAGS_COVERAGE} -DDOUBLE_PRECISION")
189+
set( CMAKE_HIP_FLAGS_PROFILE "${CMAKE_HIP_FLAGS_PROFILE} -DDOUBLE_PRECISION")
190+
set( CMAKE_HIP_FLAGS_RELEASE "${CMAKE_HIP_FLAGS_RELEASE} -DDOUBLE_PRECISION" )
191+
endif()
210192
set( BACKEND_LIBRARIES hip::device roc::hipblas)
211193
else()
212194
message( FATAL_ERROR "MPI installation is not GPU-aware" )
213195
endif()
214196
else()
215-
# CUDA (Optional)
216-
find_package(cuda)
217-
if(cuda_FOUND)
218-
if(MPI_HAS_QUERY_CUDA_SUPPORT)
219-
#message("-- CUDA found. Enabling CUDA language.")
220-
#enable_language(CUDA)
221-
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_CUDA" )
222-
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_CUDA" )
223-
set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_CUDA")
224-
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_CUDA")
225-
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_CUDA" )
226-
227-
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_CUDA" )
228-
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAVE_CUDA" )
229-
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DHAVE_CUDA")
230-
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DHAVE_CUDA")
231-
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DHAVE_CUDA" )
232-
233-
# TO DO - need cuda libraries and hipblas libraries
234-
else()
235-
message( FATAL_ERROR "MPI installation is not GPU-aware" )
197+
# CUDA
198+
find_package(CUDAToolkit REQUIRED)
199+
message("-- CUDA found. Enabling CUDA language.")
200+
enable_language(CUDA)
201+
if(MPI_HAS_QUERY_CUDA_SUPPORT)
202+
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_CUDA" )
203+
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_CUDA" )
204+
set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_CUDA")
205+
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_CUDA")
206+
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_CUDA" )
207+
208+
if(SELF_ENABLE_DOUBLE_PRECISION)
209+
set( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DDOUBLE_PRECISION" )
210+
set( CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -DDOUBLE_PRECISION" )
211+
set( CMAKE_CUDA_FLAGS_COVERAGE "${CMAKE_CUDA_FLAGS_COVERAGE} -DDOUBLE_PRECISION")
212+
set( CMAKE_CUDA_FLAGS_PROFILE "${CMAKE_CUDA_FLAGS_PROFILE} -DDOUBLE_PRECISION")
213+
set( CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -DDOUBLE_PRECISION" )
236214
endif()
215+
216+
set( BACKEND_LIBRARIES CUDA::cuda_driver CUDA::cudart CUDA::cublas)
217+
237218
else()
238-
message( FATAL_ERROR "Enabling GPU support requires either HIP or CUDA." )
219+
message( FATAL_ERROR "MPI installation is not GPU-aware" )
239220
endif()
240221
endif()
241222
endif()

Diff for: src/CMakeLists.txt

+6-12
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,12 @@ file(GLOB SELF_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/*.f*")
2929
if(SELF_ENABLE_GPU)
3030
file(GLOB SELF_BACKEND_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.f*")
3131
file(GLOB SELF_BACKEND_CPPSRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp*")
32-
# Note : [email protected] (Oct. 1 2024)
33-
# Ultimately, we want to be able to use the language support for HIP/CUDA
34-
# rather than bringing in HIP/CUDA through `find_package`. At the moment
35-
# we are doing a hack overrided the CXX compiler with either hipcc or nvcc
36-
# The reason we're doing it this way (hacky) at the moment is that we get
37-
# segmentation faults on our AMD GPU tests when using the HIP language support
38-
# via Cmake, for some yet unknown reason.
39-
# if(hip_FOUND)
40-
# set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE HIP)
41-
# elseif(cuda_FOUND)
42-
# set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE CUDA)
43-
# endif()
32+
33+
if(hip_FOUND)
34+
set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE HIP)
35+
else()
36+
set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE CUDA)
37+
endif()
4438
else()
4539
file(GLOB SELF_BACKEND_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/cpu/*.f*")
4640
endif()

Diff for: src/gpu/SELF_GPU_Macros.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#include <climits>
1616
#include <cstdio>
1717

18-
#ifdef HAVE_HIP
18+
#ifdef __HIP_PLATFORM_AMD__
1919

2020
#include <hip/hip_runtime.h>
2121

@@ -31,6 +31,7 @@ static void check(const hipError_t err, const char *const file, const int line)
3131
#else
3232

3333
#include <cuda_runtime.h>
34+
#include <stdint.h> // required to provide uint32_t
3435

3536
#define hipLaunchKernelGGL(F,G,B,M,S,...) F<<<G,B,M,S>>>(__VA_ARGS__)
3637

0 commit comments

Comments
 (0)