Skip to content

Commit 094eca2

Browse files
authored
Merge pull request ecmwf-ifs#162 from lukasm91/fix-overflow
Fix overflows up to at least tco1279
2 parents c14ee84 + f214d16 commit 094eca2

22 files changed

+368
-281
lines changed

src/trans/common/internal/tpm_distr.F90

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MODULE TPM_DISTR
1212

1313
! Module for distributed memory environment.
1414

15-
USE EC_PARKIND ,ONLY : JPIM ,JPRD
15+
USE EC_PARKIND ,ONLY : JPIM ,JPRD, JPIB
1616

1717
IMPLICIT NONE
1818

@@ -97,7 +97,7 @@ MODULE TPM_DISTR
9797
INTEGER(KIND=JPIM) :: NDGL_FS ! Number of rows of latitudes for which this process is
9898
! performing Fourier Space calculations
9999

100-
INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in
100+
INTEGER(KIND=JPIB) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in
101101
! Fourier/gridpoint buffer
102102
INTEGER(KIND=JPIM) :: NLENGTF ! Second dimension of Fourier/gridpoint buffer
103103
! (sum of (NLOEN+3) over local latitudes)
@@ -171,7 +171,7 @@ MODULE TPM_DISTR
171171
REAL(KIND=JPRD) ,ALLOCATABLE :: RWEIGHT(:) ! Weight per grid-point (if weighted distribution)
172172
INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCA_GP(:) ! Number of grid-points per a-set
173173

174-
INTEGER(KIND=JPIM), ALLOCATABLE :: OFFSETS_GEMM1(:), OFFSETS_GEMM2(:)
174+
INTEGER(KIND=JPIB), ALLOCATABLE :: OFFSETS_GEMM1(:), OFFSETS_GEMM2(:)
175175

176176
END TYPE DISTR_TYPE
177177

src/trans/gpu/algor/buffered_allocator_mod.F90

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,11 @@ FUNCTION MAKE_BUFFERED_ALLOCATOR()
6969
MAKE_BUFFERED_ALLOCATOR%NEXT_BUF = 0
7070
END FUNCTION MAKE_BUFFERED_ALLOCATOR
7171

72-
FUNCTION RESERVE(ALLOCATOR, SZ)
72+
FUNCTION RESERVE(ALLOCATOR, SZ, WHO)
7373
IMPLICIT NONE
7474
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
7575
INTEGER(KIND=C_SIZE_T), INTENT(IN) :: SZ
76+
CHARACTER(*), INTENT(IN), OPTIONAL :: WHO
7677

7778
TYPE(ALLOCATION_RESERVATION_HANDLE) :: RESERVE
7879

@@ -88,7 +89,7 @@ SUBROUTINE INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION)
8889
IMPLICIT NONE
8990
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
9091
!!TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: GROWING_ALLOCATION
91-
TYPE(GROWING_ALLOCATION_TYPE), target, INTENT(INout) :: GROWING_ALLOCATION
92+
TYPE(GROWING_ALLOCATION_TYPE), TARGET, INTENT(INOUT) :: GROWING_ALLOCATION
9293
INTEGER :: I
9394

9495
DO I = 0, NBUF-1
@@ -126,10 +127,13 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE
126127
INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM
127128
LOGICAL :: SET_VALUE_EFF
128129
INTEGER(KIND=4) :: SET_STREAM_EFF
129-
INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES
130+
INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES, END_IN_BYTES, J
130131
IF (START_IN_BYTES + LENGTH_IN_BYTES - 1 > SIZE(SRC, KIND=C_SIZE_T)) THEN
131132
CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment")
132133
ENDIF
134+
IF (START_IN_BYTES < 1) THEN
135+
CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment")
136+
ENDIF
133137
IF (PRESENT(SET_VALUE)) THEN
134138
SET_VALUE_EFF = SET_VALUE
135139
ELSE
@@ -143,9 +147,11 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE
143147
IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN
144148
! This option is turned off by default, but for experimentation we can turn it on. This is
145149
! setting all bits to 1 (meaning NaN in floating point)
146-
!$ACC KERNELS PRESENT(SRC) ASYNC(SET_STREAM_EFF)
147-
SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1) = -1
148-
!$ACC END KERNELS!! LOOP
150+
!$ACC PARALLEL PRESENT(SRC) ASYNC(SET_STREAM_EFF)
151+
DO J=1_C_SIZE_T,LENGTH_IN_BYTES
152+
SRC(J) = -1
153+
ENDDO
154+
!$ACC END PARALLEL
149155
ENDIF
150156
CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, &
151157
& [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))])
@@ -159,10 +165,13 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU
159165
INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM
160166
LOGICAL :: SET_VALUE_EFF
161167
INTEGER(KIND=4) :: SET_STREAM_EFF
162-
INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES
168+
INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES, END_IN_BYTES, J
163169
IF (START_IN_BYTES + LENGTH_IN_BYTES - 1 > SIZE(SRC, KIND=C_SIZE_T)) THEN
164170
CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment")
165171
ENDIF
172+
IF (START_IN_BYTES < 1) THEN
173+
CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment")
174+
ENDIF
166175
IF (PRESENT(SET_VALUE)) THEN
167176
SET_VALUE_EFF = SET_VALUE
168177
ELSE
@@ -176,9 +185,12 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU
176185
IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN
177186
! This option is turned off by default, but for experimentation we can turn it on. This is
178187
! setting all bits to 1 (meaning NaN in floating point)
179-
!$ACC KERNELS PRESENT(SRC) ASYNC(SET_STREAM_EFF)
180-
SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1) = -1
181-
!$ACC END KERNELS!! LOOP
188+
END_IN_BYTES=START_IN_BYTES+LENGTH_IN_BYTES-1
189+
!$ACC PARALLEL PRESENT(SRC) ASYNC(SET_STREAM_EFF)
190+
DO J=1_C_SIZE_T,LENGTH_IN_BYTES
191+
SRC(J) = -1
192+
ENDDO
193+
!$ACC END PARALLEL
182194
ENDIF
183195
CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, &
184196
& [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))])

src/trans/gpu/algor/ext_acc.F90

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ module openacc_ext_type
1818
end module
1919
module openacc_ext
2020
use iso_c_binding, only: c_ptr, c_size_t, c_loc, c_sizeof
21-
use openacc, only: acc_create, acc_copyin, acc_handle_kind
21+
use openacc, only: acc_handle_kind
2222
use openacc_ext_type, only: ext_acc_arr_desc
2323
implicit none
2424

@@ -247,7 +247,7 @@ function get_common_pointers(in_ptrs, out_ptrs) result(num_ranges)
247247
enddo
248248
end function
249249
subroutine ext_acc_create(ptrs, stream)
250-
use openacc, only: acc_create, acc_async_sync
250+
use openacc, only: acc_async_sync
251251
use iso_fortran_env, only: int32
252252
implicit none
253253
type(ext_acc_arr_desc), intent(in) :: ptrs(:)
@@ -269,8 +269,7 @@ subroutine ext_acc_create(ptrs, stream)
269269

270270
do i = 1, num_ranges
271271
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
272-
!!call acc_create_async(pp, common_ptrs(i)%sz, async=stream_act)
273-
call acc_create(pp, int(common_ptrs(i)%sz))
272+
!$acc enter data create(pp) async(stream_act)
274273
enddo
275274
end subroutine
276275
subroutine ext_acc_copyin(ptrs, stream)
@@ -296,12 +295,11 @@ subroutine ext_acc_copyin(ptrs, stream)
296295

297296
do i = 1, num_ranges
298297
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
299-
!!call acc_copyin_async(pp, common_ptrs(i)%sz, async=stream_act)
300-
call acc_copyin(pp, int(common_ptrs(i)%sz))
298+
!$acc enter data copyin(pp) async(stream_act)
301299
enddo
302300
end subroutine
303301
subroutine ext_acc_copyout(ptrs, stream)
304-
use openacc, only: acc_async_sync, acc_copyout
302+
use openacc, only: acc_async_sync
305303
implicit none
306304
type(ext_acc_arr_desc), intent(in) :: ptrs(:)
307305
integer(acc_handle_kind), optional :: stream
@@ -323,12 +321,11 @@ subroutine ext_acc_copyout(ptrs, stream)
323321

324322
do i = 1, num_ranges
325323
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
326-
!!call acc_copyout_async(pp, common_ptrs(i)%sz, async=stream_act)
327-
call acc_copyout(pp, int(common_ptrs(i)%sz))
324+
!$acc exit data copyout(pp) async(stream_act)
328325
enddo
329326
end subroutine
330327
subroutine ext_acc_delete(ptrs, stream)
331-
use openacc, only: acc_async_sync, acc_delete
328+
use openacc, only: acc_async_sync
332329
implicit none
333330
type(ext_acc_arr_desc), intent(in) :: ptrs(:)
334331
integer(acc_handle_kind), optional :: stream
@@ -350,8 +347,7 @@ subroutine ext_acc_delete(ptrs, stream)
350347

351348
do i = 1, num_ranges
352349
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
353-
!!call acc_delete_async(pp, common_ptrs(i)%sz, async=stream_act)
354-
call acc_delete(pp, int(common_ptrs(i)%sz))
350+
!$acc exit data delete(pp) async(stream_act)
355351
enddo
356352
end subroutine
357353
end module

src/trans/gpu/algor/hicblas_cutlass.cuda.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// (C) Copyright 2000- ECMWF.
2+
// (C) Copyright 2024- NVIDIA.
3+
14
#ifdef USE_CUTLASS
25
//#include "hicblas.h"
36
#include "cutlass/gemm/device/gemm.h"
@@ -153,9 +156,9 @@ class cutlass_sgemm_grouped<CutlassType::cutlass_fp32, TransA, TransB> {
153156
template <cublasOperation_t TransA, cublasOperation_t TransB>
154157
void cutlass_sgemm_wrapper_grouped_op(int resol_id, int blas_id, int m, int *n, int *k,
155158
float alpha, const float *A, int lda,
156-
int *offsetsA, const float *B, int ldb,
157-
int *offsetsB, float beta, float *C,
158-
int ldc, int *offsetsC, int batchCount,
159+
int64_t *offsetsA, const float *B, int ldb,
160+
int64_t *offsetsB, float beta, float *C,
161+
int ldc, int64_t *offsetsC, int batchCount,
159162
cudaStream_t stream,
160163
void *growing_allocator) {
161164
using namespace detail;
@@ -180,9 +183,9 @@ void cutlass_sgemm_wrapper_grouped_op(int resol_id, int blas_id, int m, int *n,
180183

181184
void cutlass_sgemm_wrapper_grouped(int resol_id, int blas_id, char transa, char transb,
182185
int m, int *n, int *k, float alpha,
183-
const float *A, int lda, int *offsetsA,
184-
const float *B, int ldb, int *offsetsB, float beta,
185-
float *C, int ldc, int *offsetsC,
186+
const float *A, int lda, int64_t *offsetsA,
187+
const float *B, int ldb, int64_t *offsetsB, float beta,
188+
float *C, int ldc, int64_t *offsetsC,
186189
int batchCount, cudaStream_t stream,
187190
void *growing_allocator) {
188191

src/trans/gpu/algor/hicblas_gemm.hip.cpp

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// (C) Copyright 2000- ECMWF.
2+
// (C) Copyright 2024- NVIDIA.
23
//
34
// This software is licensed under the terms of the Apache Licence Version 2.0
45
// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
@@ -89,10 +90,10 @@ template <typename Gemm> void erase_from_caches(int resol_id) {
8990
// this version is using graphs and caches the graphs
9091
template <typename Gemm, typename Real>
9192
void run_group_graph(Gemm &&gemm, int resol_id, int m, int *n, int *k,
92-
Real alpha, const Real *A, int lda, int *offsetsA,
93-
const Real *B, int ldb, int *offsetsB, Real beta, Real *C,
94-
int ldc, int *offsetsC, int batchCount, hipStream_t stream,
95-
int blas_id, void *growing_allocator) {
93+
Real alpha, const Real *A, int lda, int64_t *offsetsA,
94+
const Real *B, int ldb, int64_t *offsetsB, Real beta,
95+
Real *C, int ldc, int64_t *offsetsC, int batchCount,
96+
hipStream_t stream, int blas_id, void *growing_allocator) {
9697
growing_allocator_register_free_c(growing_allocator,
9798
free_gemm_graph_cache<Gemm>);
9899

@@ -163,9 +164,10 @@ void run_group_graph(Gemm &&gemm, int resol_id, int m, int *n, int *k,
163164
// stupid simple gemm calls
164165
template <typename Gemm, typename Real>
165166
void run_group(Gemm &&gemm, int resol_id, int m, int *n, int *k, Real alpha,
166-
const Real *A, int lda, int *offsetsA, const Real *B, int ldb,
167-
int *offsetsB, Real beta, Real *C, int ldc, int *offsetsC,
168-
int batchCount, hipStream_t stream, int = -1) {
167+
const Real *A, int lda, int64_t *offsetsA, const Real *B,
168+
int ldb, int64_t *offsetsB, Real beta, Real *C, int ldc,
169+
int64_t *offsetsC, int batchCount, hipStream_t stream,
170+
int = -1) {
169171
for (int i = 0; i < batchCount; ++i) {
170172
if (m == 0 || n[i] == 0 || k[i] == 0)
171173
continue;
@@ -213,11 +215,14 @@ template <typename Real> struct hipblas_gemm_grouped {
213215

214216
#ifndef USE_CUTLASS
215217

216-
void hipblas_sgemm_wrapper_grouped(
217-
int resol_id, int blas_id, char transa, char transb, int m, int *n, int *k,
218-
float alpha, const float *A, int lda, int *offsetsA, const float *B,
219-
int ldb, int *offsetsB, float beta, float *C, int ldc, int *offsetsC,
220-
int batchCount, hipStream_t stream, void *growing_allocator) {
218+
void hipblas_sgemm_wrapper_grouped(int resol_id, int blas_id, char transa,
219+
char transb, int m, int *n, int *k,
220+
float alpha, const float *A, int lda,
221+
int64_t *offsetsA, const float *B, int ldb,
222+
int64_t *offsetsB, float beta, float *C,
223+
int ldc, int64_t *offsetsC, int batchCount,
224+
hipStream_t stream,
225+
void *growing_allocator) {
221226

222227
hipblasOperation_t op_t1 = HIPBLAS_OP_N, op_t2 = HIPBLAS_OP_N;
223228
if (transa == 'T' || transa == 't')
@@ -241,9 +246,9 @@ void hipblas_sgemm_wrapper_grouped(
241246
void hipblas_dgemm_wrapper_grouped(int resol_id, int blas_id, char transa,
242247
char transb, int m, int *n, int *k,
243248
double alpha, const double *A, int lda,
244-
int *offsetsA, const double *B, int ldb,
245-
int *offsetsB, double beta, double *C,
246-
int ldc, int *offsetsC, int batchCount,
249+
int64_t *offsetsA, const double *B, int ldb,
250+
int64_t *offsetsB, double beta, double *C,
251+
int ldc, int64_t *offsetsC, int batchCount,
247252
hipStream_t stream, void *) {
248253

249254
hipblasOperation_t op_t1 = HIPBLAS_OP_N, op_t2 = HIPBLAS_OP_N;
@@ -311,10 +316,10 @@ void hipblas_sgemm_wrapper(char transa, char transb, int m, int n, int k,
311316
void hipblas_sgemm_wrapper_grouped(int resol_id, int blas_id, char transa,
312317
char transb, int m, int *n, int *k,
313318
float alpha, const float *A, int lda,
314-
int *offsetsA, const float *B, int ldb,
315-
int *offsetsB, float beta, float *C, int ldc,
316-
int *offsetsC, int batchCount, size_t stream,
317-
void *growing_allocator) {
319+
int64_t *offsetsA, const float *B, int ldb,
320+
int64_t *offsetsB, float beta, float *C,
321+
int ldc, int64_t *offsetsC, int batchCount,
322+
size_t stream, void *growing_allocator) {
318323
#ifdef USE_CUTLASS
319324
cutlass_sgemm_wrapper_grouped(resol_id, blas_id, transa, transb, m, n, k,
320325
alpha, A, lda, offsetsA, B, ldb, offsetsB, beta,
@@ -331,9 +336,9 @@ void hipblas_sgemm_wrapper_grouped(int resol_id, int blas_id, char transa,
331336
void hipblas_dgemm_wrapper_grouped(int resol_id, int blas_id, char transa,
332337
char transb, int m, int *n, int *k,
333338
double alpha, const double *A, int lda,
334-
int *offsetsA, const double *B, int ldb,
335-
int *offsetsB, double beta, double *C,
336-
int ldc, int *offsetsC, int batchCount,
339+
int64_t *offsetsA, const double *B, int ldb,
340+
int64_t *offsetsB, double beta, double *C,
341+
int ldc, int64_t *offsetsC, int batchCount,
337342
size_t stream, void *growing_allocator) {
338343
hipblas_dgemm_wrapper_grouped(resol_id, blas_id, transa, transb, m, n, k,
339344
alpha, A, lda, offsetsA, B, ldb, offsetsB, beta,

src/trans/gpu/algor/hicblas_mod.F90

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
MODULE HICBLAS_MOD
1616

17-
USE EC_PARKIND, ONLY: JPIM, JPRM, JPRD
17+
USE EC_PARKIND, ONLY: JPIM, JPRM, JPRD, JPIB
1818
USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE
1919
USE OPENACC_LIB, ONLY: ACC_GET_HIP_STREAM
2020

@@ -81,10 +81,11 @@ SUBROUTINE HIP_DGEMM_GROUPED( &
8181
& C, LDC, OFFSETC, &
8282
& BATCHCOUNT, STREAM, ALLOC &
8383
&) BIND(C, NAME='hipblas_dgemm_wrapper_grouped')
84-
USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR
84+
USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR, C_INT64_T
8585
CHARACTER(1,C_CHAR), VALUE :: CTA, CTB
8686
INTEGER(C_INT), VALUE :: RESOL_ID, BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT
87-
INTEGER(C_INT) :: N(*), K(*), OFFSETA(*), OFFSETB(*), OFFSETC(*)
87+
INTEGER(C_INT) :: N(*), K(*)
88+
INTEGER(C_INT64_T) :: OFFSETA(*), OFFSETB(*), OFFSETC(*)
8889
REAL(C_DOUBLE), VALUE :: ALPHA,BETA
8990
REAL(C_DOUBLE) :: A(*), B(*), C(*)
9091
INTEGER(KIND=C_SIZE_T) :: STREAM
@@ -101,10 +102,11 @@ SUBROUTINE HIP_SGEMM_GROUPED( &
101102
& C, LDC, OFFSETC, &
102103
& BATCHCOUNT, STREAM, ALLOC &
103104
&) BIND(C, NAME='hipblas_sgemm_wrapper_grouped')
104-
USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR
105+
USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR, C_INT64_T
105106
CHARACTER(1,C_CHAR), VALUE :: CTA, CTB
106107
INTEGER(C_INT), VALUE :: RESOL_ID, BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT
107-
INTEGER(C_INT) :: N(*), K(*), OFFSETA(*), OFFSETB(*), OFFSETC(*)
108+
INTEGER(C_INT) :: N(*), K(*)
109+
INTEGER(C_INT64_T) :: OFFSETA(*), OFFSETB(*), OFFSETC(*)
108110
REAL(C_FLOAT), VALUE :: ALPHA,BETA
109111
REAL(C_FLOAT) :: A(*), B(*), C(*)
110112
INTEGER(KIND=C_SIZE_T) :: STREAM
@@ -227,14 +229,14 @@ SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( &
227229
REAL(KIND=JPRD) :: ALPHA
228230
REAL(KIND=JPRD), DIMENSION(:) :: AARRAY
229231
INTEGER(KIND=JPIM) :: LDA
230-
INTEGER(KIND=JPIM) :: OFFSETA(:)
232+
INTEGER(KIND=JPIB) :: OFFSETA(:)
231233
REAL(KIND=JPRD), DIMENSION(*) :: BARRAY
232234
INTEGER(KIND=JPIM) :: LDB
233-
INTEGER(KIND=JPIM) :: OFFSETB(:)
235+
INTEGER(KIND=JPIB) :: OFFSETB(:)
234236
REAL(KIND=JPRD) :: BETA
235237
REAL(KIND=JPRD), DIMENSION(:) :: CARRAY
236238
INTEGER(KIND=JPIM) :: LDC
237-
INTEGER(KIND=JPIM) :: OFFSETC(:)
239+
INTEGER(KIND=JPIB) :: OFFSETC(:)
238240
INTEGER(KIND=JPIM) :: BATCHCOUNT
239241
INTEGER(KIND=C_INT) :: STREAM
240242
TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC
@@ -274,14 +276,14 @@ SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(&
274276
REAL(KIND=JPRM) :: ALPHA
275277
REAL(KIND=JPRM), DIMENSION(:) :: AARRAY
276278
INTEGER(KIND=JPIM) :: LDA
277-
INTEGER(KIND=JPIM) :: OFFSETA(:)
279+
INTEGER(KIND=JPIB) :: OFFSETA(:)
278280
REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY
279281
INTEGER(KIND=JPIM) :: LDB
280-
INTEGER(KIND=JPIM) :: OFFSETB(:)
282+
INTEGER(KIND=JPIB) :: OFFSETB(:)
281283
REAL(KIND=JPRM) :: BETA
282284
REAL(KIND=JPRM), DIMENSION(:) :: CARRAY
283285
INTEGER(KIND=JPIM) :: LDC
284-
INTEGER(KIND=JPIM) :: OFFSETC(:)
286+
INTEGER(KIND=JPIB) :: OFFSETC(:)
285287
INTEGER(KIND=JPIM) :: BATCHCOUNT
286288
INTEGER(KIND=C_INT) :: STREAM
287289
TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC

0 commit comments

Comments
 (0)