Skip to content

Commit 3e42d60

Browse files
committed
cuda - update shared max matrix sizes
1 parent 0ff6123 commit 3e42d60

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

backends/cuda-shared/ceed-cuda-shared-basis.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
int CeedInit_CudaInterp(CeedScalar *d_B, CeedInt P_1d, CeedInt Q_1d, CeedScalar **c_B);
2525
int CeedInit_CudaGrad(CeedScalar *d_B, CeedScalar *d_G, CeedInt P_1d, CeedInt Q_1d, CeedScalar **c_B_ptr, CeedScalar **c_G_ptr);
2626
int CeedInit_CudaCollocatedGrad(CeedScalar *d_B, CeedScalar *d_G, CeedInt P_1d, CeedInt Q_1d, CeedScalar **c_B_ptr, CeedScalar **c_G_ptr);
27+
// Copy a non-tensor basis matrix of P * Q * dim scalars from d_B into the backend's constant buffer and return that buffer's address in c_B.
// NOTE(review): the CUDA definition orders its parameters (d_B, P, Q, dim, c_B_ptr), not (d_B, dim, P, Q, c_B) as declared here.
//   The definition only uses the product P * Q * dim, so the mismatch does not change the copy size - confirm and align the two.
int CeedInit_CudaNonTensor(CeedScalar *d_B, CeedInt dim, CeedInt P, CeedInt Q, CeedScalar **c_B);
2728

2829
//------------------------------------------------------------------------------
2930
// Apply tensor basis
@@ -456,7 +457,7 @@ static int CeedBasisApplyNonTensorCore_Cuda_shared(CeedBasis basis, bool apply_a
456457
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &Q));
457458
CeedInt thread = CeedIntMax(Q, P);
458459

459-
CeedCallBackend(CeedInit_CudaInterp(data->d_interp_1d, P, Q, &data->c_B));
460+
CeedCallBackend(CeedInit_CudaNonTensor(data->d_interp_1d, 1, P, Q, &data->c_B));
460461
void *interp_args[] = {(void *)&num_elem, &data->c_B, &d_u, &d_v};
461462

462463
{
@@ -480,7 +481,7 @@ static int CeedBasisApplyNonTensorCore_Cuda_shared(CeedBasis basis, bool apply_a
480481
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &Q));
481482
CeedInt thread = CeedIntMax(Q, P);
482483

483-
CeedCallBackend(CeedInit_CudaInterp(data->d_grad_1d, P, Q * dim, &data->c_G));
484+
CeedCallBackend(CeedInit_CudaNonTensor(data->d_grad_1d, 3, P, Q * dim, &data->c_G));
484485
void *grad_args[] = {(void *)&num_elem, &data->c_G, &d_u, &d_v};
485486

486487
{
@@ -641,6 +642,10 @@ int CeedBasisCreateH1_Cuda_shared(CeedElemTopology topo, CeedInt dim, CeedInt nu
641642
CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
642643
CeedCallBackend(CeedCalloc(1, &data));
643644

645+
// Check max sizes
646+
CeedCheck(dim <= 3, ceed, CEED_ERROR_BACKEND, "Backend does not implement nontensor bases with dim > 3");
647+
CeedCheck(num_nodes * num_qpts * dim < 52 * 52 * 3, ceed, CEED_ERROR_BACKEND, "Backend does not implement nontensor bases with P * Q this large");
648+
644649
// Copy basis data to GPU
645650
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
646651
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2+
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3+
//
4+
// SPDX-License-Identifier: BSD-2-Clause
5+
//
6+
// This file is part of CEED: http://github.com/ceed
7+
8+
#include <ceed.h>
9+
#include <cuda.h>
10+
11+
// Capacity limits for the constant-memory basis buffer declared below: it holds MAX_SIZE * MAX_SIZE * MAX_DIM scalars.
// NOTE(review): the host-side size check in the basis constructor hard-codes the same 52 * 52 * 3 limit - keep the two in sync.
const int MAX_SIZE = 52, MAX_DIM = 3;
12+
__constant__ CeedScalar c_B[MAX_SIZE * MAX_SIZE * MAX_DIM];
13+
14+
//------------------------------------------------------------------------------
15+
// Non-tensor basis device initialization (used for both interp and grad matrices)
16+
//------------------------------------------------------------------------------
17+
// Copy a non-tensor basis matrix from device memory into the constant buffer c_B.
//
//   d_B      device pointer to the source matrix; P * Q * dim scalars are read from it
//   P, Q     matrix extents; only their product with dim is used here
//   dim      number of dimension blocks stored in the matrix
//   c_B_ptr  output; receives the device address of the constant buffer c_B
//
// Returns CEED_ERROR_SUCCESS on success, or CEED_ERROR_BACKEND when the request is
// negative, does not fit in c_B, or a CUDA runtime call fails.
//
// NOTE(review): the C-side prototype lists the integer arguments as (dim, P, Q).
//   Because only the product is used, the copy size is the same either way.
extern "C" int CeedInit_CudaNonTensor(CeedScalar *d_B, CeedInt P, CeedInt Q, CeedInt dim, CeedScalar **c_B_ptr) {
  // Capacity of the constant buffer, in scalars
  const size_t max_entries = sizeof(c_B) / sizeof(CeedScalar);

  // Negative extents would wrap when converted to size_t, so reject them first
  if (P < 0 || Q < 0 || dim < 0) return CEED_ERROR_BACKEND;
  // Bounding each factor keeps the product below from overflowing size_t
  if ((size_t)P > max_entries || (size_t)Q > max_entries || (size_t)dim > max_entries) return CEED_ERROR_BACKEND;

  const size_t num_entries = (size_t)P * (size_t)Q * (size_t)dim;

  // Refuse copies that would run past the end of c_B instead of relying on an ignored runtime error
  if (num_entries > max_entries) return CEED_ERROR_BACKEND;

  const size_t bytes = num_entries * sizeof(CeedScalar);

  // Propagate CUDA failures; previously a failed copy still reported success
  if (cudaMemcpyToSymbol(c_B, d_B, bytes, 0, cudaMemcpyDeviceToDevice) != cudaSuccess) return CEED_ERROR_BACKEND;
  if (cudaGetSymbolAddress((void **)c_B_ptr, c_B) != cudaSuccess) return CEED_ERROR_BACKEND;
  return CEED_ERROR_SUCCESS;
}
24+
25+
//------------------------------------------------------------------------------

backends/cuda-shared/kernels/cuda-shared-basis.cu renamed to backends/cuda-shared/kernels/cuda-shared-basis-tensor.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
#include <ceed.h>
99
#include <cuda.h>
1010

11-
const int sizeMax = 16;
12-
__constant__ CeedScalar c_B[sizeMax * sizeMax];
13-
__constant__ CeedScalar c_G[sizeMax * sizeMax];
11+
const int MAX_SIZE = 16;
12+
__constant__ CeedScalar c_B[MAX_SIZE * MAX_SIZE];
13+
__constant__ CeedScalar c_G[MAX_SIZE * MAX_SIZE];
1414

1515
//------------------------------------------------------------------------------
1616
// Interp device initialization

0 commit comments

Comments
 (0)