Skip to content

Commit

Permalink
cuda - update shared max matrix sizes
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylt committed Jan 3, 2025
1 parent 0ff6123 commit 3e42d60
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 5 deletions.
9 changes: 7 additions & 2 deletions backends/cuda-shared/ceed-cuda-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
// Device-side initializers implemented in kernels/*.cu. Each copies the given
// device basis matrices into GPU __constant__ memory and returns the constant
// symbol address through the c_B/c_G output pointers (see
// kernels/cuda-shared-basis-nontensor.cu for the non-tensor variant).
// NOTE(review): the non-tensor prototype below takes (d_B, dim, P, Q, c_B);
// confirm the .cu definition uses the same argument order — C linkage will not
// catch a mismatch.
int CeedInit_CudaInterp(CeedScalar *d_B, CeedInt P_1d, CeedInt Q_1d, CeedScalar **c_B);
int CeedInit_CudaGrad(CeedScalar *d_B, CeedScalar *d_G, CeedInt P_1d, CeedInt Q_1d, CeedScalar **c_B_ptr, CeedScalar **c_G_ptr);
int CeedInit_CudaCollocatedGrad(CeedScalar *d_B, CeedScalar *d_G, CeedInt P_1d, CeedInt Q_1d, CeedScalar **c_B_ptr, CeedScalar **c_G_ptr);
int CeedInit_CudaNonTensor(CeedScalar *d_B, CeedInt dim, CeedInt P, CeedInt Q, CeedScalar **c_B);

//------------------------------------------------------------------------------
// Apply tensor basis
Expand Down Expand Up @@ -456,7 +457,7 @@ static int CeedBasisApplyNonTensorCore_Cuda_shared(CeedBasis basis, bool apply_a
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &Q));
CeedInt thread = CeedIntMax(Q, P);

CeedCallBackend(CeedInit_CudaInterp(data->d_interp_1d, P, Q, &data->c_B));
CeedCallBackend(CeedInit_CudaNonTensor(data->d_interp_1d, 1, P, Q, &data->c_B));
void *interp_args[] = {(void *)&num_elem, &data->c_B, &d_u, &d_v};

{
Expand All @@ -480,7 +481,7 @@ static int CeedBasisApplyNonTensorCore_Cuda_shared(CeedBasis basis, bool apply_a
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &Q));
CeedInt thread = CeedIntMax(Q, P);

CeedCallBackend(CeedInit_CudaInterp(data->d_grad_1d, P, Q * dim, &data->c_G));
CeedCallBackend(CeedInit_CudaNonTensor(data->d_grad_1d, 3, P, Q * dim, &data->c_G));
void *grad_args[] = {(void *)&num_elem, &data->c_G, &d_u, &d_v};

{
Expand Down Expand Up @@ -641,6 +642,10 @@ int CeedBasisCreateH1_Cuda_shared(CeedElemTopology topo, CeedInt dim, CeedInt nu
CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
CeedCallBackend(CeedCalloc(1, &data));

// Check max sizes
CeedCheck(dim <= 3, ceed, CEED_ERROR_BACKEND, "Backend does not implement nontensor bases with dim > 3");
CeedCheck(num_nodes * num_qpts * dim < 52 * 52 * 3, ceed, CEED_ERROR_BACKEND, "Backend does not implement nontensor bases with P * Q this large");

// Copy basis data to GPU
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
Expand Down
25 changes: 25 additions & 0 deletions backends/cuda-shared/kernels/cuda-shared-basis-nontensor.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#include <ceed.h>
#include <cuda.h>

// Maximum non-tensor basis matrix footprint held in constant memory.
// 52 * 52 * 3 CeedScalar entries (~63 KiB if CeedScalar is double — just under
// the 64 KiB constant-memory limit; TODO confirm for single-precision builds).
const int MAX_SIZE = 52, MAX_DIM = 3;
// Constant-memory copy of the active basis matrix (interp or grad), populated
// by CeedInit_CudaNonTensor below.
__constant__ CeedScalar c_B[MAX_SIZE * MAX_SIZE * MAX_DIM];

//------------------------------------------------------------------------------
// Non-tensor basis device initialization
//
// Copies the P * Q * dim entries of the device basis matrix d_B into constant
// memory and returns the constant-memory address through c_B_ptr.
//
// The argument order (d_B, dim, P, Q, c_B_ptr) matches the C-linkage prototype
// and the call sites in ceed-cuda-shared-basis.c; the previous definition used
// (d_B, P, Q, dim, c_B_ptr), which silently disagreed with the prototype
// (extern "C" performs no signature checking across translation units).
//------------------------------------------------------------------------------
extern "C" int CeedInit_CudaNonTensor(CeedScalar *d_B, CeedInt dim, CeedInt P, CeedInt Q, CeedScalar **c_B_ptr) {
  const size_t bytes = (size_t)P * Q * dim * sizeof(CeedScalar);

  // Guard against overflowing the fixed-size constant buffer; the host-side
  // basis creation checks this too, but fail loudly rather than corrupt memory
  if (bytes > sizeof(c_B)) return CEED_ERROR_BACKEND;
  // Surface CUDA failures to the caller (call sites wrap in CeedCallBackend)
  if (cudaMemcpyToSymbol(c_B, d_B, bytes, 0, cudaMemcpyDeviceToDevice) != cudaSuccess) return CEED_ERROR_BACKEND;
  if (cudaGetSymbolAddress((void **)c_B_ptr, c_B) != cudaSuccess) return CEED_ERROR_BACKEND;
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include <ceed.h>
#include <cuda.h>

const int sizeMax = 16;
__constant__ CeedScalar c_B[sizeMax * sizeMax];
__constant__ CeedScalar c_G[sizeMax * sizeMax];
// Maximum supported 1D tensor basis dimension (P_1d, Q_1d <= MAX_SIZE)
const int MAX_SIZE = 16;
// Constant-memory copies of the 1D interpolation and gradient matrices,
// populated by the CeedInit_Cuda* host functions in this file
__constant__ CeedScalar c_B[MAX_SIZE * MAX_SIZE];
__constant__ CeedScalar c_G[MAX_SIZE * MAX_SIZE];

//------------------------------------------------------------------------------
// Interp device initialization
Expand Down

0 comments on commit 3e42d60

Please sign in to comment.