@@ -194,7 +194,9 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
194
194
"BASIS_NUM_NODES" , CeedIntPow (P_1d , dim ), "BASIS_NUM_QPTS" , CeedIntPow (Q_1d , dim ), "BASIS_NUM_PTS" ,
195
195
max_num_points , "POINTS_BUFF_LEN" , CeedIntPow (Q_1d , dim - 1 )));
196
196
CeedCallBackend (CeedGetKernel_Cuda (ceed , data -> moduleAtPoints , "InterpAtPoints" , & data -> InterpAtPoints ));
197
+ CeedCallBackend (CeedGetKernel_Cuda (ceed , data -> moduleAtPoints , "InterpTransposeAtPoints" , & data -> InterpTransposeAtPoints ));
197
198
CeedCallBackend (CeedGetKernel_Cuda (ceed , data -> moduleAtPoints , "GradAtPoints" , & data -> GradAtPoints ));
199
+ CeedCallBackend (CeedGetKernel_Cuda (ceed , data -> moduleAtPoints , "GradTransposeAtPoints" , & data -> GradTransposeAtPoints ));
198
200
}
199
201
200
202
// Get read/write access to u, v
@@ -220,16 +222,17 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
220
222
// Basis action
221
223
switch (eval_mode ) {
222
224
case CEED_EVAL_INTERP : {
223
- void * interp_args [] = {( void * ) & num_elem , (void * )& is_transpose , & data -> d_chebyshev_interp_1d , & data -> d_points_per_elem , & d_x , & d_u , & d_v };
224
- const CeedInt block_size = CeedIntMin (CeedIntPow (Q_1d , dim ), max_block_size );
225
+ void * interp_args [] = { (void * )& num_elem , & data -> d_chebyshev_interp_1d , & data -> d_points_per_elem , & d_x , & d_u , & d_v };
226
+ const CeedInt block_size = CeedIntMin (CeedIntPow (Q_1d , dim ), max_block_size );
225
227
226
- CeedCallBackend (CeedRunKernel_Cuda (ceed , data -> InterpAtPoints , num_elem , block_size , interp_args ));
228
+ CeedCallBackend (
229
+ CeedRunKernel_Cuda (ceed , is_transpose ? data -> InterpTransposeAtPoints : data -> InterpAtPoints , num_elem , block_size , interp_args ));
227
230
} break ;
228
231
case CEED_EVAL_GRAD : {
229
- void * grad_args [] = {(void * )& num_elem , ( void * ) & is_transpose , & data -> d_chebyshev_interp_1d , & data -> d_points_per_elem , & d_x , & d_u , & d_v };
230
- const CeedInt block_size = CeedIntMin (CeedIntPow (Q_1d , dim ), max_block_size );
232
+ void * grad_args [] = {(void * )& num_elem , & data -> d_chebyshev_interp_1d , & data -> d_points_per_elem , & d_x , & d_u , & d_v };
233
+ const CeedInt block_size = CeedIntMin (CeedIntPow (Q_1d , dim ), max_block_size );
231
234
232
- CeedCallBackend (CeedRunKernel_Cuda (ceed , data -> GradAtPoints , num_elem , block_size , grad_args ));
235
+ CeedCallBackend (CeedRunKernel_Cuda (ceed , is_transpose ? data -> GradTransposeAtPoints : data -> GradAtPoints , num_elem , block_size , grad_args ));
233
236
} break ;
234
237
case CEED_EVAL_WEIGHT :
235
238
case CEED_EVAL_NONE : /* handled separately below */
0 commit comments