Skip to content

Commit 1dc8b1e

Browse files
authored
Merge pull request #1696 from CEED/jeremy/jit-include
JiT include update
2 parents 95f7ac9 + 6a96780 commit 1dc8b1e

File tree

184 files changed

+529
-663
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

184 files changed

+529
-663
lines changed

Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -572,7 +572,7 @@ $(OBJDIR)/%.o : $(CURDIR)/%.sycl.cpp | $$(@D)/.DIR
572572
$(call quiet,SYCLCXX) $(SYCLFLAGS) $(CPPFLAGS) -c -o $@ $(abspath $<)
573573

574574
$(OBJDIR)/%$(EXE_SUFFIX) : tests/%.c | $$(@D)/.DIR
575-
$(call quiet,LINK.c) $(CEED_LDFLAGS) -o $@ $(abspath $<) $(CEED_LIBS) $(CEED_LDLIBS) $(LDLIBS)
575+
$(call quiet,LINK.c) $(CEED_LDFLAGS) -o $@ $(abspath $<) $(CEED_LIBS) $(CEED_LDLIBS) $(LDLIBS) -I./tests/test-include
576576

577577
$(OBJDIR)/%$(EXE_SUFFIX) : tests/%.f90 | $$(@D)/.DIR
578578
$(call quiet,LINK.F) -DSOURCE_DIR='"$(abspath $(<D))/"' $(CEED_LDFLAGS) -o $@ $(abspath $<) $(CEED_LIBS) $(CEED_LDLIBS) $(LDLIBS)

backends/avx/ceed-avx-tensor.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
#include <immintrin.h>
1111
#include <stdbool.h>
1212

13-
#ifdef CEED_F64_H
13+
#ifdef CEED_SCALAR_IS_FP64
1414
#define rtype __m256d
1515
#define loadu _mm256_loadu_pd
1616
#define storeu _mm256_storeu_pd

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

+12-33
Original file line number | Diff line number | Diff line change
@@ -696,42 +696,17 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) {
696696
CeedCallBackend(CeedGetData(ceed, &ceed_data));
697697
CeedCallBackend(cudaGetDeviceProperties(&prop, ceed_data->device_id));
698698
if ((prop.major < 6) && (CEED_SCALAR_TYPE != CEED_SCALAR_FP32)) {
699-
char *atomic_add_source;
700-
const char *atomic_add_path;
701-
702-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-atomic-add-fallback.h", &atomic_add_path));
703-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Atomic Add Source -----\n");
704-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, atomic_add_path, &atomic_add_source));
705-
code << atomic_add_source;
706-
CeedCallBackend(CeedFree(&atomic_add_path));
707-
CeedCallBackend(CeedFree(&atomic_add_source));
699+
code << "// AtomicAdd fallback source\n";
700+
code << "#include <ceed/jit-source/cuda/cuda-atomic-add-fallback.h>\n\n";
708701
}
709702
}
710703

711704
// Load basis source files
712705
// TODO: Add non-tensor, AtPoints
713-
{
714-
char *tensor_basis_kernel_source;
715-
const char *tensor_basis_kernel_path;
716-
717-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h", &tensor_basis_kernel_path));
718-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Tensor Basis Kernel Source -----\n");
719-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, tensor_basis_kernel_path, &tensor_basis_kernel_source));
720-
code << tensor_basis_kernel_source;
721-
CeedCallBackend(CeedFree(&tensor_basis_kernel_path));
722-
CeedCallBackend(CeedFree(&tensor_basis_kernel_source));
723-
}
724-
{
725-
char *cuda_gen_template_source;
726-
const char *cuda_gen_template_path;
727-
728-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-gen-templates.h", &cuda_gen_template_path));
729-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Cuda-Gen Template Source -----\n");
730-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, cuda_gen_template_path, &cuda_gen_template_source));
731-
code << cuda_gen_template_source;
732-
CeedCallBackend(CeedFree(&cuda_gen_template_path));
733-
CeedCallBackend(CeedFree(&cuda_gen_template_source));
734-
}
706+
code << "// Tensor basis source\n";
707+
code << "#include <ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h>\n\n";
708+
code << "// CodeGen operator source\n";
709+
code << "#include <ceed/jit-source/cuda/cuda-gen-templates.h>\n\n";
735710

736711
// Get QFunction name
737712
std::string qfunction_name(qf_data->qfunction_name);
@@ -749,9 +724,13 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) {
749724

750725
// Add user QFunction source
751726
{
752-
std::string qfunction_source(qf_data->qfunction_source);
727+
const char *source_path;
728+
729+
CeedCallBackend(CeedQFunctionGetSourcePath(qf, &source_path));
730+
CeedCheck(source_path, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/cuda/gen backend requires QFunction source code file");
753731

754-
code << qfunction_source;
732+
code << "// User QFunction source\n";
733+
code << "#include \"" << source_path << "\"\n\n";
755734
}
756735

757736
// Setup

backends/cuda-gen/ceed-cuda-gen-qfunction.c

-5
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) {
2727

2828
CeedCallBackend(CeedQFunctionGetData(qf, &data));
2929
CeedCallCuda(CeedQFunctionReturnCeed(qf), cudaFree(data->d_c));
30-
CeedCallBackend(CeedFree(&data->qfunction_source));
3130
CeedCallBackend(CeedFree(&data));
3231
return CEED_ERROR_SUCCESS;
3332
}
@@ -45,10 +44,6 @@ int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) {
4544

4645
// Read QFunction source
4746
CeedCallBackend(CeedQFunctionGetKernelName(qf, &data->qfunction_name));
48-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n");
49-
CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source));
50-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n");
51-
CeedCheck(data->qfunction_source, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/cuda/gen backend requires QFunction source code file");
5247

5348
CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", CeedQFunctionApply_Cuda_gen));
5449
CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", CeedQFunctionDestroy_Cuda_gen));

backends/cuda-gen/ceed-cuda-gen.h

-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@ typedef struct {
2626

2727
typedef struct {
2828
const char *qfunction_name;
29-
const char *qfunction_source;
3029
void *d_c;
3130
} CeedQFunction_Cuda_gen;
3231

backends/cuda-ref/ceed-cuda-ref-basis.c

+10-41
Original file line number | Diff line number | Diff line change
@@ -182,24 +182,17 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
182182
}
183183

184184
// -- Compile kernels
185-
char *basis_kernel_source;
186-
const char *basis_kernel_path;
187-
CeedInt num_comp;
185+
const char basis_kernel_source[] = "// AtPoints basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h>\n";
186+
CeedInt num_comp;
188187

189188
if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints));
190189
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
191-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h", &basis_kernel_path));
192-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
193-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
194-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
195190
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
196191
Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
197192
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
198193
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
199194
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
200195
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
201-
CeedCallBackend(CeedFree(&basis_kernel_path));
202-
CeedCallBackend(CeedFree(&basis_kernel_source));
203196
}
204197

205198
// Get read/write access to u, v
@@ -419,8 +412,6 @@ static int CeedBasisDestroyNonTensor_Cuda(CeedBasis basis) {
419412
int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
420413
const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
421414
Ceed ceed;
422-
char *basis_kernel_source;
423-
const char *basis_kernel_path;
424415
CeedInt num_comp;
425416
const CeedInt q_bytes = Q_1d * sizeof(CeedScalar);
426417
const CeedInt interp_bytes = q_bytes * P_1d;
@@ -440,19 +431,15 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
440431
CeedCallCuda(ceed, cudaMemcpy(data->d_grad_1d, grad_1d, interp_bytes, cudaMemcpyHostToDevice));
441432

442433
// Compile basis kernels
434+
const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-tensor.h>\n";
435+
443436
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
444-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor.h", &basis_kernel_path));
445-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
446-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
447-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
448437
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
449438
Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
450439
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
451440
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
452441
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad));
453442
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
454-
CeedCallBackend(CeedFree(&basis_kernel_path));
455-
CeedCallBackend(CeedFree(&basis_kernel_source));
456443

457444
CeedCallBackend(CeedBasisSetData(basis, data));
458445

@@ -471,8 +458,6 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
471458
int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
472459
const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
473460
Ceed ceed;
474-
char *basis_kernel_source;
475-
const char *basis_kernel_path;
476461
CeedInt num_comp, q_comp_interp, q_comp_grad;
477462
const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
478463
CeedBasisNonTensor_Cuda *data;
@@ -501,20 +486,16 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes
501486
}
502487

503488
// Compile basis kernels
489+
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
490+
504491
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
505-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path));
506-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
507-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
508-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
509492
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
510493
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
511494
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
512495
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
513496
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
514497
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
515498
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
516-
CeedCallBackend(CeedFree(&basis_kernel_path));
517-
CeedCallBackend(CeedFree(&basis_kernel_source));
518499

519500
CeedCallBackend(CeedBasisSetData(basis, data));
520501

@@ -531,8 +512,6 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes
531512
int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div,
532513
const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
533514
Ceed ceed;
534-
char *basis_kernel_source;
535-
const char *basis_kernel_path;
536515
CeedInt num_comp, q_comp_interp, q_comp_div;
537516
const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
538517
CeedBasisNonTensor_Cuda *data;
@@ -561,20 +540,16 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod
561540
}
562541

563542
// Compile basis kernels
543+
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
544+
564545
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
565-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path));
566-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
567-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
568-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
569546
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
570547
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
571548
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
572549
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
573550
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
574551
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
575552
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
576-
CeedCallBackend(CeedFree(&basis_kernel_path));
577-
CeedCallBackend(CeedFree(&basis_kernel_source));
578553

579554
CeedCallBackend(CeedBasisSetData(basis, data));
580555

@@ -591,8 +566,6 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod
591566
int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
592567
const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
593568
Ceed ceed;
594-
char *basis_kernel_source;
595-
const char *basis_kernel_path;
596569
CeedInt num_comp, q_comp_interp, q_comp_curl;
597570
const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
598571
CeedBasisNonTensor_Cuda *data;
@@ -621,20 +594,16 @@ int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_no
621594
}
622595

623596
// Compile basis kernels
597+
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
598+
624599
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
625-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path));
626-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
627-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
628-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
629600
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
630601
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
631602
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
632603
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
633604
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
634605
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
635606
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
636-
CeedCallBackend(CeedFree(&basis_kernel_path));
637-
CeedCallBackend(CeedFree(&basis_kernel_source));
638607

639608
CeedCallBackend(CeedBasisSetData(basis, data));
640609

0 commit comments

Comments
 (0)