Skip to content

Commit 0289d84

Browse files
committed
minor - fix style in gen templates
1 parent 8176a24 commit 0289d84

File tree

2 files changed

+53
-31
lines changed

2 files changed

+53
-31
lines changed

include/ceed/jit-source/cuda/cuda-gen-templates.h

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -179,13 +179,14 @@ inline __device__ void WriteLVecStrided2d(SharedData_Cuda &data, const CeedInt e
179179
template <int NUM_COMP, int COMP_STRIDE, int P_1d>
180180
inline __device__ void ReadLVecStandard3d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__restrict__ indices,
181181
const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) {
182-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
182+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
183183
for (CeedInt z = 0; z < P_1d; z++) {
184184
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
185185
const CeedInt ind = indices[node + elem * P_1d * P_1d * P_1d];
186186

187187
for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[z + comp * P_1d] = d_u[ind + COMP_STRIDE * comp];
188188
}
189+
}
189190
}
190191

191192
//------------------------------------------------------------------------------
@@ -194,13 +195,14 @@ inline __device__ void ReadLVecStandard3d(SharedData_Cuda &data, const CeedInt n
194195
template <int NUM_COMP, int P_1d, int STRIDES_NODE, int STRIDES_COMP, int STRIDES_ELEM>
195196
inline __device__ void ReadLVecStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedScalar *__restrict__ d_u,
196197
CeedScalar *__restrict__ r_u) {
197-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
198+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
198199
for (CeedInt z = 0; z < P_1d; z++) {
199200
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
200201
const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM;
201202

202203
for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[z + comp * P_1d] = d_u[ind + comp * STRIDES_COMP];
203204
}
205+
}
204206
}
205207

206208
//------------------------------------------------------------------------------
@@ -238,13 +240,14 @@ inline __device__ void ReadEVecSliceStrided3d(SharedData_Cuda &data, const CeedI
238240
template <int NUM_COMP, int COMP_STRIDE, int P_1d>
239241
inline __device__ void WriteLVecStandard3d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__restrict__ indices,
240242
const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) {
241-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
243+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
242244
for (CeedInt z = 0; z < P_1d; z++) {
243245
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
244246
const CeedInt ind = indices[node + elem * P_1d * P_1d * P_1d];
245247

246248
for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[z + comp * P_1d]);
247249
}
250+
}
248251
}
249252

250253
//------------------------------------------------------------------------------
@@ -253,13 +256,14 @@ inline __device__ void WriteLVecStandard3d(SharedData_Cuda &data, const CeedInt
253256
template <int NUM_COMP, int P_1d, int STRIDES_NODE, int STRIDES_COMP, int STRIDES_ELEM>
254257
inline __device__ void WriteLVecStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedScalar *__restrict__ r_v,
255258
CeedScalar *__restrict__ d_v) {
256-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
259+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
257260
for (CeedInt z = 0; z < P_1d; z++) {
258261
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
259262
const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM;
260263

261264
for (CeedInt comp = 0; comp < NUM_COMP; comp++) d_v[ind + comp * STRIDES_COMP] += r_v[z + comp * P_1d];
262265
}
266+
}
263267
}
264268

265269
//------------------------------------------------------------------------------
@@ -274,15 +278,19 @@ inline __device__ void GradColloSlice3d(SharedData_Cuda &data, const CeedInt q,
274278
__syncthreads();
275279
// X derivative
276280
r_V[comp + 0 * NUM_COMP] = 0.0;
277-
for (CeedInt i = 0; i < Q_1d; i++)
278-
r_V[comp + 0 * NUM_COMP] += c_G[i + data.t_id_x * Q_1d] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction (X derivative)
281+
for (CeedInt i = 0; i < Q_1d; i++) {
282+
r_V[comp + 0 * NUM_COMP] += c_G[i + data.t_id_x * Q_1d] * data.slice[i + data.t_id_y * T_1D];
283+
}
279284
// Y derivative
280285
r_V[comp + 1 * NUM_COMP] = 0.0;
281-
for (CeedInt i = 0; i < Q_1d; i++)
282-
r_V[comp + 1 * NUM_COMP] += c_G[i + data.t_id_y * Q_1d] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction (Y derivative)
286+
for (CeedInt i = 0; i < Q_1d; i++) {
287+
r_V[comp + 1 * NUM_COMP] += c_G[i + data.t_id_y * Q_1d] * data.slice[data.t_id_x + i * T_1D];
288+
}
283289
// Z derivative
284290
r_V[comp + 2 * NUM_COMP] = 0.0;
285-
for (CeedInt i = 0; i < Q_1d; i++) r_V[comp + 2 * NUM_COMP] += c_G[i + q * Q_1d] * r_U[i + comp * Q_1d]; // Contract z direction (Z derivative)
291+
for (CeedInt i = 0; i < Q_1d; i++) {
292+
r_V[comp + 2 * NUM_COMP] += c_G[i + q * Q_1d] * r_U[i + comp * Q_1d];
293+
}
286294
__syncthreads();
287295
}
288296
}
@@ -296,21 +304,24 @@ inline __device__ void GradColloSliceTranspose3d(SharedData_Cuda &data, const Ce
296304
CeedScalar *__restrict__ r_V) {
297305
if (data.t_id_x < Q_1d && data.t_id_y < Q_1d) {
298306
for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
299-
// X derivative
300307
data.slice[data.t_id_x + data.t_id_y * T_1D] = r_U[comp + 0 * NUM_COMP];
301308
__syncthreads();
302-
for (CeedInt i = 0; i < Q_1d; i++)
303-
r_V[q + comp * Q_1d] += c_G[data.t_id_x + i * Q_1d] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction (X derivative)
309+
// X derivative
310+
for (CeedInt i = 0; i < Q_1d; i++) {
311+
r_V[q + comp * Q_1d] += c_G[data.t_id_x + i * Q_1d] * data.slice[i + data.t_id_y * T_1D];
312+
}
304313
__syncthreads();
305314
// Y derivative
306315
data.slice[data.t_id_x + data.t_id_y * T_1D] = r_U[comp + 1 * NUM_COMP];
307316
__syncthreads();
308-
for (CeedInt i = 0; i < Q_1d; i++)
309-
r_V[q + comp * Q_1d] += c_G[data.t_id_y + i * Q_1d] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction (Y derivative)
317+
for (CeedInt i = 0; i < Q_1d; i++) {
318+
r_V[q + comp * Q_1d] += c_G[data.t_id_y + i * Q_1d] * data.slice[data.t_id_x + i * T_1D];
319+
}
310320
__syncthreads();
311321
// Z derivative
312-
for (CeedInt i = 0; i < Q_1d; i++)
313-
r_V[i + comp * Q_1d] += c_G[i + q * Q_1d] * r_U[comp + 2 * NUM_COMP]; // PARTIAL contract z direction (Z derivative)
322+
for (CeedInt i = 0; i < Q_1d; i++) {
323+
r_V[i + comp * Q_1d] += c_G[i + q * Q_1d] * r_U[comp + 2 * NUM_COMP];
324+
}
314325
}
315326
}
316327
}

include/ceed/jit-source/hip/hip-gen-templates.h

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -145,27 +145,29 @@ inline __device__ void WriteLVecStrided2d(SharedData_Hip &data, const CeedInt el
145145
template <int NUM_COMP, int COMP_STRIDE, int P_1d>
146146
inline __device__ void ReadLVecStandard3d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__restrict__ indices,
147147
const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) {
148-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
148+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
149149
for (CeedInt z = 0; z < P_1d; z++) {
150150
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
151151
const CeedInt ind = indices[node + elem * P_1d * P_1d * P_1d];
152152

153153
for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[z + comp * P_1d] = d_u[ind + COMP_STRIDE * comp];
154154
}
155+
}
155156
}
156157

157158
//------------------------------------------------------------------------------
158159
// L-vector -> E-vector, strided
159160
//------------------------------------------------------------------------------
160161
template <int NUM_COMP, int P_1d, int STRIDES_NODE, int STRIDES_COMP, int STRIDES_ELEM>
161162
inline __device__ void ReadLVecStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) {
162-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
163+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
163164
for (CeedInt z = 0; z < P_1d; z++) {
164165
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
165166
const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM;
166167

167168
for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[z + comp * P_1d] = d_u[ind + comp * STRIDES_COMP];
168169
}
170+
}
169171
}
170172

171173
//------------------------------------------------------------------------------
@@ -203,13 +205,14 @@ inline __device__ void ReadEVecSliceStrided3d(SharedData_Hip &data, const CeedIn
203205
template <int NUM_COMP, int COMP_STRIDE, int P_1d>
204206
inline __device__ void WriteLVecStandard3d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__restrict__ indices,
205207
const CeedScalar *__restrict__ r_v, CeedScalar *__restrict__ d_v) {
206-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
208+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
207209
for (CeedInt z = 0; z < P_1d; z++) {
208210
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
209211
const CeedInt ind = indices[node + elem * P_1d * P_1d * P_1d];
210212

211213
for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[z + comp * P_1d]);
212214
}
215+
}
213216
}
214217

215218
//------------------------------------------------------------------------------
@@ -218,13 +221,14 @@ inline __device__ void WriteLVecStandard3d(SharedData_Hip &data, const CeedInt n
218221
template <int NUM_COMP, int P_1d, int STRIDES_NODE, int STRIDES_COMP, int STRIDES_ELEM>
219222
inline __device__ void WriteLVecStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedScalar *__restrict__ r_v,
220223
CeedScalar *__restrict__ d_v) {
221-
if (data.t_id_x < P_1d && data.t_id_y < P_1d)
224+
if (data.t_id_x < P_1d && data.t_id_y < P_1d) {
222225
for (CeedInt z = 0; z < P_1d; z++) {
223226
const CeedInt node = data.t_id_x + data.t_id_y * P_1d + z * P_1d * P_1d;
224227
const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM;
225228

226229
for (CeedInt comp = 0; comp < NUM_COMP; comp++) d_v[ind + comp * STRIDES_COMP] += r_v[z + comp * P_1d];
227230
}
231+
}
228232
}
229233

230234
//------------------------------------------------------------------------------
@@ -239,15 +243,19 @@ inline __device__ void GradColloSlice3d(SharedData_Hip &data, const CeedInt q, c
239243
__syncthreads();
240244
// X derivative
241245
r_V[comp + 0 * NUM_COMP] = 0.0;
242-
for (CeedInt i = 0; i < Q_1d; i++)
243-
r_V[comp + 0 * NUM_COMP] += c_G[i + data.t_id_x * Q_1d] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction (X derivative)
246+
for (CeedInt i = 0; i < Q_1d; i++) {
247+
r_V[comp + 0 * NUM_COMP] += c_G[i + data.t_id_x * Q_1d] * data.slice[i + data.t_id_y * T_1D];
248+
}
244249
// Y derivative
245250
r_V[comp + 1 * NUM_COMP] = 0.0;
246-
for (CeedInt i = 0; i < Q_1d; i++)
247-
r_V[comp + 1 * NUM_COMP] += c_G[i + data.t_id_y * Q_1d] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction (Y derivative)
251+
for (CeedInt i = 0; i < Q_1d; i++) {
252+
r_V[comp + 1 * NUM_COMP] += c_G[i + data.t_id_y * Q_1d] * data.slice[data.t_id_x + i * T_1D];
253+
}
248254
// Z derivative
249255
r_V[comp + 2 * NUM_COMP] = 0.0;
250-
for (CeedInt i = 0; i < Q_1d; i++) r_V[comp + 2 * NUM_COMP] += c_G[i + q * Q_1d] * r_U[i + comp * Q_1d]; // Contract z direction (Z derivative)
256+
for (CeedInt i = 0; i < Q_1d; i++) {
257+
r_V[comp + 2 * NUM_COMP] += c_G[i + q * Q_1d] * r_U[i + comp * Q_1d];
258+
}
251259
__syncthreads();
252260
}
253261
}
@@ -264,18 +272,21 @@ inline __device__ void GradColloSliceTranspose3d(SharedData_Hip &data, const Cee
264272
// X derivative
265273
data.slice[data.t_id_x + data.t_id_y * T_1D] = r_U[comp + 0 * NUM_COMP];
266274
__syncthreads();
267-
for (CeedInt i = 0; i < Q_1d; i++)
268-
r_V[q + comp * Q_1d] += c_G[data.t_id_x + i * Q_1d] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction (X derivative)
275+
for (CeedInt i = 0; i < Q_1d; i++) {
276+
r_V[q + comp * Q_1d] += c_G[data.t_id_x + i * Q_1d] * data.slice[i + data.t_id_y * T_1D];
277+
}
269278
__syncthreads();
270279
// Y derivative
271280
data.slice[data.t_id_x + data.t_id_y * T_1D] = r_U[comp + 1 * NUM_COMP];
272281
__syncthreads();
273-
for (CeedInt i = 0; i < Q_1d; i++)
274-
r_V[q + comp * Q_1d] += c_G[data.t_id_y + i * Q_1d] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction (Y derivative)
282+
for (CeedInt i = 0; i < Q_1d; i++) {
283+
r_V[q + comp * Q_1d] += c_G[data.t_id_y + i * Q_1d] * data.slice[data.t_id_x + i * T_1D];
284+
}
275285
__syncthreads();
276286
// Z derivative
277-
for (CeedInt i = 0; i < Q_1d; i++)
278-
r_V[i + comp * Q_1d] += c_G[i + q * Q_1d] * r_U[comp + 2 * NUM_COMP]; // PARTIAL contract z direction (Z derivative)
287+
for (CeedInt i = 0; i < Q_1d; i++) {
288+
r_V[i + comp * Q_1d] += c_G[i + q * Q_1d] * r_U[comp + 2 * NUM_COMP];
289+
}
279290
}
280291
}
281292
}

0 commit comments

Comments
 (0)