-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompute_next.cu
More file actions
37 lines (31 loc) · 1.36 KB
/
compute_next.cu
File metadata and controls
37 lines (31 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include "cuda_helper.h"
// Forward declaration of the device kernel that compute_next launches when
// iter_method == 0. The definition is not in this file.
//
// compute_next launches it on a 2D grid of 8x8 thread blocks sized from
// (npts_x - 2) by (npts_y - 2), passing device pointers for every pointer
// argument.
//
//   x_old, x_new : device buffers of npts_x * npts_y doubles
//                  (presumably input and output of the sweep -- confirm
//                  against the kernel definition)
//   npts_x/y     : extents of the local grid as passed by compute_next
//   diff         : device pointer to a single double; compute_next zeroes it
//                  before the launch and copies it back afterwards
//                  (presumably a convergence measure -- confirm)
__global__ void iter_kernel_jacobi(
    double *x_old,
    double *x_new,
    int npts_x,
    int npts_y,
    double *diff);
// Forward declaration of the device kernel that compute_next launches when
// iter_method != 0. The definition is not in this file.
//
// compute_next launches it twice per step with the same grid/block layout
// (8x8 thread blocks): first with is_red == true, then with is_red == false.
//
//   x_old, x_new : device buffers of npts_x * npts_y doubles
//   npts_x/y     : extents of the local grid as passed by compute_next
//   diff         : device pointer to a single double; compute_next zeroes it
//                  once before the first launch and reads it back after the
//                  second one
//   is_red       : selects which of the two passes this launch performs
//                  (presumably the red vs. black points of a red-black
//                  Gauss-Seidel ordering -- confirm against the definition)
__global__ void iter_kernel_gs(
    double *x_old,
    double *x_new,
    int npts_x,
    int npts_y,
    double *diff,
    bool is_red);
// Perform one iteration step on the device and return the results to the host.
//
// Sequence:
//   1. copy the host field x_old_h into x_old_d and zero the scalar *diff_d,
//   2. launch iter_kernel_jacobi (iter_method == 0) or two iter_kernel_gs
//      passes (any other value; is_red = true, then false),
//   3. copy x_new_d back into x_old_h and *diff_d into *diff_h.
//
// Parameters:
//   x_old_d, x_new_d : device buffers of npts_local_x * npts_local_y doubles
//   x_old_h          : host buffer of the same size; read in step 1 and
//                      overwritten with the contents of x_new_d in step 3
//   npts_local_x/y   : extents of the local grid; the launch grid is sized
//                      from (npts - 2) in each direction
//   diff_d, diff_h   : device / host location of a single double
//   iter_method      : 0 selects the Jacobi kernel, anything else the
//                      two-pass kernel
//
// Errors from the CUDA API calls and from the kernel launches are routed
// through cudaErrCheck (declared in cuda_helper.h).
void compute_next(
    double *x_old_d, double *x_old_h,
    double *x_new_d, int npts_local_x, int npts_local_y,
    double *diff_d, double *diff_h,
    int iter_method)
{
    // Promote to size_t *before* multiplying: int * int would overflow for
    // large local grids before the sizeof factor widens the expression.
    const size_t field_bytes = static_cast<size_t>(npts_local_x) *
                               static_cast<size_t>(npts_local_y) * sizeof(double);

    // transfer data from host to device
    cudaErrCheck(cudaMemcpy(x_old_d, x_old_h, field_bytes, cudaMemcpyHostToDevice));
    cudaErrCheck(cudaMemset(diff_d, 0, sizeof(double)));

    // The grid is sized from (npts - 2). With npts <= 2 in either direction
    // that yields zero blocks (or a wrapped unsigned value for negative
    // input), which is an invalid launch configuration, so skip the launch.
    if (npts_local_x > 2 && npts_local_y > 2)
    {
        const dim3 block(8, 8);
        const dim3 grid((npts_local_x - 2 + block.x - 1) / block.x,
                        (npts_local_y - 2 + block.y - 1) / block.y);

        if (iter_method == 0)
        {
            iter_kernel_jacobi<<<grid, block>>>(x_old_d, x_new_d, npts_local_x, npts_local_y, diff_d);
            // Kernel launches return nothing; launch failures must be fetched explicitly.
            cudaErrCheck(cudaGetLastError());
        }
        else
        {
            iter_kernel_gs<<<grid, block>>>(x_old_d, x_new_d, npts_local_x, npts_local_y, diff_d, true);
            cudaErrCheck(cudaGetLastError());
            // Same stream as the first pass, so this pass starts only after it finishes.
            iter_kernel_gs<<<grid, block>>>(x_old_d, x_new_d, npts_local_x, npts_local_y, diff_d, false);
            cudaErrCheck(cudaGetLastError());
        }
    }

    // transfer data from device to host (blocking copies, so the kernels
    // above have completed before the host reads the results)
    cudaErrCheck(cudaMemcpy(x_old_h, x_new_d, field_bytes, cudaMemcpyDeviceToHost));
    cudaErrCheck(cudaMemcpy(diff_h, diff_d, sizeof(double), cudaMemcpyDeviceToHost));
}