1
+ // hello world
2
+
3
+ #include < Meta/CUDA.h>
4
+
5
+ #define GetPhi (phi,x,y,w ) phi[x+w*(y)]
6
+
7
+ void cu_Init () {
8
+
9
+
10
+ }
11
+
12
+ __global__ void reinit (float *phi,float * phi0, float * phin,
13
+ unsigned int width, unsigned int height) {
14
+ uint x = __umul24 (blockIdx .x , blockDim .x ) + threadIdx .x ;
15
+ uint y = __umul24 (blockIdx .y , blockDim .y ) + threadIdx .y ;
16
+
17
+ if (x > width || y > height)
18
+ return ;
19
+
20
+ float xy = GetPhi (phi,x,y,width);
21
+
22
+ float phiXPlus = 0 .0f ;
23
+ float phiXMinus = 0 .0f ;
24
+ float phiYPlus = 0 .0f ;
25
+ float phiYMinus = 0 .0f ;
26
+ if (x != width-1 ) phiXPlus = (GetPhi (phi,x+1 , y,width) - xy);
27
+ if (x != 0 ) phiXMinus = (xy - GetPhi (phi,x-1 , y,width));
28
+
29
+ if (y !=height-1 ) phiYPlus = (GetPhi (phi,x, y+1 ,width) - xy);
30
+ if (y != 0 ) phiYMinus = (xy - GetPhi (phi,x, y-1 ,width));
31
+
32
+ /* GetPhi(phin,x,y,width) = phiYPlus; */
33
+ /* return; */
34
+
35
+
36
+ float dXSquared = 0 ;
37
+ float dYSquared = 0 ;
38
+ float a = GetPhi (phi0,x,y,width);
39
+ if (a > 0 ) {
40
+ // formula 6.3 page 58
41
+ float _max = max (phiXMinus, 0 .0f );
42
+ float _min = min (phiXPlus, 0 .0f );
43
+ dXSquared = max (_max*_max, _min*_min);
44
+
45
+ _max = max (phiYMinus, 0 .0f );
46
+ _min = min (phiYPlus, 0 .0f );
47
+ dYSquared = max (_max*_max, _min*_min);
48
+ } else {
49
+ // formula 6.4 page 58
50
+ float _max = max (phiXPlus, 0 .0f );
51
+ float _min = min (phiXMinus, 0 .0f );
52
+ dXSquared = max (_max*_max, _min*_min);
53
+
54
+ _max = max (phiYPlus, 0 .0f );
55
+ _min = min (phiYMinus, 0 .0f );
56
+ dYSquared = max (_max*_max, _min*_min);
57
+ }
58
+
59
+ float normSquared = dXSquared + dYSquared;
60
+ float norm = sqrt (normSquared);
61
+
62
+ // Using the S(phi) sign formula 7.6 on page 67
63
+ // float sign = phi(x,y) / sqrt(phi(x,y)*phi(x,y) + normSquared);
64
+ float sign = GetPhi (phi0,x,y,width) /
65
+ sqrt (GetPhi (phi0,x,y,width)*GetPhi (phi0,x,y,width) + 1 );
66
+ float t = 0.3 ; // A stabil CFL condition
67
+ GetPhi (phin,x,y,width) = GetPhi (phi,x,y,width) - sign*(norm - 1 )*t;
68
+
69
+
70
+ }
71
+
72
+ void cu_Reinit (float * data,
73
+ unsigned int w,
74
+ unsigned int h,
75
+ unsigned int iterations) {
76
+ float * phiData;
77
+ float * phi0Data;
78
+ float * phinData;
79
+ /* int phiPitch; */
80
+ /* int phi0Pitch; */
81
+ /* int phinPitch; */
82
+
83
+ /* cudaArray* phiData; */
84
+ /* cudaArray* phi0Data; */
85
+ /* cudaArray* phinData; */
86
+
87
+ /* cudaChannelFormatDesc channelDesc = */
88
+ /* cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); */
89
+
90
+
91
+ /* cudaMallocArray(&phiData, &channelDesc, w, h); */
92
+ /* cudaMallocArray(&phi0Data, &channelDesc, w, h); */
93
+ /* cudaMallocArray(&phinData, &channelDesc, w, h); */
94
+
95
+ /* cudaMemcpyToArray(phiData, 0, 0, data, sizeof(float)*w*h, cudaMemcpyHostToDevice); */
96
+ /* cudaMemcpyToArray(phi0Data, 0, 0, data, sizeof(float)*w*h, cudaMemcpyHostToDevice); */
97
+ /* cudaMemcpyToArray(phinData, 0, 0, data, sizeof(float)*w*h, cudaMemcpyHostToDevice); */
98
+
99
+
100
+ cudaMalloc ((void **)&phiData, sizeof (float )*w*h);
101
+ cudaMalloc ((void **)&phi0Data, sizeof (float )*w*h);
102
+ cudaMalloc ((void **)&phinData, sizeof (float )*w*h);
103
+ cudaMemcpy ((void *)phiData,(void *)data, sizeof (float )*w*h,cudaMemcpyHostToDevice);
104
+ cudaMemcpy ((void *)phi0Data,(void *)data, sizeof (float )*w*h,cudaMemcpyHostToDevice);
105
+ cudaMemcpy ((void *)phinData,(void *)data, sizeof (float )*w*h,cudaMemcpyHostToDevice);
106
+
107
+ /* cudaMallocPitch((void**)&phiData, &phiPitch, sizeof(float)*w,h); */
108
+ /* cudaMallocPitch((void**)&phi0Data, &phi0Pitch, sizeof(float)*w,h); */
109
+ /* cudaMallocPitch((void**)&phinData, &phinPitch, sizeof(float)*w,h); */
110
+ /* cudaMemcpy((void*)phiData,(void*)data, sizeof(float)*w*h,cudaMemcpyHostToDevice); */
111
+ /* cudaMemcpy((void*)phi0Data,(void*)data, sizeof(float)*w*h,cudaMemcpyHostToDevice); */
112
+ /* cudaMemcpy((void*)phinData,(void*)data, sizeof(float)*w*h,cudaMemcpyHostToDevice); */
113
+
114
+
115
+ CHECK_FOR_CUDA_ERROR ();
116
+
117
+ const dim3 blockSize (32 ,16 ,1 );
118
+ const dim3 gridSize (w/blockSize.x , h/blockSize.y );
119
+
120
+ // printf("%i,%i\n",w,h);
121
+
122
+ // iterations=1;
123
+ for (unsigned int i=0 ;i<iterations;i++) {
124
+ reinit<<<gridSize,blockSize>>> (phiData,phi0Data,phinData,w,h);
125
+ cudaMemcpy ((void *)phiData,(void *)phinData,sizeof (float )*w*h,cudaMemcpyDeviceToDevice);
126
+ cudaThreadSynchronize ();
127
+ CHECK_FOR_CUDA_ERROR ();
128
+ }
129
+
130
+ cudaMemcpy ((void *)data,(void *)phiData, sizeof (float )*w*h,cudaMemcpyDeviceToHost);
131
+ CHECK_FOR_CUDA_ERROR ();
132
+ cudaFree (phiData);
133
+ cudaFree (phi0Data);
134
+ cudaFree (phinData);
135
+
136
+
137
+ }
0 commit comments