1
- // CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
2
- // Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
1
+ #include " rasterize.h"
3
2
4
- #include < stdio.h>
5
- #include < cuda.h>
6
3
#include < cmath>
4
+ #include < cstdio>
5
+ #include < cuda.h>
7
6
#include < thrust/random.h>
8
- #include " rasterizeKernels .h"
7
+ #include " checkCUDAError .h"
9
8
#include " rasterizeTools.h"
10
9
11
10
glm::vec3* framebuffer;
@@ -15,15 +14,7 @@ float* device_cbo;
15
14
int * device_ibo;
16
15
triangle* primitives;
17
16
18
- void checkCUDAError (const char *msg) {
19
- cudaError_t err = cudaGetLastError ();
20
- if ( cudaSuccess != err) {
21
- fprintf (stderr, " Cuda error: %s: %s.\n " , msg, cudaGetErrorString ( err) );
22
- exit (EXIT_FAILURE);
23
- }
24
- }
25
-
26
- // Handy dandy little hashing function that provides seeds for random number generation
17
+ // Handy dandy little hashing function that provides seeds for random number generation
27
18
__host__ __device__ unsigned int hash (unsigned int a){
28
19
a = (a+0x7ed55d16 ) + (a<<12 );
29
20
a = (a^0xc761c23c ) ^ (a>>19 );
@@ -34,15 +25,15 @@ __host__ __device__ unsigned int hash(unsigned int a){
34
25
return a;
35
26
}
36
27
37
- // Writes a given fragment to a fragment buffer at a given location
28
+ // Writes a given fragment to a fragment buffer at a given location
38
29
/**
 * Stores one fragment into a row-major fragment buffer.
 *
 * @param x            Column of the target cell.
 * @param y            Row of the target cell.
 * @param frag         Fragment value to store.
 * @param depthbuffer  Destination buffer, indexed as y * width + x.
 * @param resolution   Buffer dimensions; x is the width, y is the height.
 *
 * Coordinates outside [0, width) x [0, height) are ignored. The dimensions are
 * truncated to integers so that the bounds test and the index arithmetic use
 * the same whole-number extents (cudaRasterizeCore sizes its buffers from
 * (int)resolution.x * (int)resolution.y), and so that the index is not
 * computed in float, which loses precision for large buffers.
 */
__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){
  const int width = (int)resolution.x;
  const int height = (int)resolution.y;
  // Reject negative coordinates as well: they would otherwise pass the
  // upper-bound test and address memory before the start of the buffer.
  if(x >= 0 && y >= 0 && x < width && y < height){
    const int index = (y * width) + x;
    depthbuffer[index] = frag;
  }
}
44
35
45
- // Reads a fragment from a given location in a fragment buffer
36
+ // Reads a fragment from a given location in a fragment buffer
46
37
__host__ __device__ fragment getFromDepthbuffer (int x, int y, fragment* depthbuffer, glm::vec2 resolution){
47
38
if (x<resolution.x && y<resolution.y ){
48
39
int index = (y*resolution.x ) + x;
@@ -53,15 +44,15 @@ __host__ __device__ fragment getFromDepthbuffer(int x, int y, fragment* depthbuf
53
44
}
54
45
}
55
46
56
- // Writes a given pixel to a pixel buffer at a given location
47
+ // Writes a given pixel to a pixel buffer at a given location
57
48
/**
 * Stores one pixel color into a row-major pixel buffer.
 *
 * @param x            Column of the target pixel.
 * @param y            Row of the target pixel.
 * @param value        Color to store.
 * @param framebuffer  Destination buffer, indexed as y * width + x.
 * @param resolution   Buffer dimensions; x is the width, y is the height.
 *
 * Coordinates outside [0, width) x [0, height) are ignored. The dimensions are
 * truncated to integers so that the bounds test and the index arithmetic use
 * the same whole-number extents (cudaRasterizeCore sizes its buffers from
 * (int)resolution.x * (int)resolution.y), and so that the index is not
 * computed in float, which loses precision for large buffers.
 */
__host__ __device__ void writeToFramebuffer(int x, int y, glm::vec3 value, glm::vec3* framebuffer, glm::vec2 resolution){
  const int width = (int)resolution.x;
  const int height = (int)resolution.y;
  // Reject negative coordinates as well: they would otherwise pass the
  // upper-bound test and address memory before the start of the buffer.
  if(x >= 0 && y >= 0 && x < width && y < height){
    const int index = (y * width) + x;
    framebuffer[index] = value;
  }
}
63
54
64
- // Reads a pixel from a pixel buffer at a given location
55
+ // Reads a pixel from a pixel buffer at a given location
65
56
__host__ __device__ glm::vec3 getFromFramebuffer (int x, int y, glm::vec3* framebuffer, glm::vec2 resolution){
66
57
if (x<resolution.x && y<resolution.y ){
67
58
int index = (y*resolution.x ) + x;
@@ -71,7 +62,7 @@ __host__ __device__ glm::vec3 getFromFramebuffer(int x, int y, glm::vec3* frameb
71
62
}
72
63
}
73
64
74
- // Kernel that clears a given pixel buffer with a given color
65
+ // Kernel that clears a given pixel buffer with a given color
75
66
__global__ void clearImage (glm::vec2 resolution, glm::vec3* image, glm::vec3 color){
76
67
int x = (blockIdx .x * blockDim .x ) + threadIdx .x ;
77
68
int y = (blockIdx .y * blockDim .y ) + threadIdx .y ;
@@ -81,7 +72,7 @@ __global__ void clearImage(glm::vec2 resolution, glm::vec3* image, glm::vec3 col
81
72
}
82
73
}
83
74
84
- // Kernel that clears a given fragment buffer with a given fragment
75
+ // Kernel that clears a given fragment buffer with a given fragment
85
76
__global__ void clearDepthBuffer (glm::vec2 resolution, fragment* buffer, fragment frag){
86
77
int x = (blockIdx .x * blockDim .x ) + threadIdx .x ;
87
78
int y = (blockIdx .y * blockDim .y ) + threadIdx .y ;
@@ -94,7 +85,7 @@ __global__ void clearDepthBuffer(glm::vec2 resolution, fragment* buffer, fragmen
94
85
}
95
86
}
96
87
97
- // Kernel that writes the image to the OpenGL PBO directly.
88
+ // Kernel that writes the image to the OpenGL PBO directly.
98
89
__global__ void sendImageToPBO (uchar4 * PBOpos, glm::vec2 resolution, glm::vec3* image){
99
90
100
91
int x = (blockIdx .x * blockDim .x ) + threadIdx .x ;
@@ -128,29 +119,29 @@ __global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3*
128
119
}
129
120
}
130
121
131
- // TODO: Implement a vertex shader
122
+ // TODO: Implement a vertex shader
132
123
/**
 * Vertex-shading stage placeholder.
 *
 * Uses only the x components of the launch configuration to derive a flat
 * thread id, and restricts work to ids below vbosize / 3. No per-vertex work
 * is implemented yet, so the kernel currently has no effect on vbo.
 *
 * @param vbo      Vertex buffer (presumably 3 floats per vertex, given the
 *                 vbosize / 3 bound -- TODO confirm against the caller).
 * @param vbosize  Number of floats in vbo.
 */
__global__ void vertexShadeKernel(float* vbo, int vbosize){
  const int vertexId = (blockIdx.x * blockDim.x) + threadIdx.x;
  const int vertexCount = vbosize / 3;
  if(vertexId >= vertexCount){
    return;  // surplus thread in the last block
  }
  // TODO: transform the vertex owned by this thread.
}
137
128
138
- // TODO: Implement primative assembly
129
+ // TODO: Implement primitive assembly
139
130
/**
 * Primitive-assembly stage placeholder.
 *
 * Uses only the x components of the launch configuration to derive a flat
 * thread id, and restricts work to ids below ibosize / 3 (one id per group of
 * three indices). No assembly is implemented yet, so nothing is written to
 * primitives.
 *
 * @param vbo         Vertex buffer.
 * @param vbosize     Number of elements in vbo.
 * @param cbo         Color buffer.
 * @param cbosize     Number of elements in cbo.
 * @param ibo         Index buffer.
 * @param ibosize     Number of elements in ibo.
 * @param primitives  Output triangle array.
 */
__global__ void primitiveAssemblyKernel(float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize, triangle* primitives){
  const int triangleId = (blockIdx.x * blockDim.x) + threadIdx.x;
  const int triangleCount = ibosize / 3;
  if(triangleId >= triangleCount){
    return;  // surplus thread in the last block
  }
  // TODO: build the triangle owned by this thread.
}
145
136
146
- // TODO: Implement a rasterization method, such as scanline.
137
+ // TODO: Implement a rasterization method, such as scanline.
147
138
/**
 * Rasterization stage placeholder.
 *
 * Uses only the x components of the launch configuration to derive a flat
 * thread id, and restricts work to ids below primitivesCount. No rasterization
 * is implemented yet, so depthbuffer is left untouched.
 *
 * @param primitives       Input triangle array.
 * @param primitivesCount  Number of entries in primitives.
 * @param depthbuffer      Fragment buffer intended to receive the output.
 * @param resolution       Buffer dimensions.
 */
__global__ void rasterizationKernel(triangle* primitives, int primitivesCount, fragment* depthbuffer, glm::vec2 resolution){
  const int triangleId = (blockIdx.x * blockDim.x) + threadIdx.x;
  if(triangleId >= primitivesCount){
    return;  // surplus thread in the last block
  }
  // TODO: rasterize the triangle owned by this thread (e.g. scanline).
}
152
143
153
- // TODO: Implement a fragment shader
144
+ // TODO: Implement a fragment shader
154
145
__global__ void fragmentShadeKernel (fragment* depthbuffer, glm::vec2 resolution){
155
146
int x = (blockIdx .x * blockDim .x ) + threadIdx .x ;
156
147
int y = (blockIdx .y * blockDim .y ) + threadIdx .y ;
@@ -159,7 +150,7 @@ __global__ void fragmentShadeKernel(fragment* depthbuffer, glm::vec2 resolution)
159
150
}
160
151
}
161
152
162
- // Writes fragment colors to the framebuffer
153
+ // Writes fragment colors to the framebuffer
163
154
__global__ void render (glm::vec2 resolution, fragment* depthbuffer, glm::vec3* framebuffer){
164
155
165
156
int x = (blockIdx .x * blockDim .x ) + threadIdx .x ;
@@ -179,15 +170,15 @@ void cudaRasterizeCore(uchar4* PBOpos, glm::vec2 resolution, float frame, float*
179
170
dim3 threadsPerBlock (tileSize, tileSize);
180
171
dim3 fullBlocksPerGrid ((int )ceil (float (resolution.x )/float (tileSize)), (int )ceil (float (resolution.y )/float (tileSize)));
181
172
182
- // set up framebuffer
173
+ // set up framebuffer
183
174
framebuffer = NULL ;
184
175
cudaMalloc ((void **)&framebuffer, (int )resolution.x *(int )resolution.y *sizeof (glm::vec3));
185
176
186
- // set up depthbuffer
177
+ // set up depthbuffer
187
178
depthbuffer = NULL ;
188
179
cudaMalloc ((void **)&depthbuffer, (int )resolution.x *(int )resolution.y *sizeof (fragment));
189
180
190
- // kernel launches to black out accumulated/unaccumlated pixel buffers and clear our scattering states
181
+ // kernel launches to black out accumulated/unaccumulated pixel buffers and clear our scattering states
191
182
clearImage<<<fullBlocksPerGrid, threadsPerBlock>>> (resolution, framebuffer, glm::vec3 (0 ,0 ,0 ));
192
183
193
184
fragment frag;
@@ -196,9 +187,9 @@ void cudaRasterizeCore(uchar4* PBOpos, glm::vec2 resolution, float frame, float*
196
187
frag.position = glm::vec3 (0 ,0 ,-10000 );
197
188
clearDepthBuffer<<<fullBlocksPerGrid, threadsPerBlock>>> (resolution, depthbuffer,frag);
198
189
199
- // ------------------------------
200
- // memory stuff
201
- // ------------------------------
190
+ // ------------------------------
191
+ // memory stuff
192
+ // ------------------------------
202
193
primitives = NULL ;
203
194
cudaMalloc ((void **)&primitives, (ibosize/3 )*sizeof (triangle));
204
195
@@ -217,34 +208,34 @@ void cudaRasterizeCore(uchar4* PBOpos, glm::vec2 resolution, float frame, float*
217
208
tileSize = 32 ;
218
209
int primitiveBlocks = ceil (((float )vbosize/3 )/((float )tileSize));
219
210
220
- // ------------------------------
221
- // vertex shader
222
- // ------------------------------
211
+ // ------------------------------
212
+ // vertex shader
213
+ // ------------------------------
223
214
vertexShadeKernel<<<primitiveBlocks, tileSize>>> (device_vbo, vbosize);
224
215
225
216
cudaDeviceSynchronize ();
226
- // ------------------------------
227
- // primitive assembly
228
- // ------------------------------
217
+ // ------------------------------
218
+ // primitive assembly
219
+ // ------------------------------
229
220
primitiveBlocks = ceil (((float )ibosize/3 )/((float )tileSize));
230
221
primitiveAssemblyKernel<<<primitiveBlocks, tileSize>>> (device_vbo, vbosize, device_cbo, cbosize, device_ibo, ibosize, primitives);
231
222
232
223
cudaDeviceSynchronize ();
233
- // ------------------------------
234
- // rasterization
235
- // ------------------------------
224
+ // ------------------------------
225
+ // rasterization
226
+ // ------------------------------
236
227
rasterizationKernel<<<primitiveBlocks, tileSize>>> (primitives, ibosize/3 , depthbuffer, resolution);
237
228
238
229
cudaDeviceSynchronize ();
239
- // ------------------------------
240
- // fragment shader
241
- // ------------------------------
230
+ // ------------------------------
231
+ // fragment shader
232
+ // ------------------------------
242
233
fragmentShadeKernel<<<fullBlocksPerGrid, threadsPerBlock>>> (depthbuffer, resolution);
243
234
244
235
cudaDeviceSynchronize ();
245
- // ------------------------------
246
- // write fragments to framebuffer
247
- // ------------------------------
236
+ // ------------------------------
237
+ // write fragments to framebuffer
238
+ // ------------------------------
248
239
render<<<fullBlocksPerGrid, threadsPerBlock>>> (resolution, depthbuffer, framebuffer);
249
240
sendImageToPBO<<<fullBlocksPerGrid, threadsPerBlock>>> (PBOpos, resolution, framebuffer);
250
241
0 commit comments