-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKernels.cu
153 lines (119 loc) · 4.86 KB
/
Kernels.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#include "Kernels.cuh"
#include <cuda_runtime.h>
#include "Vector2D.h"
#include <stdio.h>
// Threads per block used by the kernel launches in this file; the host
// wrappers derive their grid size from it with a ceiling division.
#define BLOCK_SIZE 256
/**
 * Advances each particle by one time step and keeps it inside the
 * rectangle [0, width] x [0, height].
 *
 * Launch layout: 1D grid of 1D blocks; one thread handles one particle and
 * threads whose global index is >= count do nothing.
 *
 * @param particles  Array of at least `count` particles, updated in place.
 * @param count      Number of particles to process.
 * @param gravity    Acceleration added to velocity.y each step (scaled by dt).
 * @param dt         Time step.
 * @param width      Right wall coordinate (left wall is at 0).
 * @param height     Far wall coordinate on the y axis (near wall is at 0).
 */
__global__ void kernelUpdateParticles(Particle* particles, int count, float gravity, float dt, int width, int height) {
    // Fraction of the wall-normal speed kept after a bounce. Kept as a float
    // literal so the multiply is not promoted to double precision.
    const float WALL_DAMPING = 0.5f;

    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= count) {
        return;
    }

    Particle& p = particles[idx];

    // Integrate velocity first, then position with the updated velocity.
    // Note that y is advanced with the opposite sign of velocity.y.
    p.velocity.y += gravity * dt;
    p.position.x += p.velocity.x * dt;
    p.position.y -= p.velocity.y * dt;

    const float radius = p.radius;

    // Left and right walls: clamp the particle back inside and reflect the
    // x velocity with damping.
    if (p.position.x - radius < 0.0f) {
        p.position.x = radius;
        p.velocity.x = -WALL_DAMPING * p.velocity.x;
    }
    else if (p.position.x + radius > width) {
        p.position.x = width - radius;
        p.velocity.x = -WALL_DAMPING * p.velocity.x;
    }

    // Top and bottom walls: same treatment on the y axis.
    if (p.position.y - radius < 0.0f) {
        p.position.y = radius;
        p.velocity.y = -WALL_DAMPING * p.velocity.y;
    }
    else if (p.position.y + radius > height) {
        p.position.y = height - radius;
        p.velocity.y = -WALL_DAMPING * p.velocity.y;
    }
}
/**
 * Launches kernelUpdateParticles over `count` particles and blocks until the
 * device has finished.
 *
 * Launch and execution errors are reported with printf; the function does
 * not return a status.
 *
 * @param d_particles  Particle array passed straight to the kernel.
 * @param count        Number of particles; nothing is launched when <= 0.
 * @param gravity      Forwarded to the kernel.
 * @param dt           Forwarded to the kernel.
 * @param width        Forwarded to the kernel.
 * @param height       Forwarded to the kernel.
 */
void gpuUpdateParticles(Particle* d_particles, int count, float gravity, float dt, int width, int height) {
    // An empty set would give a zero-block grid, which is an invalid launch
    // configuration, so skip the launch entirely.
    if (count <= 0) {
        return;
    }

    // Ceiling division so the last partial block is still covered.
    int gridSize = (count + BLOCK_SIZE - 1) / BLOCK_SIZE;
    kernelUpdateParticles<<<gridSize, BLOCK_SIZE>>>(d_particles, count, gravity, dt, width, height);

    // Launch-configuration errors show up here.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("CUDA Error (Update): %s\n", cudaGetErrorString(err));
    }

    // Faults raised while the kernel runs are only reported by a
    // synchronizing call, so check its result instead of discarding it.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        printf("CUDA Error (Update sync): %s\n", cudaGetErrorString(err));
    }
}
// Fallback definition of the launch block size. BLOCK_SIZE is already defined
// unconditionally earlier in this file, so this guard is skipped here.
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 256
#endif
/**
 * Resolves particle-particle overlaps with an all-pairs scan and updates the
 * sleep state of each particle.
 *
 * Launch layout: 1D grid of 1D blocks; one thread owns particle `idx` and
 * compares it against every other particle. Threads with idx >= count exit.
 *
 * For every overlapping, approaching pair the thread applies an impulse along
 * the contact normal and a positional correction, both weighted by inverse
 * mass. A mass of 0 yields inverse mass 0, which the guards below treat as
 * "do not move". Sleeping particles are not moved either, but a contact whose
 * impulse magnitude exceeds the wake threshold wakes both participants.
 *
 * NOTE(review): the thread also writes to the partner particle (p2), which is
 * owned by another thread that may be reading or writing it at the same time.
 * There are no atomics or barriers here, so results can depend on scheduling.
 *
 * @param particles          Array of at least `count` particles, updated in place.
 * @param count              Number of particles.
 * @param dt                 Time added to the sleep timer of a slow particle.
 * @param correctionPercent  Fraction of the penetration depth removed per call.
 */
__global__
void kernelCollisionDetect(Particle* particles, int count, float dt, float correctionPercent)
{
    const float SLEEP_VELOCITY_THRESHOLD = 0.01f;
    const float SLEEP_TIMEOUT = 5.5f;
    // Impulse magnitude above which a contact wakes both particles.
    const float WAKE_IMPULSE_THRESHOLD = 0.1f;

    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= count) return;

    Particle& p1 = particles[idx];

    // The mass of this thread's particle is not modified in this kernel, so
    // its inverse is computed once outside the pair loop.
    const float invM1 = (p1.mass == 0.0f) ? 0.0f : 1.0f / p1.mass;

    for (int j = 0; j < count; ++j) {
        if (j == idx) continue;

        Particle& p2 = particles[j];

        // Calculate distance & overlap.
        Vector2D delta = p2.position - p1.position;
        float dist = delta.length();
        float minDist = p1.radius + p2.radius;

        // Skip coincident centres (no usable normal) and non-overlapping pairs.
        if (dist <= 0.0f || dist >= minDist) {
            continue;
        }

        Vector2D normal = delta / dist; // normalized direction from p1 to p2
        Vector2D rv = p2.velocity - p1.velocity;
        float relVelDot = rv.dot(normal);

        // Already separating along the normal: nothing to resolve.
        if (relVelDot > 0.0f) {
            continue;
        }

        const float invM2 = (p2.mass == 0.0f) ? 0.0f : 1.0f / p2.mass;
        const float invMassSum = invM1 + invM2;

        // Both inverse masses are zero: neither particle can move, and the
        // divisions below would produce inf/NaN.
        if (invMassSum == 0.0f) {
            continue;
        }

        float e = fminf(p1.restitution, p2.restitution);

        // Scalar impulse magnitude along the contact normal.
        float z = -(1.0f + e) * relVelDot / invMassSum;
        Vector2D impulse = normal * z;

        if (!p1.sleeping && invM1 > 0.0f) {
            p1.velocity -= impulse * invM1;
        }
        if (!p2.sleeping && invM2 > 0.0f) {
            p2.velocity += impulse * invM2;
        }

        float penetration = minDist - dist; // how far they overlapped
        if (penetration > 0.0f) {
            // Push the pair apart by a fraction of the overlap, split by inverse mass.
            Vector2D correction = normal * (correctionPercent * penetration / invMassSum);
            if (!p1.sleeping && invM1 > 0.0f) {
                p1.position -= correction * invM1;
            }
            if (!p2.sleeping && invM2 > 0.0f) {
                p2.position += correction * invM2;
            }
        }

        // Wake both particles on a strong enough hit. The test is on the
        // impulse magnitude z, not on the loop index j.
        if (fabsf(z) > WAKE_IMPULSE_THRESHOLD) {
            p1.sleeping = false;
            p1.sleepTimer = 0.0f;
            p2.sleeping = false;
            p2.sleepTimer = 0.0f;
        }
    }

    // Sleep bookkeeping for this thread's own particle: accumulate time while
    // it is slower than the threshold and put it to sleep after the timeout.
    float speedSq = p1.velocity.dot(p1.velocity);
    if (speedSq < (SLEEP_VELOCITY_THRESHOLD * SLEEP_VELOCITY_THRESHOLD)) {
        p1.sleepTimer += dt;
        if (p1.sleepTimer > SLEEP_TIMEOUT) {
            p1.sleeping = true;
            p1.velocity = Vector2D(0.0f, 0.0f);
        }
    }
    else {
        p1.sleepTimer = 0.0f;
        p1.sleeping = false; // if it moved significantly, it's awake
    }
}
/**
 * Runs several passes of kernelCollisionDetect over `count` particles and
 * blocks until the device has finished.
 *
 * Launch and execution errors are reported with printf; the function does
 * not return a status.
 *
 * NOTE(review): `dt` is forwarded unchanged to every pass and the kernel adds
 * it to a slow particle's sleep timer each time, so the timer can advance by
 * up to collisionIterations * dt per call. Confirm this is intended.
 *
 * @param d_particles  Particle array passed straight to the kernel.
 * @param count        Number of particles; nothing is launched when <= 0.
 * @param dt           Time step forwarded to the kernel.
 */
void gpuCollisionDetect(Particle* d_particles, int count, float dt) {
    // An empty set would give a zero-block grid, which is an invalid launch
    // configuration, so skip the launches entirely.
    if (count <= 0) {
        return;
    }

    const int collisionIterations = 5;
    const float correctionPercent = 0.8f;

    // Ceiling division so the last partial block is still covered.
    const int gridSize = (count + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // All passes are issued to the same stream, so they execute in launch
    // order without a device-wide sync between them.
    for (int i = 0; i < collisionIterations; i++) {
        kernelCollisionDetect<<<gridSize, BLOCK_SIZE>>>(d_particles, count, dt, correctionPercent);

        cudaError_t launchErr = cudaGetLastError();
        if (launchErr != cudaSuccess) {
            printf("CUDA Error (Collision): %s\n", cudaGetErrorString(launchErr));
            break; // the same configuration would fail again
        }
    }

    // Single sync at the end: waits for every pass and surfaces faults raised
    // while the kernels ran.
    cudaError_t syncErr = cudaDeviceSynchronize();
    if (syncErr != cudaSuccess) {
        printf("CUDA Error (Collision sync): %s\n", cudaGetErrorString(syncErr));
    }
}