Skip to content
This repository was archived by the owner on Jul 4, 2022. It is now read-only.

Commit a0df51d

Browse files
committed
clean up RANSAC
1 parent 803adfa commit a0df51d

File tree

3 files changed

+100
-128
lines changed

3 files changed

+100
-128
lines changed

jetson/percep_obs_detect/include/plane-ransac.hpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class RansacPlane {
5454
/**
5555
* \brief RansacPlane constructor
5656
* \param axis Unit vector giving the axis normal to the plane we're trying to find
57-
* \param epsilon How far off-angle a plane can be from the input axis and still be considered. Unit of degree
57+
* \param epsilon How far off-angle a plane can be from the input axis and still be considered. Unit of degrees
5858
* \param iterations The number of randomly selected models to attempt to fit to the data, must be less than 1024
5959
* \param threshold The maximum allowed distance from the model for a point to be considered an inlier
6060
* \param pcSize The maximum expected size of a point cloud passed to this algorithm (use the frame resolution area)
@@ -77,24 +77,21 @@ class RansacPlane {
7777
*/
7878
void selectOptimalModel();
7979

80-
//user given model parms
80+
// user given model parms
8181
GPU_Cloud pc;
8282
float3 axis;
8383
float epsilon;
8484
int iterations;
8585
float threshold;
8686
float removalRadius;
8787

88-
//internal info [GPU]
89-
float* inlierCounts;
90-
int* modelPoints;
88+
// internal info [GPU]
89+
int* inlierCounts;
90+
int3* candidatePlanePoints;
9191
Plane* selection;
92-
int* inlierListGPU;
93-
int* optimalModelIndex;
9492

9593
//internal info [CPU]
96-
Plane* selectedModel;
97-
//int* inlierList;
94+
Plane* selectionCPU;
9895

9996

10097
};

jetson/percep_obs_detect/src/obs-detector.cpp

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -132,42 +132,15 @@ void ObsDetector::update(sl::Mat &frame) {
132132
GPU_Cloud pc;
133133
getRawCloud(pc, frame);
134134

135-
136-
// TEST CODE
137-
/*
138-
int size = 8;
139-
float4 testCPU[size] = {
140-
{1,0,-2,4},
141-
{2,0,2,4},
142-
{0,0,0,4},
143-
{10000,10000,10000,10000},
144-
{1,0,-1,4},
145-
{2,0,2,4},
146-
{0,0,2,4},
147-
{10000,-10000,10001,10000}
148-
};
149-
150-
GPU_Cloud testCPUpc {
151-
testCPU, size
152-
};
153-
cout << "Constructed\n";
154-
float4* testGPU;
155-
cudaMalloc(&testGPU, sizeof(float4)*size);
156-
cudaMemcpy(testGPU, testCPU, sizeof(sl::float4)*size, cudaMemcpyHostToDevice);
157-
GPU_Cloud testPC = { testGPU, size};
158-
cout << "Copied over \n";
159-
160-
*/
161135
// Processing
162-
163136
cout << "Original Size: "<< pc.size << "\n";
164137
passZ->run(pc);
165138
cout << "Pass-Through ran\n";
166139
std::cout << "pre ransac:" << pc.size << endl;
167140
ransacPlane->computeModel(pc);
168141
std::cout << "post ransac:" << pc.size << endl;
169142

170-
/*
143+
171144
Bins bins;
172145

173146
#if VOXEL

jetson/percep_obs_detect/src/plane-ransac.cu

Lines changed: 93 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -7,203 +7,205 @@
77

88
#define DEBUG
99

10-
// TODO: move this into common
10+
// does ceiling division
1111
__device__ int ceilDivGPU(int a, int b) {
1212
return (a + b - 1) / b;
1313
}
1414

15+
/**
16+
* Launch with iteration [aka, number of candidate models] many blocks and MAX_THREADS
17+
* \brief Runs through RANSAC 'iterations' and computes the number of inliers for each iteration's candidate
18+
* \param pc Point cloud to process
19+
* \param candidatePlanePoints Array len(iterations) of randomly generated candidate planes. Each elt is an int3 containing
20+
* the INDICIES of three points from the point cloud data array which form a candidate plane
21+
* \param threshold How far a point may be from the candidate plane to be considered an inlier
22+
* \param axis The axis normal to the plane we are looking for
23+
* \param epsilon How far off the given axis a candidate plane can be without being rejected
24+
* \param inlierCountOut An output array len(iterations) where the number of inliers for each candidate tried is stored
25+
*/
26+
1527
/*
16-
LAUNCH:
17-
- [Block] # iterations [aka, randomly selected models] to try
18-
- [Thread] MAX_THREADS
19-
20-
REQUIRES:
21-
- GPU point cloud
22-
- A buffer to write inlier counts for each attempted model
23-
- A buffer that tells the kernel what the randomly selected points were for each model
24-
- Threshold distance for a pt to be considered an inlier
25-
EFFECTS:
26-
27-
[Block]:
2828
Each block represents an "iteration" of the traditional RANSAC algorithm.
29-
That is, every block has a different set of randomly chosen 3 points that define the
30-
model (a plane is minimally defined by 3 points). The threads in the block then check
31-
every point in the point cloud against the model for that block.
32-
33-
[Thread]:
34-
Threads are used to decide how many points are inliers to the model. If
35-
thread max = 1024 and there are 2048 points, each thread will process 2
36-
points. Each thread will write the number of inliers in the set of points it evaluated
37-
to its specific spot in shared memory. Threads are synced, and then threads will
38-
participate in a parallel reduction to give the total number of inliers for that block/model,
39-
which will be returned from the kernel in the inlierCounts buffer.
29+
The threads in the block then check every point in the point cloud against
30+
the candidate for that iteration. Each thread is responsible for
31+
total_points/num_threads many points. The thread computes how many of its
32+
own points are inliers. It then participates in parallel reduction to give
33+
the total number of inliers for that iteration in the output buffer
4034
*/
41-
__global__ void ransacKernel(GPU_Cloud pc, float* inlierCounts, int* modelPoints, float threshold, float3 axis, float epsilon) {
42-
__shared__ float inlierField[MAX_THREADS];
43-
inlierField[threadIdx.x] = 0;
4435

45-
int iteration = blockIdx.x; //which "iteration" of RANSAC
46-
float inliers = 0; //number of inliers in this thread
36+
__global__ void ransacKernel(GPU_Cloud pc, int3* candidatePlanePoints, float threshold, float3 axis, float epsilon, int* inlierCountsOut) {
37+
// Set up internal inlier counts for each thread of this block to write to
38+
__shared__ float inlierCountInternal[MAX_THREADS];
39+
inlierCountInternal[threadIdx.x] = 0;
40+
41+
// Which "iteration" of RANSAC
42+
int iteration = blockIdx.x;
43+
int3 candidateModelIndicies = candidatePlanePoints[iteration];
4744

48-
// select 3 random points from the cloud as the model that this particular block will evaluate
49-
int randIdx0 = modelPoints[iteration*3 + 0];
50-
int randIdx1 = modelPoints[iteration*3 + 1];
51-
int randIdx2 = modelPoints[iteration*3 + 2];
45+
// Number of inliers in this thread
46+
float inliers = 0;
5247

53-
if(randIdx0 >= pc.size || randIdx1 >= pc.size || randIdx2 >= pc.size) {
54-
inlierCounts[iteration] = 0;
48+
// Verify that this candidate plane is actually valid and doesn't consist of a point that goes out of bounds
49+
if(candidateModelIndicies.x >= pc.size || candidateModelIndicies.y >= pc.size || candidateModelIndicies.z >= pc.size) {
50+
// Mark as an invalid model so its not chosen
51+
inlierCountsOut[iteration] = -1;
5552
return;
5653
}
5754

58-
float3 modelPt0 = make_float3(pc.data[randIdx0].x, pc.data[randIdx0].y, pc.data[randIdx0].z);
59-
float3 modelPt1 = make_float3(pc.data[randIdx1].x, pc.data[randIdx1].y, pc.data[randIdx1].z);
60-
float3 modelPt2 = make_float3(pc.data[randIdx2].x, pc.data[randIdx2].y, pc.data[randIdx2].z);
55+
float3 modelPt0 = make_float3(pc.data[candidateModelIndicies.x]);
56+
float3 modelPt1 = make_float3(pc.data[candidateModelIndicies.y]);
57+
float3 modelPt2 = make_float3(pc.data[candidateModelIndicies.z]);
6158

62-
// Create a plane from the 3 points
59+
// Create the candidate plane from the 3 points
6360
Plane plane(modelPt0, modelPt1, modelPt2);
6461

65-
//check that n dot desired axis is less than epsilon, if so, return here
62+
// Verify that the candidate plane's normal is within epsilon of desired axis
6663
if(abs(dot(normalize(plane.normal), normalize(axis))) < epsilon) {
67-
if(threadIdx.x == 0) inlierCounts[iteration] = 0; //make it -1 to show invalid model?
64+
// Mark as an invalid model so its not chosen
65+
inlierCountsOut[iteration] = 0;
6866
return;
6967
}
7068

7169
// Construct predicate to chek if a point is an inlier in the plane
7270
NotInPlane pred(plane.normal, modelPt1, threshold);
7371

74-
// figure out how many points each thread must compute distance for and determine if each is inlier/outlier
72+
// Figure out how many points each thread must check
7573
int pointsPerThread = ceilDivGPU(pc.size, MAX_THREADS);
7674
for(int i = 0; i < pointsPerThread; i++) {
77-
// select a point index or return if this isn't a valid point
75+
// Select a point index or return if this isn't a valid point
7876
int pointIdx = threadIdx.x * pointsPerThread + i;
79-
if(pointIdx >= pc.size) continue; //TODO Should this be return???
77+
if(pointIdx >= pc.size) continue;
8078

81-
// point in the point cloud that could be an inlier or outlier
82-
float4 curPt = make_float4(pc.data[pointIdx].x, pc.data[pointIdx].y, pc.data[pointIdx].z, 0);
79+
// Point in the point cloud that could be an inlier or outlier
80+
float4 curPt = pc.data[pointIdx];
8381

84-
//add a 1 if inlier in plane, 0 if not
85-
inliers += (pred(curPt)) ? 0 : 1; //very probalmatic line, how can we reduce these checks
82+
// Add a 1 if inlier in plane, 0 if not
83+
inliers += (pred(curPt)) ? 0 : 1;
8684
}
8785

88-
//parallel reduction to get an aggregate sum of the number of inliers for this model
89-
//this is all equivalent to sum(inlierField), but it does it in parallel
90-
inlierField[threadIdx.x] = inliers;
86+
// Load the inliers for this thread into the shared memory that all threads can access
87+
inlierCountInternal[threadIdx.x] = inliers;
9188
__syncthreads();
89+
90+
// Parallel reduction to get an aggregate sum of the number of inliers for this model
91+
// This is all equivalent to sum(inlierCountInternal), but it does it in parallel
9292
int aliveThreads = (blockDim.x) / 2;
9393
while (aliveThreads > 0) {
9494
if (threadIdx.x < aliveThreads) {
95-
inliers += inlierField[aliveThreads + threadIdx.x];
96-
if (threadIdx.x >= (aliveThreads) / 2) inlierField[threadIdx.x] = inliers;
95+
inliers += inlierCountInternal[aliveThreads + threadIdx.x];
96+
if (threadIdx.x >= (aliveThreads) / 2) inlierCountInternal[threadIdx.x] = inliers;
9797
}
9898
__syncthreads();
9999
aliveThreads /= 2;
100100
}
101101

102-
//at the final thread, write to global memory
102+
// At the final thread, write the number of inliers for this iteration's model to global memory
103103
if(threadIdx.x == 0) {
104-
inlierCounts[iteration] = inliers;
104+
inlierCountsOut[iteration] = inliers;
105105
}
106106
}
107107

108108
/**
109109
* \brief Updates the plane selection from the cloud using the given model index
110110
*/
111-
__global__ void getOptimalModelPoints(GPU_Cloud pc, Plane &selection, int idx, int* modelPoints, float* maxCount) {
112-
int pt = threadIdx.x;
113-
float4 point = pc.data[modelPoints[3*idx + pt]];
114-
selection[pt] = make_float3(point.x, point.y, point.z);
111+
__global__ void getOptimalModelPoints(GPU_Cloud pc, Plane &selection, int optimalCandidateIndex, int3* candidatePlanePoints, int* maxInliersCount) {
112+
// Each thread is responsible for one point in the output candidate plane, there will be 3 threads
113+
int candidatePtNum = threadIdx.x;
114+
115+
// Copy the point from the cloud into the output plane object
116+
float4 point;
117+
if(candidatePtNum == 0) point = pc.data[candidatePlanePoints[optimalCandidateIndex].x];
118+
if(candidatePtNum == 1) point = pc.data[candidatePlanePoints[optimalCandidateIndex].y];
119+
if(candidatePtNum == 2) point = pc.data[candidatePlanePoints[optimalCandidateIndex].z];
120+
121+
selection[candidatePtNum] = make_float3(point);
115122

116123
// Use one thread to compute the normal
117124
__syncthreads();
118125
if(threadIdx.x == 0) {
119126
selection.ComputeNormal();
120127

121128
#ifdef DEBUG
122-
printf("Winner model inlier count: %f \n", *maxCount);
129+
printf("Winner model inlier count: %d \n", *maxInliersCount);
123130
#endif
124131
}
125132
}
126133

127134
void RansacPlane::selectOptimalModel() {
128-
float* maxCount = thrust::max_element(thrust::device, inlierCounts, inlierCounts + iterations);
129-
// Pointer arithmetic gives us the model index with most inliers
130-
int maxIdx = maxCount - inlierCounts;
131-
// Send the index to GPU
132-
cudaMemcpy(optimalModelIndex, &maxIdx , sizeof(int), cudaMemcpyHostToDevice);
133-
// Now launch a kernel to write the Plane of this model into selection
134-
getOptimalModelPoints<<<1, 3>>>(pc, *selection, maxIdx, modelPoints, maxCount);
135+
// Get a pointer to the elt in inlierCounts that is the greatest
136+
int* maxCount = thrust::max_element(thrust::device, inlierCounts, inlierCounts + iterations);
137+
// Pointer arithmetic gives us the optimal model index with most inliers
138+
int optIdx = maxCount - inlierCounts;
139+
// Now launch a kernel to construct the Plane object 'selection'
140+
getOptimalModelPoints<<<1, 3>>>(pc, *selection, optIdx, candidatePlanePoints, maxCount);
135141
checkStatus(cudaDeviceSynchronize());
136142
}
137143

138144
RansacPlane::RansacPlane(float3 axis, float epsilon, int iterations, float threshold, int pcSize, float removalRadius)
139145
: pc(pc), axis(axis), epsilon(epsilon), iterations(iterations), threshold(threshold), removalRadius(removalRadius) {
140146

141147
//Set up buffers needed for RANSAC
142-
cudaMalloc(&inlierCounts, sizeof(float) * iterations);
143-
cudaMalloc(&modelPoints, sizeof(int) * iterations * 3);
148+
cudaMalloc(&inlierCounts, sizeof(int) * iterations);
149+
cudaMalloc(&candidatePlanePoints, sizeof(int3) * iterations);
144150
cudaMalloc(&selection, sizeof(Plane));
145-
cudaMalloc(&optimalModelIndex, sizeof(int));
146151

147152
//Generate random numbers in CPU to use in RANSAC kernel
148-
int* randomNumsCPU = (int*) malloc(sizeof(int) * iterations* 3);
149-
153+
int3* randomNumsCPU = (int3*) malloc(sizeof(int3) * iterations);
150154
for(int i = 0; i < iterations; i++) {
151155
int a = 0;
152156
int b = 0;
153157
int c = 0;
158+
// Prevent duplicate points in a particular model
154159
while(a == b || b == c || a == c) {
155160
a = rand() % pcSize;
156161
b = rand() % pcSize;
157162
c = rand() % pcSize;
158163
}
159-
160-
randomNumsCPU[i*3] = a;
161-
randomNumsCPU[i*3 + 1] = b;
162-
randomNumsCPU[i*3 + 2] = c;
164+
randomNumsCPU[i].x = a;
165+
randomNumsCPU[i].y = b;
166+
randomNumsCPU[i].z = c;
163167
}
164-
165-
cudaMemcpy(modelPoints, randomNumsCPU, sizeof(int) * iterations * 3, cudaMemcpyHostToDevice);
168+
cudaMemcpy(candidatePlanePoints, randomNumsCPU, sizeof(int3) * iterations, cudaMemcpyHostToDevice);
166169
free(randomNumsCPU);
167170

168-
// Generate a buffer for retreiving the selected model from CUDA Kernels
169-
selectedModel = (Plane*) malloc(sizeof(Plane));
171+
// Generate a CPU buffer for retreiving the selected model from CUDA Kernels
172+
selectionCPU = (Plane*) malloc(sizeof(Plane));
170173
}
171174

172175
Plane RansacPlane::computeModel(GPU_Cloud &pc) {
173-
if(pc.size == 0) return {make_float3(0,0,0), make_float3(0,0,0), make_float3(0,0,0)};
176+
if(pc.size == 0) {
177+
std::cout << "[WARNING] Can't run RANSAC on empty plane." << std::endl;
178+
return {make_float3(0,0,0), make_float3(0,0,0), make_float3(0,0,0)};
179+
}
174180

175181
// Copy vars locally
176182
this->pc = pc;
177183
int blocks = iterations;
178184
int threads = MAX_THREADS;
179185

180186
// Get a list of models and corresponding inlier count
181-
ransacKernel<<<blocks, threads>>>(pc, inlierCounts, modelPoints, threshold, axis, cos(epsilon*3.1415/180));
187+
ransacKernel<<<blocks, threads>>>(pc, candidatePlanePoints, threshold, axis, cos(epsilon*3.1415/180), inlierCounts);
182188
checkStatus(cudaGetLastError());
183189
checkStatus(cudaDeviceSynchronize());
184190

185191
// Choose the model with the greatest inlier count
186192
selectOptimalModel();
187193

188-
checkStatus(cudaGetLastError());
189-
checkStatus(cudaDeviceSynchronize());
190-
191194
// Copy selected plane to CPU
192-
cudaMemcpy(selectedModel, selection, sizeof(Plane), cudaMemcpyDeviceToHost);
195+
cudaMemcpy(selectionCPU, selection, sizeof(Plane), cudaMemcpyDeviceToHost);
193196

194197
// Filter out all the points in the plane
195-
NotInPlane predicate(selectedModel->normal, selectedModel->p1, threshold);
198+
NotInPlane predicate(selectionCPU->normal, selectionCPU->p1, threshold);
196199
Filter<NotInPlane>(pc, predicate, FilterOp::REMOVE, 0);
197200
checkStatus(cudaGetLastError());
198201
checkStatus(cudaDeviceSynchronize());
199202

200-
return *selectedModel;
203+
return *selectionCPU;
201204
}
202205

203206
RansacPlane::~RansacPlane() {
204207
cudaFree(inlierCounts);
205-
cudaFree(modelPoints);
208+
cudaFree(candidatePlanePoints);
206209
cudaFree(selection);
207-
cudaFree(optimalModelIndex);
208-
free(selectedModel);
210+
free(selectionCPU);
209211
}

0 commit comments

Comments
 (0)