Skip to content
This repository was archived by the owner on Jul 4, 2022. It is now read-only.

Commit 803adfa

Browse files
committed
replaced optimal model selection kernel with thrust
1 parent 97321ee commit 803adfa

File tree

3 files changed

+36
-68
lines changed

3 files changed

+36
-68
lines changed

jetson/percep_obs_detect/include/plane-ransac.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ class RansacPlane {
7272
Plane computeModel(GPU_Cloud &pc);
7373

7474
private:
75+
/**
76+
* \brief Picks the model with the highest inlier count and updates the Plane "selection"
77+
*/
78+
void selectOptimalModel();
79+
7580
//user given model params
7681
GPU_Cloud pc;
7782
float3 axis;

jetson/percep_obs_detect/src/obs-detector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ void ObsDetector::update(sl::Mat &frame) {
167167
ransacPlane->computeModel(pc);
168168
std::cout << "post ransac:" << pc.size << endl;
169169

170-
170+
/*
171171
Bins bins;
172172
173173
#if VOXEL

jetson/percep_obs_detect/src/plane-ransac.cu

Lines changed: 30 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
#include "filter.hpp"
33
#include <stdlib.h>
44
#include <unistd.h>
5+
#include <thrust/extrema.h>
6+
7+
8+
#define DEBUG
59

610
// TODO: move this into common
711
__device__ int ceilDivGPU(int a, int b) {
@@ -38,7 +42,7 @@ __global__ void ransacKernel(GPU_Cloud pc, float* inlierCounts, int* modelPoints
3842
__shared__ float inlierField[MAX_THREADS];
3943
inlierField[threadIdx.x] = 0;
4044

41-
int iteration = blockIdx.x; //which "iteration"
45+
int iteration = blockIdx.x; //which "iteration" of RANSAC
4246
float inliers = 0; //number of inliers in this thread
4347

4448
// select 3 random points from the cloud as the model that this particular block will evaluate
@@ -101,78 +105,36 @@ __global__ void ransacKernel(GPU_Cloud pc, float* inlierCounts, int* modelPoints
101105
}
102106
}
103107

104-
//to avoid kernel launch time, this could actually be appended to the bottom of the ransacKernel,
105-
//after a syncthreads() call. But for now it will be left separate for the purpose of clarity.
106-
//kernel launch time is likely in tens of microseconds. TODO test to confirm this theory
107-
/*
108-
LAUNCH:
109-
- [Block] 1
110-
- [Thread] Number of attempted models ("iterations")
111-
112-
REQUIRES:
113-
- Buffer with inlier counts for each attempted model in RANSAC
114-
- Output in memory the 3 points of the selected model
108+
/**
109+
* \brief Updates the plane selection from the cloud using the given model index
110+
*/
111+
__global__ void getOptimalModelPoints(GPU_Cloud pc, Plane &selection, int idx, int* modelPoints, float* maxCount) {
112+
int pt = threadIdx.x;
113+
float4 point = pc.data[modelPoints[3*idx + pt]];
114+
selection[pt] = make_float3(point.x, point.y, point.z);
115115

116-
EFFECTS:
117-
- Selects the optimal model (the one with the greatest inlier count)
118-
- Outputs the points of this model
119-
*/
120-
// optimalMOdel out = { p1.x, p1.y, p1.z, p2.x, p2.y, p2.z, p3.x, p3.y, p3.z}
121-
__global__ void selectOptimalRansacModel(GPU_Cloud pc, float* inlierCounts, int* modelPoints, Plane& optimalModelOut, int iterations, int* optimalModelIndex) {
122-
123-
__shared__ float inlierCountsLocal[MAX_THREADS];
124-
__shared__ int modelIndiciesLocal[MAX_THREADS];
125-
126-
//TODO: This can easily index out of bounds if threadIdx.x > numPoints in the PC
127-
//another problem: we must initialize the inlierCountsLocal with low values that won't be chosen
128-
129-
// Populate the locally defined arrays
130-
float inliers = (threadIdx.x < iterations) ? inlierCounts[threadIdx.x] : 0;
131-
int optimalModel = threadIdx.x;
132-
inlierCountsLocal[threadIdx.x] = inliers;
133-
modelIndiciesLocal[threadIdx.x] = optimalModel;
116+
// Use one thread to compute the normal
134117
__syncthreads();
135-
136-
// Parallel reduction to determine the model with the largest number of inliers
137-
int aliveThreads = (blockDim.x) / 2;
138-
while (aliveThreads > 0) {
139-
if (threadIdx.x < aliveThreads) {
140-
int temp = max(inlierCountsLocal[aliveThreads + threadIdx.x], inliers);
141-
if(temp > inliers) {
142-
inliers = temp;
143-
optimalModel = modelIndiciesLocal[aliveThreads + threadIdx.x];
144-
}
145-
146-
if (threadIdx.x >= (aliveThreads) / 2) {
147-
modelIndiciesLocal[threadIdx.x] = optimalModel;
148-
inlierCountsLocal[threadIdx.x] = inliers;
149-
}
150-
}
151-
__syncthreads();
152-
aliveThreads /= 2;
153-
}
154-
155-
//at the final thread, write to global memory
156-
if(threadIdx.x < 3) {
157-
float3 pt = make_float3(pc.data[ modelPoints[modelIndiciesLocal[0]*3 + threadIdx.x] ].x, pc.data[ modelPoints[modelIndiciesLocal[0]*3 + threadIdx.x] ].y, pc.data[ modelPoints[modelIndiciesLocal[0]*3 + threadIdx.x] ].z);
158-
159-
// Set output model
160-
optimalModelOut[threadIdx.x] = pt;
161-
}
162-
163-
__syncthreads();
164-
165118
if(threadIdx.x == 0) {
166-
// Find normal to the plane
167-
optimalModelOut.ComputeNormal();
119+
selection.ComputeNormal();
168120

169-
printf("winner model inlier count: %f \n", inlierCountsLocal[0]);
170-
171-
//check here if the inlier counts local is 0, if so return -1 instead
172-
*optimalModelIndex = (inlierCountsLocal[0] > 1.0) ? modelIndiciesLocal[0] : -1;
121+
#ifdef DEBUG
122+
printf("Winner model inlier count: %f \n", *maxCount);
123+
#endif
173124
}
174125
}
175126

127+
/**
 * \brief Picks the model with the highest inlier count and updates the Plane "selection"
 *
 * Steps:
 *   1. thrust::max_element finds the largest entry among the first `iterations`
 *      values of inlierCounts (accessed with the thrust::device policy).
 *   2. The winning index is copied into optimalModelIndex (host-to-device copy).
 *      If the best inlier count is not greater than 1, -1 is published instead,
 *      matching what the previous selectOptimalRansacModel kernel wrote.
 *   3. getOptimalModelPoints is launched with 1 block of 3 threads to write the
 *      winning model's three points and its normal into *selection.
 *
 * Every CUDA call is routed through checkStatus, and the function blocks until
 * the kernel has finished (cudaDeviceSynchronize).
 */
void RansacPlane::selectOptimalModel() {
    // No candidate models: publish the "no model" sentinel and leave selection untouched.
    if (iterations <= 0) {
        int noModel = -1;
        checkStatus(cudaMemcpy(optimalModelIndex, &noModel, sizeof(int), cudaMemcpyHostToDevice));
        return;
    }

    float* maxCount = thrust::max_element(thrust::device, inlierCounts, inlierCounts + iterations);
    // Pointer arithmetic gives us the model index with most inliers
    int maxIdx = static_cast<int>(maxCount - inlierCounts);

    // maxCount points into the inlierCounts buffer, so fetch the value with an
    // explicit copy rather than dereferencing it on the host.
    float bestCount = 0.0f;
    checkStatus(cudaMemcpy(&bestCount, maxCount, sizeof(float), cudaMemcpyDeviceToHost));

    // The old reduction kernel reported -1 when the winner had no meaningful
    // inlier support; keep that contract for whatever reads optimalModelIndex.
    int publishedIdx = (bestCount > 1.0f) ? maxIdx : -1;
    checkStatus(cudaMemcpy(optimalModelIndex, &publishedIdx, sizeof(int), cudaMemcpyHostToDevice));

    // Now launch a kernel to write the Plane of this model into selection.
    // maxIdx (not publishedIdx) is used so the kernel always indexes a valid model.
    getOptimalModelPoints<<<1, 3>>>(pc, *selection, maxIdx, modelPoints, maxCount);
    checkStatus(cudaGetLastError());       // launch-configuration errors
    checkStatus(cudaDeviceSynchronize());  // errors raised while the kernel ran
}
137+
176138
RansacPlane::RansacPlane(float3 axis, float epsilon, int iterations, float threshold, int pcSize, float removalRadius)
177139
: pc(pc), axis(axis), epsilon(epsilon), iterations(iterations), threshold(threshold), removalRadius(removalRadius) {
178140

@@ -221,7 +183,8 @@ Plane RansacPlane::computeModel(GPU_Cloud &pc) {
221183
checkStatus(cudaDeviceSynchronize());
222184

223185
// Choose the model with the greatest inlier count
224-
selectOptimalRansacModel<<<1, MAX_THREADS>>>(pc, inlierCounts, modelPoints, *selection, iterations, optimalModelIndex);
186+
selectOptimalModel();
187+
225188
checkStatus(cudaGetLastError());
226189
checkStatus(cudaDeviceSynchronize());
227190

0 commit comments

Comments
 (0)