Skip to content
34 changes: 19 additions & 15 deletions modules/cudaarithm/src/cuda/threshold.cu
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ __global__ void otsu_sums(uint *histogram, uint *threshold_sums, unsigned long l
}

__global__ void
otsu_variance(float2 *variance, uint *histogram, uint *threshold_sums, unsigned long long *sums)
otsu_variance(float2 *variance, uint *histogram, uint *threshold_sums, unsigned long long *sums, uint n_samples)
{
const uint n_bins = 256;

Expand All @@ -137,7 +137,6 @@ otsu_variance(float2 *variance, uint *histogram, uint *threshold_sums, unsigned
int bin_idx = threadIdx.x;
int threshold = blockIdx.x;

uint n_samples = threshold_sums[0];
uint n_samples_above = threshold_sums[threshold];
uint n_samples_below = n_samples - n_samples_above;

Expand All @@ -149,15 +148,21 @@ otsu_variance(float2 *variance, uint *histogram, uint *threshold_sums, unsigned
float threshold_variance_below_f32 = 0;
if (bin_idx > threshold)
{
float mean = (float) sum_above / n_samples_above;
float sigma = bin_idx - mean;
threshold_variance_above_f32 = sigma * sigma;
if (n_samples_above > 0)
{
float mean = (float) sum_above / n_samples_above;
float sigma = bin_idx - mean;
threshold_variance_above_f32 = sigma * sigma;
}
}
else
{
float mean = (float) sum_below / n_samples_below;
float sigma = bin_idx - mean;
threshold_variance_below_f32 = sigma * sigma;
if (n_samples_below > 0)
{
float mean = (float) sum_below / n_samples_below;
float sigma = bin_idx - mean;
threshold_variance_below_f32 = sigma * sigma;
}
}

uint bin_count = histogram[bin_idx];
Expand Down Expand Up @@ -198,15 +203,14 @@ __device__ bool has_lowest_score(
}

__global__ void
otsu_score(uint *otsu_threshold, uint *threshold_sums, float2 *variance)
otsu_score(uint *otsu_threshold, uint *threshold_sums, float2 *variance, uint n_samples)
{
const uint n_thresholds = 256;

__shared__ float shared_memory[n_thresholds];

int threshold = threadIdx.x;

uint n_samples = threshold_sums[0];
uint n_samples_above = threshold_sums[threshold];
uint n_samples_below = n_samples - n_samples_above;

Expand Down Expand Up @@ -241,7 +245,7 @@ otsu_score(uint *otsu_threshold, uint *threshold_sums, float2 *variance)
}
}

void compute_otsu(uint *histogram, uint *otsu_threshold, Stream &stream)
void compute_otsu(uint *histogram, uint *otsu_threshold, uint n_samples, Stream &stream)
{
const uint n_bins = 256;
const uint n_thresholds = 256;
Expand All @@ -261,12 +265,12 @@ void compute_otsu(uint *histogram, uint *otsu_threshold, Stream &stream)
otsu_sums<<<grid_all, block_all, 0, cuda_stream>>>(
histogram, gpu_threshold_sums.ptr<uint>(), gpu_sums.ptr<unsigned long long>());
otsu_variance<<<grid_all, block_all, 0, cuda_stream>>>(
gpu_variances.ptr<float2>(), histogram, gpu_threshold_sums.ptr<uint>(), gpu_sums.ptr<unsigned long long>());
gpu_variances.ptr<float2>(), histogram, gpu_threshold_sums.ptr<uint>(), gpu_sums.ptr<unsigned long long>(), n_samples);
otsu_score<<<grid_score, block_score, 0, cuda_stream>>>(
otsu_threshold, gpu_threshold_sums.ptr<uint>(), gpu_variances.ptr<float2>());
otsu_threshold, gpu_threshold_sums.ptr<uint>(), gpu_variances.ptr<float2>(), n_samples);
}

// TODO: Replace this is cv::cuda::calcHist
// TODO: Replace this with cv::cuda::calcHist
template <uint n_bins>
__global__ void histogram_kernel(
uint *histogram, const uint8_t *image, uint width,
Expand Down Expand Up @@ -334,7 +338,7 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
calcHist(src, gpu_histogram, stream);

GpuMat gpu_otsu_threshold(1, 1, CV_32SC1, pool.getAllocator());
compute_otsu(gpu_histogram.ptr<uint>(), gpu_otsu_threshold.ptr<uint>(), stream);
compute_otsu(gpu_histogram.ptr<uint>(), gpu_otsu_threshold.ptr<uint>(), src.rows * src.cols, stream);

cv::Mat mat_otsu_threshold;
gpu_otsu_threshold.download(mat_otsu_threshold, stream);
Expand Down
1 change: 1 addition & 0 deletions modules/cudacodec/include/opencv2/cudacodec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ class CV_EXPORTS_W NVSurfaceToColorConverter {
* @param stream Stream for the asynchronous version.
*/
CV_WRAP virtual bool convert(InputArray yuv, OutputArray color, const SurfaceFormat surfaceFormat, const ColorFormat outputFormat, const BitDepth bitDepth = BitDepth::UNCHANGED, const bool planar = false, cuda::Stream& stream = cuda::Stream::Null()) = 0;
/** @brief Virtual destructor, so an object can be destroyed through a pointer to this interface.
 */
virtual ~NVSurfaceToColorConverter() {}
};

/** @brief Creates a NVSurfaceToColorConverter.
Expand Down
4 changes: 2 additions & 2 deletions modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,11 @@ class CV_EXPORTS_W DescriptorMatcher : public cv::Algorithm

/** @brief Clears the train descriptor collection.
*/
CV_WRAP virtual void clear() = 0;
CV_WRAP virtual void clear() CV_OVERRIDE = 0;

/** @brief Returns true if there are no train descriptors in the collection.
*/
CV_WRAP virtual bool empty() const = 0;
CV_WRAP virtual bool empty() const CV_OVERRIDE = 0;

/** @brief Trains a descriptor matcher.

Expand Down
6 changes: 3 additions & 3 deletions modules/cudaoptflow/src/nvidiaOpticalFlow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1020,9 +1020,6 @@ void NvidiaOpticalFlowImpl_2::calc(InputArray _frame0, InputArray _frame1, Input
GpuMat flowXYGpuMat(Size((m_width + m_hwGridSize - 1) / m_hwGridSize,
(m_height + m_hwGridSize - 1) / m_hwGridSize), CV_16SC2,
(void*)m_flowXYcuDevPtr, m_outputBufferStrideInfo.strideInfo[0].strideXInBytes);
GpuMat flowXYGpuMatUpScaled(Size((m_width + m_gridSize - 1) / m_gridSize,
(m_height + m_gridSize - 1) / m_gridSize), CV_16SC2,
(void*)m_flowXYUpScaledcuDevPtr, m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes);

//check whether frame0 is Mat or GpuMat
if (_frame0.isMat())
Expand Down Expand Up @@ -1105,6 +1102,9 @@ void NvidiaOpticalFlowImpl_2::calc(InputArray _frame0, InputArray _frame1, Input

if (m_scaleFactor > 1)
{
GpuMat flowXYGpuMatUpScaled(Size((m_width + m_gridSize - 1) / m_gridSize,
(m_height + m_gridSize - 1) / m_gridSize), CV_16SC2,
(void*)m_flowXYUpScaledcuDevPtr, m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes);
uint32_t nSrcWidth = flowXYGpuMat.size().width;
uint32_t nSrcHeight = flowXYGpuMat.size().height;
uint32_t nSrcPitch = m_outputBufferStrideInfo.strideInfo[0].strideXInBytes;
Expand Down
9 changes: 9 additions & 0 deletions modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,12 @@ template <class PtrTuple> struct PtrTraits< ZipPtrSz<PtrTuple> > : PtrTraitsBase
}}

#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 4))
#if (__CUDACC_VER_MAJOR__ > 13 || (__CUDACC_VER_MAJOR__ == 13 && __CUDACC_VER_MINOR__ >= 2))
_CCCL_BEGIN_NAMESPACE_CUDA_STD
#else
_LIBCUDACXX_BEGIN_NAMESPACE_STD
#endif


template< class... Types >
struct tuple_size< cv::cudev::ZipPtr<tuple<Types...> > >
Expand All @@ -198,7 +203,11 @@ template<size_t N, class... Types >
struct tuple_element<N, cv::cudev::ZipPtrSz<tuple<Types...> > >
: tuple_element<N, tuple<Types...> > { };

#if (__CUDACC_VER_MAJOR__ > 13 || (__CUDACC_VER_MAJOR__ == 13 && __CUDACC_VER_MINOR__ >= 2))
_CCCL_END_NAMESPACE_CUDA_STD
#else
_LIBCUDACXX_END_NAMESPACE_STD
#endif

#endif
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,14 @@ class CV_EXPORTS_W DisparityFilter : public Algorithm
@param left_view left view of the original stereo-pair to guide the filtering process, 8-bit single-channel
or three-channel image.

@param filtered_disparity_map output disparity map.
@param filtered_disparity_map output disparity map, single-channel CV_16S type,
with disparity values scaled by 16.


@param disparity_map_right optional argument, some implementations might also use the disparity map
of the right view to compute confidence maps. If provided, it must be a single-channel CV_32F matrix,
otherwise a runtime assertion will fail.
of the right view to compute confidence maps. If provided, it must be a single-channel CV_16S matrix.
Disparity values are expected to be scaled by 16 (one-pixel disparity corresponds to the value of 16).


@param ROI region of the disparity map to filter. Optional, usually it should be set automatically.

Expand Down
Loading