-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use auto-schedulers to obtain optimized schedules for CPU and GPU. CPU schedule outperforms everything else _by far_ except for unreasonably large variances, in which case the GPU schedule wins. CPU schedule is faster than current ITK CPU filter by over an order of magnitude. It is even faster than the ITK GPU filter, but the contest is closer.
- Loading branch information
Showing
7 changed files
with
572 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/*========================================================================= | ||
* | ||
* Copyright NumFOCUS | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0.txt | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*=========================================================================*/ | ||
#ifndef itkHalideGPUDiscreteGaussianImageFilter_h | ||
#define itkHalideGPUDiscreteGaussianImageFilter_h | ||
|
||
#include "itkImageToImageFilter.h" | ||
|
||
namespace itk | ||
{ | ||
|
||
/** \class HalideGPUDiscreteGaussianImageFilter | ||
* | ||
* \brief Filters a image by iterating over its pixels. | ||
* | ||
* Filters a image by iterating over its pixels in a multi-threaded way | ||
* and {to be completed by the developer}. | ||
* | ||
* \ingroup HalideFilters | ||
* | ||
* Limitations compared te itkDiscreteGaussianImageFilter: | ||
* - Only supports isotropic variance and maximum error (to simplify wrapper) | ||
* - Only supports 3d images (to simplify wrapper) | ||
* | ||
*/ | ||
template <typename TInputImage, typename TOutputImage> | ||
class HalideGPUDiscreteGaussianImageFilter : public ImageToImageFilter<TInputImage, TOutputImage> | ||
{ | ||
public: | ||
ITK_DISALLOW_COPY_AND_MOVE(HalideGPUDiscreteGaussianImageFilter); | ||
|
||
static constexpr unsigned int InputImageDimension = TInputImage::ImageDimension; | ||
static constexpr unsigned int OutputImageDimension = TOutputImage::ImageDimension; | ||
|
||
using InputImageType = TInputImage; | ||
using OutputImageType = TOutputImage; | ||
using InputPixelType = typename InputImageType::PixelType; | ||
using OutputPixelType = typename OutputImageType::PixelType; | ||
|
||
/** Standard class aliases. */ | ||
using Self = HalideGPUDiscreteGaussianImageFilter<InputImageType, OutputImageType>; | ||
using Superclass = ImageToImageFilter<InputImageType, OutputImageType>; | ||
using Pointer = SmartPointer<Self>; | ||
using ConstPointer = SmartPointer<const Self>; | ||
|
||
/** Run-time type information. */ | ||
itkOverrideGetNameOfClassMacro(HalideGPUDiscreteGaussianImageFilter); | ||
|
||
/** Standard New macro. */ | ||
itkNewMacro(Self); | ||
|
||
itkSetMacro(Variance, float); | ||
itkGetMacro(Variance, float); | ||
|
||
itkSetMacro(MaximumError, float); | ||
itkGetMacro(MaximumError, float); | ||
|
||
itkGetMacro(MaximumKernelWidth, unsigned int); | ||
itkSetMacro(MaximumKernelWidth, unsigned int); | ||
|
||
itkGetMacro(UseImageSpacing, bool); | ||
itkSetMacro(UseImageSpacing, bool); | ||
itkBooleanMacro(UseImageSpacing); | ||
|
||
protected: | ||
HalideGPUDiscreteGaussianImageFilter(); | ||
~ | ||
HalideGPUDiscreteGaussianImageFilter() override = default; | ||
|
||
void | ||
PrintSelf(std::ostream & os, Indent indent) const override; | ||
|
||
using OutputRegionType = typename OutputImageType::RegionType; | ||
|
||
void | ||
GenerateData() override; | ||
|
||
private: | ||
#ifdef ITK_USE_CONCEPT_CHECKING | ||
// Add concept checking such as | ||
itkConceptMacro(FloatingPointPixel, (itk::Concept::IsFloatingPoint<typename InputImageType::PixelType>)); | ||
#endif | ||
|
||
float m_Variance = 0; | ||
float m_MaximumError = 0.01; | ||
unsigned int m_MaximumKernelWidth = 32; | ||
bool m_UseImageSpacing = true; | ||
}; | ||
} // namespace itk | ||
|
||
#ifndef ITK_MANUAL_INSTANTIATION | ||
# include "itkHalideGPUDiscreteGaussianImageFilter.hxx" | ||
#endif | ||
|
||
#endif // itkHalideGPUDiscreteGaussianImageFilter_h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/*========================================================================= | ||
* | ||
* Copyright NumFOCUS | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0.txt | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*=========================================================================*/ | ||
#ifndef itkHalideGPUDiscreteGaussianImageFilter_hxx | ||
#define itkHalideGPUDiscreteGaussianImageFilter_hxx | ||
|
||
#include "itkHalideGPUDiscreteGaussianImageFilter.h" | ||
|
||
#include "itkHalideGPUSeparableConvolutionImpl.h" | ||
|
||
#include "itkGaussianOperator.h" | ||
|
||
#include <Halide.h> | ||
#include <HalideBuffer.h> | ||
#include <iomanip> | ||
|
||
namespace itk | ||
{ | ||
|
||
template <typename TInputImage, typename TOutputImage> | ||
HalideGPUDiscreteGaussianImageFilter<TInputImage, TOutputImage>::HalideGPUDiscreteGaussianImageFilter() | ||
{ | ||
this->DynamicMultiThreadingOff(); | ||
this->ThreaderUpdateProgressOff(); | ||
} | ||
|
||
|
||
template <typename TInputImage, typename TOutputImage> | ||
void | ||
HalideGPUDiscreteGaussianImageFilter<TInputImage, TOutputImage>::PrintSelf(std::ostream & os, Indent indent) const | ||
{ | ||
Superclass::PrintSelf(os, indent); | ||
} | ||
|
||
|
||
template <typename TInputImage, typename TOutputImage> | ||
void | ||
HalideGPUDiscreteGaussianImageFilter<TInputImage, TOutputImage>::GenerateData() | ||
{ | ||
const InputImageType * input = this->GetInput(); | ||
typename InputImageType::RegionType inputRegion = input->GetBufferedRegion(); | ||
typename InputImageType::SizeType inputSize = inputRegion.GetSize(); | ||
typename InputImageType::SpacingType inputSpacing = input->GetSpacing(); | ||
|
||
std::vector<Halide::Runtime::Buffer<float, 1>> kernel_buffers{}; | ||
|
||
// compute kernel coefficients with itk::GaussianOperator to match behavior with itk::DiscreteGaussianImageFilter | ||
for (int dim = 0; dim < InputImageDimension; ++dim) | ||
{ | ||
GaussianOperator<float, 1> oper{}; | ||
oper.SetMaximumError(m_MaximumError); | ||
oper.SetMaximumKernelWidth(m_MaximumKernelWidth); | ||
|
||
float variance = m_Variance; | ||
if (m_UseImageSpacing) | ||
{ | ||
variance /= inputSpacing[dim]; | ||
} | ||
oper.SetVariance(variance); | ||
|
||
oper.CreateDirectional(); | ||
|
||
Halide::Runtime::Buffer<float, 1> & buf = kernel_buffers.emplace_back(static_cast<int>(oper.GetSize(0))); | ||
buf.set_min(-static_cast<int>(oper.GetRadius(0))); | ||
std::copy(oper.Begin(), oper.End(), buf.begin()); | ||
buf.set_host_dirty(); | ||
} | ||
|
||
OutputImageType * output = this->GetOutput(); | ||
output->SetRegions(inputRegion); | ||
output->Allocate(); | ||
|
||
std::vector<int> sizes(3, 1); | ||
std::copy(inputSize.begin(), inputSize.end(), sizes.begin()); | ||
|
||
Halide::Runtime::Buffer<const InputPixelType> inputBuffer(input->GetBufferPointer(), sizes); | ||
Halide::Runtime::Buffer<OutputPixelType> outputBuffer(output->GetBufferPointer(), sizes); | ||
|
||
inputBuffer.set_host_dirty(); | ||
itkHalideGPUSeparableConvolutionImpl(inputBuffer, kernel_buffers[0], kernel_buffers[1], kernel_buffers[2], outputBuffer); | ||
outputBuffer.copy_to_host(); | ||
} | ||
|
||
} // end namespace itk | ||
|
||
#endif // itkHalideGPUDiscreteGaussianImageFilter_hxx |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.