diff --git a/CMakeLists.txt b/CMakeLists.txt index e4b2524..aff18ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ project(HalideFilters) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED YES) +option(Module_HalideFilters_USE_AUTOSCHEDULER "Use auto-schedulers for Halide filters" OFF) + # Update the following variables to update the version of Halide used set(HALIDE_VERSION "18.0.0") set(HALIDE_VERSION_COMMIT "8c651b459a4e3744b413c23a29b5c5d968702bb7") diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 04ba6be..7d17a84 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,6 +3,8 @@ project(HalideFiltersExamples) set(ExampleSpecificComponents HalideFilters + ITKGPUSmoothing + ITKImageNoise ) if(NOT ITK_SOURCE_DIR) diff --git a/examples/SampleHalidePipeline.cxx b/examples/SampleHalidePipeline.cxx index 55e772d..e617d63 100644 --- a/examples/SampleHalidePipeline.cxx +++ b/examples/SampleHalidePipeline.cxx @@ -17,29 +17,100 @@ *=========================================================================*/ #include "itkHalideDiscreteGaussianImageFilter.h" +#include "itkDiscreteGaussianImageFilter.h" +#include "itkHalideGPUDiscreteGaussianImageFilter.h" +#include "itkGPUDiscreteGaussianImageFilter.h" +#include "itkAdditiveGaussianNoiseImageFilter.h" +#include "itkCastImageFilter.h" +#include "itkImage.h" +#include "itkGPUImage.h" -#include "itkCommand.h" #include "itkImageFileReader.h" #include "itkImageFileWriter.h" +#define BENCH(label, samples, block) \ + do \ + { \ + auto start = std::chrono::high_resolution_clock::now(); \ + for (size_t __i = 0; __i < samples; __i++) \ + { \ + block; \ + } \ + auto end = std::chrono::high_resolution_clock::now(); \ + auto ms = std::chrono::duration_cast(end - start).count(); \ + std::cout << #label << " " << (ms / samples) << "ms" << std::endl; \ + } while (0) -int main( int argc, char * argv[] ) +using ImageType = itk::Image; +using NoiseFilter = itk::AdditiveGaussianNoiseImageFilter; +using GPUImageType = itk::GPUImage; +using CastToGPUImage = itk::CastImageFilter; + +using CPUBlur = itk::DiscreteGaussianImageFilter; +using HalideBlur = itk::HalideDiscreteGaussianImageFilter; + +using GPUBlur = itk::GPUDiscreteGaussianImageFilter; +using HalideGPUBlur = itk::HalideGPUDiscreteGaussianImageFilter; + +int +main(int argc, char * argv[]) { - if( argc < 4 ) - { - std::cerr << "Missing parameters." << std::endl; - std::cerr << "Usage: " << argv[0] - << " inputImage" - << " outputImage" - << " parameters" << std::endl; - return EXIT_FAILURE; - } + ImageType::IndexValueType SIZE = 300; + float VARIANCE = 90; + + ImageType::IndexType index; + index.Fill(0); + ImageType::SizeType size; + size.Fill(SIZE); + ImageType::RegionType region; + region.SetIndex(index); + region.SetSize(size); + + ImageType::Pointer image = ImageType::New(); + image->SetRegions(region); + image->Allocate(); + image->FillBuffer(0.0f); + + NoiseFilter::Pointer noise = NoiseFilter::New(); + noise->SetInput(image); + noise->SetMean(0); + noise->SetStandardDeviation(2.0); + noise->Update(); + + image = noise->GetOutput(); + + BENCH(cpu_blur, 1, { + CPUBlur::Pointer filter = CPUBlur::New(); + filter->SetInput(image); + filter->SetVariance(VARIANCE); + filter->Update(); + }); + BENCH(halide_cpu_blur, 1, { + HalideBlur::Pointer filter = HalideBlur::New(); + filter->SetInput(image); + filter->SetVariance(VARIANCE); + filter->Update(); + }); - // Please, write a complete, self-containted and useful example that - // demonstrate a class when being used along with other ITK classes or in - // the context of a wider or specific application. + BENCH(gpu_blur, 1, { + CastToGPUImage::Pointer cast = CastToGPUImage::New(); + cast->SetInput(image); + GPUBlur::Pointer filter = GPUBlur::New(); + GPUImageType::Pointer gpu_image = cast->GetOutput(); + cast->Update(); + filter->SetInput(gpu_image); + filter->SetVariance(VARIANCE); + filter->Update(); + filter->GetOutput()->UpdateBuffers(); + }); + BENCH(halide_gpu_blur, 1, { + HalideGPUBlur::Pointer filter = HalideGPUBlur::New(); + filter->SetInput(image); + filter->SetVariance(VARIANCE); + filter->Update(); + }); return EXIT_SUCCESS; } diff --git a/include/itkHalideGPUDiscreteGaussianImageFilter.h b/include/itkHalideGPUDiscreteGaussianImageFilter.h new file mode 100644 index 0000000..62dabb3 --- /dev/null +++ b/include/itkHalideGPUDiscreteGaussianImageFilter.h @@ -0,0 +1,109 @@ +/*========================================================================= + * + * Copyright NumFOCUS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + *=========================================================================*/ +#ifndef itkHalideGPUDiscreteGaussianImageFilter_h +#define itkHalideGPUDiscreteGaussianImageFilter_h + +#include "itkImageToImageFilter.h" + +namespace itk +{ + +/** \class HalideGPUDiscreteGaussianImageFilter + * + * \brief Filters a image by iterating over its pixels. + * + * Filters a image by iterating over its pixels in a multi-threaded way + * and {to be completed by the developer}. + * + * \ingroup HalideFilters + * + * Limitations compared te itkDiscreteGaussianImageFilter: + * - Only supports isotropic variance and maximum error (to simplify wrapper) + * - Only supports 3d images (to simplify wrapper) + * + */ +template +class HalideGPUDiscreteGaussianImageFilter : public ImageToImageFilter +{ +public: + ITK_DISALLOW_COPY_AND_MOVE(HalideGPUDiscreteGaussianImageFilter); + + static constexpr unsigned int InputImageDimension = TInputImage::ImageDimension; + static constexpr unsigned int OutputImageDimension = TOutputImage::ImageDimension; + + using InputImageType = TInputImage; + using OutputImageType = TOutputImage; + using InputPixelType = typename InputImageType::PixelType; + using OutputPixelType = typename OutputImageType::PixelType; + + /** Standard class aliases. */ + using Self = HalideGPUDiscreteGaussianImageFilter; + using Superclass = ImageToImageFilter; + using Pointer = SmartPointer; + using ConstPointer = SmartPointer; + + /** Run-time type information. */ + itkOverrideGetNameOfClassMacro(HalideGPUDiscreteGaussianImageFilter); + + /** Standard New macro. */ + itkNewMacro(Self); + + itkSetMacro(Variance, float); + itkGetMacro(Variance, float); + + itkSetMacro(MaximumError, float); + itkGetMacro(MaximumError, float); + + itkGetMacro(MaximumKernelWidth, unsigned int); + itkSetMacro(MaximumKernelWidth, unsigned int); + + itkGetMacro(UseImageSpacing, bool); + itkSetMacro(UseImageSpacing, bool); + itkBooleanMacro(UseImageSpacing); + +protected: + HalideGPUDiscreteGaussianImageFilter(); + ~ + HalideGPUDiscreteGaussianImageFilter() override = default; + + void + PrintSelf(std::ostream & os, Indent indent) const override; + + using OutputRegionType = typename OutputImageType::RegionType; + + void + GenerateData() override; + +private: +#ifdef ITK_USE_CONCEPT_CHECKING + // Add concept checking such as + itkConceptMacro(FloatingPointPixel, (itk::Concept::IsFloatingPoint)); +#endif + + float m_Variance = 0; + float m_MaximumError = 0.01; + unsigned int m_MaximumKernelWidth = 32; + bool m_UseImageSpacing = true; +}; +} // namespace itk + +#ifndef ITK_MANUAL_INSTANTIATION +# include "itkHalideGPUDiscreteGaussianImageFilter.hxx" +#endif + +#endif // itkHalideGPUDiscreteGaussianImageFilter diff --git a/include/itkHalideGPUDiscreteGaussianImageFilter.hxx b/include/itkHalideGPUDiscreteGaussianImageFilter.hxx new file mode 100644 index 0000000..5bce81c --- /dev/null +++ b/include/itkHalideGPUDiscreteGaussianImageFilter.hxx @@ -0,0 +1,100 @@ +/*========================================================================= + * + * Copyright NumFOCUS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + *=========================================================================*/ +#ifndef itkHalideGPUDiscreteGaussianImageFilter_hxx +#define itkHalideGPUDiscreteGaussianImageFilter_hxx + +#include "itkHalideGPUDiscreteGaussianImageFilter.h" + +#include "itkHalideGPUSeparableConvolutionImpl.h" + +#include "itkGaussianOperator.h" + +#include +#include +#include + +namespace itk +{ + +template +HalideGPUDiscreteGaussianImageFilter::HalideGPUDiscreteGaussianImageFilter() +{ + this->DynamicMultiThreadingOff(); + this->ThreaderUpdateProgressOff(); +} + + +template +void +HalideGPUDiscreteGaussianImageFilter::PrintSelf(std::ostream & os, Indent indent) const +{ + Superclass::PrintSelf(os, indent); +} + + +template +void +HalideGPUDiscreteGaussianImageFilter::GenerateData() +{ + const InputImageType * input = this->GetInput(); + typename InputImageType::RegionType inputRegion = input->GetBufferedRegion(); + typename InputImageType::SizeType inputSize = inputRegion.GetSize(); + typename InputImageType::SpacingType inputSpacing = input->GetSpacing(); + + std::vector> kernel_buffers{}; + + // compute kernel coefficients with itk::GaussianOperator to match behavior with itk::DiscreteGaussianImageFilter + for (int dim = 0; dim < InputImageDimension; ++dim) + { + GaussianOperator oper{}; + oper.SetMaximumError(m_MaximumError); + oper.SetMaximumKernelWidth(m_MaximumKernelWidth); + + float variance = m_Variance; + if (m_UseImageSpacing) + { + variance /= inputSpacing[dim]; + } + oper.SetVariance(variance); + + oper.CreateDirectional(); + + Halide::Runtime::Buffer & buf = kernel_buffers.emplace_back(static_cast(oper.GetSize(0))); + buf.set_min(-static_cast(oper.GetRadius(0))); + std::copy(oper.Begin(), oper.End(), buf.begin()); + buf.set_host_dirty(); + } + + OutputImageType * output = this->GetOutput(); + output->SetRegions(inputRegion); + output->Allocate(); + + std::vector sizes(3, 1); + std::copy(inputSize.begin(), inputSize.end(), sizes.begin()); + + Halide::Runtime::Buffer inputBuffer(input->GetBufferPointer(), sizes); + Halide::Runtime::Buffer outputBuffer(output->GetBufferPointer(), sizes); + + inputBuffer.set_host_dirty(); + itkHalideGPUSeparableConvolutionImpl(inputBuffer, kernel_buffers[0], kernel_buffers[1], kernel_buffers[2], outputBuffer); + outputBuffer.copy_to_host(); +} + +} // end namespace itk + +#endif // itkHalideGPUDiscreteGaussianImageFilter_hxx diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 086fa8c..2b160e5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,16 +3,45 @@ find_package(Halide REQUIRED shared) add_executable(itkHalideGenerators generators.cpp) target_link_libraries(itkHalideGenerators PRIVATE Halide::Generator) -add_halide_library(itkHalideSeparableConvolutionImpl - FROM itkHalideGenerators - HEADER itkHalideSeparableConvolutionImpl_h - FEATURES cuda - ) +if(Module_HalideFilters_USE_AUTOSCHEDULER) + add_halide_library(itkHalideSeparableConvolutionImpl + FROM itkHalideGenerators + GENERATOR itkHalideSeparableConvolutionImpl + HEADER itkHalideSeparableConvolutionImpl_h + SCHEDULE itkHalideSeparableConvolutionSchedule + AUTOSCHEDULER Halide::Adams2019 + ) + + add_halide_library(itkHalideGPUSeparableConvolutionImpl + FROM itkHalideGenerators + GENERATOR itkHalideSeparableConvolutionImpl + HEADER itkGPUHalideSeparableConvolutionImpl_h + SCHEDULE itkHalideGPUSeparableConvolutionSchedule + FEATURES cuda + AUTOSCHEDULER Halide::Anderson2021 + ) +else() + add_halide_library(itkHalideSeparableConvolutionImpl + FROM itkHalideGenerators + GENERATOR itkHalideSeparableConvolutionImpl + HEADER itkHalideSeparableConvolutionImpl_h + PARAMS use_gpu=false + ) + + add_halide_library(itkHalideGPUSeparableConvolutionImpl + FROM itkHalideGenerators + GENERATOR itkHalideSeparableConvolutionImpl + HEADER itkGPUHalideSeparableConvolutionImpl_h + FEATURES cuda + PARAMS use_gpu=true + ) +endif() set(HalideFilters_SRCS + ${itkHalideGPUSeparableConvolutionImpl_h} ${itkHalideSeparableConvolutionImpl_h} ) itk_module_add_library(HalideFilters ${HalideFilters_SRCS}) target_include_directories(HalideFilters PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -target_link_libraries(HalideFilters PUBLIC itkHalideSeparableConvolutionImpl) +target_link_libraries(HalideFilters PUBLIC itkHalideSeparableConvolutionImpl itkHalideGPUSeparableConvolutionImpl) diff --git a/src/generators.cpp b/src/generators.cpp index 3e23f11..4a76f8e 100644 --- a/src/generators.cpp +++ b/src/generators.cpp @@ -5,15 +5,18 @@ using namespace Halide; class SeparableConvolutionGenerator : public Generator { public: - Input> input{ "input" }; + GeneratorParam use_gpu{ "use_gpu", true }; - Input> kernel_x{"kernel_x"}; - Input> kernel_y{"kernel_y"}; - Input> kernel_z{"kernel_z"}; + Input> input{ "input" }; + Input> kernel_x{ "kernel_x" }; + Input> kernel_y{ "kernel_y" }; + Input> kernel_z{ "kernel_z" }; Output> output{ "output" }; - Var x{ "x" }, y{ "y" }, z{ "z" }; + Var x{ "x" }, y{ "y" }, z{ "z" }; + Func blur_x{ "blur_x" }, blur_y{ "blur_y" }, blur_z{ "blur_z" }; + Func sample{ "sample" }; void generate() @@ -24,34 +27,251 @@ class SeparableConvolutionGenerator : public Generator