Merge pull request opencv#26617 from CodeLinaro:xuezha_2ndPost

quic-xuezha · shyama7004 · commit bf6d77f63058 · 2025-01-20T12:22:32.000+05:30
FastCV-based HAL for OpenCV acceleration 2ndpost-1 opencv#26617 ### Detailed description: - Add parallel support for cv_hal_sobel - Add cv_hal_gaussianBlurBinomial and parallel support. - Add cv_hal_addWeighted8u and parallel support - Add cv_hal_warpPerspective and parallel support Requires binary from [opencv/opencv_3rdparty#90](opencv/opencv_3rdparty#90) Related patch to opencv_contrib: [opencv/opencv_contrib#3844](opencv/opencv_contrib#3844) ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
diff --git a/3rdparty/fastcv/fastcv.cmake b/3rdparty/fastcv/fastcv.cmake
@@ -1,23 +1,23 @@
 function(download_fastcv root_dir)
 
   # Commit SHA in the opencv_3rdparty repo
-  set(FASTCV_COMMIT "b8f0d48fa9dbebb0237d3e0abd206f9930c89db6")
+  set(FASTCV_COMMIT "dc5d58018f3af915a8d209386d2c58c0501c0f2c")
 
   # Define actual FastCV versions
   if(ANDROID)
     if(AARCH64)
       message(STATUS "Download FastCV for Android aarch64")
-      set(FCV_PACKAGE_NAME  "fastcv_android_aarch64_2024_10_24.tgz")
-      set(FCV_PACKAGE_HASH  "14486af00dc0282dac591dc9ccdd957e")
+      set(FCV_PACKAGE_NAME  "fastcv_android_aarch64_2024_12_11.tgz")
+      set(FCV_PACKAGE_HASH  "9dac41e86597305f846212dae31a4a88")
     else()
       message(STATUS "Download FastCV for Android armv7")
-      set(FCV_PACKAGE_NAME  "fastcv_android_arm32_2024_10_24.tgz")
-      set(FCV_PACKAGE_HASH  "b5afadd5a5b55f8f6c2e7361f225fa21")
+      set(FCV_PACKAGE_NAME  "fastcv_android_arm32_2024_12_11.tgz")
+      set(FCV_PACKAGE_HASH  "fe2d30334180b17e3031eee92aac43b6")
     endif()
   elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
     if(AARCH64)
-      set(FCV_PACKAGE_NAME  "fastcv_linux_aarch64_2024_10_24.tgz")
-      set(FCV_PACKAGE_HASH  "d15c7b77f2d3577ba46bd94e6cf15230")
+      set(FCV_PACKAGE_NAME  "fastcv_linux_aarch64_2024_12_11.tgz")
+      set(FCV_PACKAGE_HASH  "7b33ad833e6f15ab6d4ec64fa3c17acd")
     else()
       message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
     endif()
diff --git a/3rdparty/fastcv/include/fastcv_hal_core.hpp b/3rdparty/fastcv/include/fastcv_hal_core.hpp
@@ -24,6 +24,8 @@
 #define cv_hal_flip                 fastcv_hal_flip
 #undef  cv_hal_rotate90
 #define cv_hal_rotate90             fastcv_hal_rotate
+#undef  cv_hal_addWeighted8u
+#define cv_hal_addWeighted8u        fastcv_hal_addWeighted8u
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /// @brief look-up table transform of an array.
@@ -152,4 +154,27 @@ int fastcv_hal_rotate(
     size_t          dst_step,
     int             angle);
 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// @brief weighted sum of two arrays using formula: dst[i] = a * src1[i] + b * src2[i] + c
+/// @param src1_data first source image data
+/// @param src1_step first source image step
+/// @param src2_data second source image data
+/// @param src2_step second source image step
+/// @param dst_data  destination image data
+/// @param dst_step  destination image step
+/// @param width     width of the images
+/// @param height    height of the images
+/// @param scalars   numbers a, b, and c, stored in that order
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+int fastcv_hal_addWeighted8u(
+    const uchar*    src1_data,
+    size_t          src1_step,
+    const uchar*    src2_data,
+    size_t          src2_step,
+    uchar*          dst_data,
+    size_t          dst_step,
+    int             width,
+    int             height,
+    const double    scalars[3]);
+
 #endif
diff --git a/3rdparty/fastcv/include/fastcv_hal_imgproc.hpp b/3rdparty/fastcv/include/fastcv_hal_imgproc.hpp
@@ -12,10 +12,14 @@
 #define cv_hal_medianBlur           fastcv_hal_medianBlur
 #undef  cv_hal_sobel
 #define cv_hal_sobel                fastcv_hal_sobel
-#undef cv_hal_boxFilter
+#undef  cv_hal_boxFilter
 #define cv_hal_boxFilter            fastcv_hal_boxFilter
-#undef cv_hal_adaptiveThreshold
+#undef  cv_hal_adaptiveThreshold
 #define cv_hal_adaptiveThreshold    fastcv_hal_adaptiveThreshold
+#undef  cv_hal_gaussianBlurBinomial
+#define cv_hal_gaussianBlurBinomial fastcv_hal_gaussianBlurBinomial
+#undef  cv_hal_warpPerspective
+#define cv_hal_warpPerspective      fastcv_hal_warpPerspective
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /// @brief Calculate medianBlur filter
@@ -148,4 +152,69 @@ int fastcv_hal_adaptiveThreshold(
     int             blockSize,
     double          C);
 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// @brief Blurs an image using a Gaussian filter.
+/// @param src_data         Source image data
+/// @param src_step         Source image step
+/// @param dst_data         Destination image data
+/// @param dst_step         Destination image step
+/// @param width            Source image width
+/// @param height           Source image height
+/// @param depth            Depth of the source and destination images
+/// @param cn               Number of channels
+/// @param margin_left      Left margin of the source image
+/// @param margin_top       Top margin of the source image
+/// @param margin_right     Right margin of the source image
+/// @param margin_bottom    Bottom margin of the source image
+/// @param ksize            Kernel size
+/// @param border_type      Border type
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+int fastcv_hal_gaussianBlurBinomial(
+    const uchar*    src_data,
+    size_t          src_step,
+    uchar*          dst_data,
+    size_t          dst_step,
+    int             width,
+    int             height,
+    int             depth,
+    int             cn,
+    size_t          margin_left,
+    size_t          margin_top,
+    size_t          margin_right,
+    size_t          margin_bottom,
+    size_t          ksize,
+    int             border_type);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// @brief Applies a perspective transformation to an image.
+///
+/// @param src_type         Type shared by the source and destination images
+/// @param src_data         Source image data
+/// @param src_step         Source image step
+/// @param src_width        Source image width
+/// @param src_height       Source image height
+/// @param dst_data         Destination image data
+/// @param dst_step         Destination image step
+/// @param dst_width        Destination image width
+/// @param dst_height       Destination image height
+/// @param M                3x3 matrix with transform coefficients, passed as 9 doubles
+/// @param interpolation    Interpolation mode (CV_HAL_INTER_NEAREST, ...)
+/// @param border_type      Border processing mode (CV_HAL_BORDER_REFLECT, ...)
+/// @param border_value     Values (4 doubles) to use for CV_HAL_BORDER_CONSTANT mode
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+int fastcv_hal_warpPerspective(
+    int             src_type,
+    const uchar*    src_data,
+    size_t          src_step,
+    int             src_width,
+    int             src_height,
+    uchar*          dst_data,
+    size_t          dst_step,
+    int             dst_width,
+    int             dst_height,
+    const double    M[9],
+    int             interpolation,
+    int             border_type,
+    const double    border_value[4]);
+
 #endif
diff --git a/3rdparty/fastcv/include/fastcv_hal_utils.hpp b/3rdparty/fastcv/include/fastcv_hal_utils.hpp
@@ -29,7 +29,7 @@
             status == FASTCV_EHWGPU)                                        \
     {                                                                       \
         CV_LOG_DEBUG(NULL, "FastCV status:"<<getFastCVErrorString(status)   \
-            <<"Switching to default OpenCV solution!");                     \
+            <<", Switching to default OpenCV solution!");                   \
         return CV_HAL_ERROR_NOT_IMPLEMENTED;                                \
     }                                                                       \
     else                                                                    \
@@ -38,7 +38,7 @@
         return CV_HAL_ERROR_UNKNOWN;                                        \
     }                                                                       \
 }
- 
+
 #define CV_HAL_RETURN_NOT_IMPLEMENTED(reason)                           \
 {                                                                       \
     CV_LOG_DEBUG(NULL,"Switching to default OpenCV\nInfo: "<<reason);   \
@@ -47,6 +47,7 @@
 
 #define FCV_KernelSize_SHIFT 3
 #define FCV_MAKETYPE(ksize,depth) ((ksize<<FCV_KernelSize_SHIFT) + depth)
+#define FCV_CMP_EQ(val1,val2) (fabs((val1) - (val2)) < FLT_EPSILON) // approximate equality; args parenthesized so expressions expand safely
 
 const char* getFastCVErrorString(int status);
 const char* borderToString(int border);
diff --git a/3rdparty/fastcv/src/fastcv_hal_core.cpp b/3rdparty/fastcv/src/fastcv_hal_core.cpp
@@ -38,15 +38,15 @@ class ParallelTableLookup : public cv::ParallelLoopBody
 };
 
 int fastcv_hal_lut(
-    const uchar*    src_data, 
-    size_t          src_step, 
-    size_t          src_type, 
-    const uchar*    lut_data, 
-    size_t          lut_channel_size, 
-    size_t          lut_channels, 
-    uchar*          dst_data, 
-    size_t          dst_step, 
-    int             width, 
+    const uchar*    src_data,
+    size_t          src_step,
+    size_t          src_type,
+    const uchar*    lut_data,
+    size_t          lut_channel_size,
+    size_t          lut_channels,
+    uchar*          dst_data,
+    size_t          dst_step,
+    int             width,
     int             height)
 {
     if((width*height)<=(320*240))
@@ -69,10 +69,10 @@ int fastcv_hal_lut(
 }
 
 int fastcv_hal_normHammingDiff8u(
-    const uchar*    a, 
-    const uchar*    b, 
-    int             n, 
-    int             cellSize, 
+    const uchar*    a,
+    const uchar*    b,
+    int             n,
+    int             cellSize,
     int*            result)
 {
     fcvStatus           status;
@@ -169,15 +169,15 @@ int fastcv_hal_transpose2d(
     switch (element_size)
     {
         case 1:
-            status = fcvTransposeu8_v2(src_data, src_width, src_height, src_step, 
+            status = fcvTransposeu8_v2(src_data, src_width, src_height, src_step,
                                        dst_data, dst_step);
             break;
         case 2:
-            status = fcvTransposeu16_v2((const uint16_t*)src_data, src_width, src_height, 
+            status = fcvTransposeu16_v2((const uint16_t*)src_data, src_width, src_height,
                                        src_step, (uint16_t*)dst_data, dst_step);
             break;
         case 4:
-            status = fcvTransposef32_v2((const float32_t*)src_data, src_width, src_height, 
+            status = fcvTransposef32_v2((const float32_t*)src_data, src_width, src_height,
                                        src_step, (float32_t*)dst_data, dst_step);
             break;
         default:
@@ -205,18 +205,18 @@ int fastcv_hal_meanStdDev(
     if(src_type != CV_8UC1)
     {
         CV_HAL_RETURN_NOT_IMPLEMENTED("src type not supported");
-    }  
+    }
     else if(mask != nullptr)
     {
         CV_HAL_RETURN_NOT_IMPLEMENTED("mask not supported");
-    }  
+    }
     else if(mean_val == nullptr && stddev_val == nullptr)
     {
         CV_HAL_RETURN_NOT_IMPLEMENTED("null ptr for mean and stddev");
     }
-       
+
     float32_t mean, variance;
-        
+
     fcvStatus status = fcvImageIntensityStats_v2(src_data, src_step, 0, 0, width, height,
                                    &mean, &variance, FASTCV_BIASED_VARIANCE_ESTIMATOR);
 
@@ -278,7 +278,7 @@ int fastcv_hal_flip(
         status = fcvFlipRGB888u8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data, dst_step, dir);
     else
         CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Data type:%d is not supported, Switching to default OpenCV solution!", src_type));
-    
+
     CV_HAL_RETURN(status, hal_flip);
 }
 
@@ -294,7 +294,7 @@ int fastcv_hal_rotate(
 {
     if((src_width*src_height)<(120*80))
         CV_HAL_RETURN_NOT_IMPLEMENTED("Switching to default OpenCV solution for lower resolution!");
-    
+
     fcvStatus           status;
     fcvRotateDegree     degree;
 
@@ -324,11 +324,63 @@ int fastcv_hal_rotate(
             status = fcvRotateImageu8(src_data, src_width, src_height, src_step, dst_data, dst_step, degree);
             break;
         case CV_8UC2:
-            status = fcvRotateImageInterleavedu8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data, 
+            status = fcvRotateImageInterleavedu8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data,
                                                     dst_step, degree);
             break;
         default:
             CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("src_type:%d is not supported", src_type));
     }
     CV_HAL_RETURN(status, hal_rotate);
+}
+
+int fastcv_hal_addWeighted8u(
+    const uchar*    src1_data,
+    size_t          src1_step,
+    const uchar*    src2_data,
+    size_t          src2_step,
+    uchar*          dst_data,
+    size_t          dst_step,
+    int             width,
+    int             height,
+    const double    scalars[3])
+{
+    // scalars = {alpha, beta, gamma}; values outside the ranges below are left to the default OpenCV path.
+    if( (scalars[0] < -128.0f) || (scalars[0] >= 128.0f) ||
+        (scalars[1] < -128.0f) || (scalars[1] >= 128.0f) ||
+        (scalars[2] < -(1<<23))|| (scalars[2] >= 1<<23))
+        CV_HAL_RETURN_NOT_IMPLEMENTED(
+            cv::format("Alpha:%f,Beta:%f,Gamma:%f is not supported because it's too large or too small\n",
+            scalars[0],scalars[1],scalars[2]));
+
+    INITIALIZATION_CHECK;
+
+    // Stripes run in parallel; a failing stripe stores its status under a lock so CV_HAL_RETURN can report it.
+    cv::Mutex statusMutex;
+    fcvStatus status = FASTCV_SUCCESS;
+    auto report = [&](fcvStatus rc){ if (rc != FASTCV_SUCCESS) { cv::AutoLock lock(statusMutex); status = rc; } };
+
+    if (height == 1)
+    {
+        // A single row is split by columns; every step is reset to the row width before the stripes are processed.
+        src1_step = src2_step = dst_step = width*sizeof(uchar);
+        cv::parallel_for_(cv::Range(0, width), [&](const cv::Range &range){
+            const uint8_t *src1 = src1_data + range.start;
+            const uint8_t *src2 = src2_data + range.start;
+            uint8_t *dst = dst_data + range.start;
+            report(fcvAddWeightedu8_v2(src1, range.end - range.start, height, src1_step, src2, src2_step,
+                scalars[0], scalars[1], scalars[2], dst, dst_step));
+            });
+    }
+    else
+    {
+        cv::parallel_for_(cv::Range(0, height), [&](const cv::Range &range){
+            const uint8_t *src1 = src1_data + range.start * src1_step;
+            const uint8_t *src2 = src2_data + range.start * src2_step;
+            uint8_t *dst = dst_data + range.start * dst_step;
+            report(fcvAddWeightedu8_v2(src1, width, range.end - range.start, src1_step, src2, src2_step,
+                scalars[0], scalars[1], scalars[2], dst, dst_step));
+            });
+    }
+
+    CV_HAL_RETURN(status, hal_addWeighted8u_v2);
 }
diff --git a/3rdparty/fastcv/src/fastcv_hal_imgproc.cpp b/3rdparty/fastcv/src/fastcv_hal_imgproc.cpp