Skip to content

Commit bf6d77f

Browse files
quic-xuezhashyama7004
authored and committed
Merge pull request opencv#26617 from CodeLinaro:xuezha_2ndPost
FastCV-based HAL for OpenCV acceleration 2ndpost-1 opencv#26617 ### Detailed description: - Add parallel support for cv_hal_sobel - Add cv_hal_gaussianBlurBinomial and parallel support. - Add cv_hal_addWeighted8u and parallel support - Add cv_hal_warpPerspective and parallel support Requires binary from [opencv/opencv_3rdparty#90](opencv/opencv_3rdparty#90) Related patch to opencv_contrib: [opencv/opencv_contrib#3844](opencv/opencv_contrib#3844) ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
1 parent 03a173d commit bf6d77f

File tree

6 files changed

+648
-73
lines changed

6 files changed

+648
-73
lines changed

3rdparty/fastcv/fastcv.cmake

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
function(download_fastcv root_dir)
22

33
# Commit SHA in the opencv_3rdparty repo
4-
set(FASTCV_COMMIT "b8f0d48fa9dbebb0237d3e0abd206f9930c89db6")
4+
set(FASTCV_COMMIT "dc5d58018f3af915a8d209386d2c58c0501c0f2c")
55

66
# Define actual FastCV versions
77
if(ANDROID)
88
if(AARCH64)
99
message(STATUS "Download FastCV for Android aarch64")
10-
set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2024_10_24.tgz")
11-
set(FCV_PACKAGE_HASH "14486af00dc0282dac591dc9ccdd957e")
10+
set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2024_12_11.tgz")
11+
set(FCV_PACKAGE_HASH "9dac41e86597305f846212dae31a4a88")
1212
else()
1313
message(STATUS "Download FastCV for Android armv7")
14-
set(FCV_PACKAGE_NAME "fastcv_android_arm32_2024_10_24.tgz")
15-
set(FCV_PACKAGE_HASH "b5afadd5a5b55f8f6c2e7361f225fa21")
14+
set(FCV_PACKAGE_NAME "fastcv_android_arm32_2024_12_11.tgz")
15+
set(FCV_PACKAGE_HASH "fe2d30334180b17e3031eee92aac43b6")
1616
endif()
1717
elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
1818
if(AARCH64)
19-
set(FCV_PACKAGE_NAME "fastcv_linux_aarch64_2024_10_24.tgz")
20-
set(FCV_PACKAGE_HASH "d15c7b77f2d3577ba46bd94e6cf15230")
19+
set(FCV_PACKAGE_NAME "fastcv_linux_aarch64_2024_12_11.tgz")
20+
set(FCV_PACKAGE_HASH "7b33ad833e6f15ab6d4ec64fa3c17acd")
2121
else()
2222
message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
2323
endif()

3rdparty/fastcv/include/fastcv_hal_core.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#define cv_hal_flip fastcv_hal_flip
2525
#undef cv_hal_rotate90
2626
#define cv_hal_rotate90 fastcv_hal_rotate
27+
#undef cv_hal_addWeighted8u
28+
#define cv_hal_addWeighted8u fastcv_hal_addWeighted8u
2729

2830
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2931
/// @brief look-up table transform of an array.
@@ -152,4 +154,27 @@ int fastcv_hal_rotate(
152154
size_t dst_step,
153155
int angle);
154156

157+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
158+
/// @brief weighted sum of two arrays using formula: dst[i] = a * src1[i] + b * src2[i] + c
159+
/// @param src1_data first source image data
160+
/// @param src1_step first source image step
161+
/// @param src2_data second source image data
162+
/// @param src2_step second source image step
163+
/// @param dst_data destination image data
164+
/// @param dst_step destination image step
165+
/// @param width width of the images
166+
/// @param height height of the images
167+
/// @param scalars array of three numbers: weights a and b, and the additive term c
168+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
169+
int fastcv_hal_addWeighted8u(
170+
const uchar* src1_data,
171+
size_t src1_step,
172+
const uchar* src2_data,
173+
size_t src2_step,
174+
uchar* dst_data,
175+
size_t dst_step,
176+
int width,
177+
int height,
178+
const double scalars[3]);
179+
155180
#endif

3rdparty/fastcv/include/fastcv_hal_imgproc.hpp

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,14 @@
1212
#define cv_hal_medianBlur fastcv_hal_medianBlur
1313
#undef cv_hal_sobel
1414
#define cv_hal_sobel fastcv_hal_sobel
15-
#undef cv_hal_boxFilter
15+
#undef cv_hal_boxFilter
1616
#define cv_hal_boxFilter fastcv_hal_boxFilter
17-
#undef cv_hal_adaptiveThreshold
17+
#undef cv_hal_adaptiveThreshold
1818
#define cv_hal_adaptiveThreshold fastcv_hal_adaptiveThreshold
19+
#undef cv_hal_gaussianBlurBinomial
20+
#define cv_hal_gaussianBlurBinomial fastcv_hal_gaussianBlurBinomial
21+
#undef cv_hal_warpPerspective
22+
#define cv_hal_warpPerspective fastcv_hal_warpPerspective
1923

2024
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2125
/// @brief Calculate medianBlur filter
@@ -148,4 +152,69 @@ int fastcv_hal_adaptiveThreshold(
148152
int blockSize,
149153
double C);
150154

155+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
156+
/// @brief Blurs an image using a Gaussian filter.
157+
/// @param src_data Source image data
158+
/// @param src_step Source image step
159+
/// @param dst_data Destination image data
160+
/// @param dst_step Destination image step
161+
/// @param width Source image width
162+
/// @param height Source image height
163+
/// @param depth Depth of source and destination image
164+
/// @param cn Number of channels
165+
/// @param margin_left Left margins for source image
166+
/// @param margin_top Top margins for source image
167+
/// @param margin_right Right margins for source image
168+
/// @param margin_bottom Bottom margins for source image
169+
/// @param ksize Kernel size
170+
/// @param border_type Border type
171+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
172+
int fastcv_hal_gaussianBlurBinomial(
173+
const uchar* src_data,
174+
size_t src_step,
175+
uchar* dst_data,
176+
size_t dst_step,
177+
int width,
178+
int height,
179+
int depth,
180+
int cn,
181+
size_t margin_left,
182+
size_t margin_top,
183+
size_t margin_right,
184+
size_t margin_bottom,
185+
size_t ksize,
186+
int border_type);
187+
188+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
189+
/// @brief Applies a perspective transformation to an image.
190+
///
191+
/// @param src_type Source and destination image type
192+
/// @param src_data Source image data
193+
/// @param src_step Source image step
194+
/// @param src_width Source image width
195+
/// @param src_height Source image height
196+
/// @param dst_data Destination image data
197+
/// @param dst_step Destination image step
198+
/// @param dst_width Destination image width
199+
/// @param dst_height Destination image height
200+
/// @param M 3x3 matrix with transform coefficients
201+
/// @param interpolation Interpolation mode (CV_HAL_INTER_NEAREST, ...)
202+
/// @param border_type Border processing mode (CV_HAL_BORDER_REFLECT, ...)
203+
/// @param border_value Values to use for CV_HAL_BORDER_CONSTANT mode
204+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
205+
int fastcv_hal_warpPerspective(
206+
int src_type,
207+
const uchar* src_data,
208+
size_t src_step,
209+
int src_width,
210+
int src_height,
211+
uchar* dst_data,
212+
size_t dst_step,
213+
int dst_width,
214+
int dst_height,
215+
const double M[9],
216+
int interpolation,
217+
int border_type,
218+
const double border_value[4]);
219+
151220
#endif

3rdparty/fastcv/include/fastcv_hal_utils.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
status == FASTCV_EHWGPU) \
3030
{ \
3131
CV_LOG_DEBUG(NULL, "FastCV status:"<<getFastCVErrorString(status) \
32-
<<"Switching to default OpenCV solution!"); \
32+
<<", Switching to default OpenCV solution!"); \
3333
return CV_HAL_ERROR_NOT_IMPLEMENTED; \
3434
} \
3535
else \
@@ -38,7 +38,7 @@
3838
return CV_HAL_ERROR_UNKNOWN; \
3939
} \
4040
}
41-
41+
4242
#define CV_HAL_RETURN_NOT_IMPLEMENTED(reason) \
4343
{ \
4444
CV_LOG_DEBUG(NULL,"Switching to default OpenCV\nInfo: "<<reason); \
@@ -47,6 +47,7 @@
4747

4848
#define FCV_KernelSize_SHIFT 3
4949
// Packs a kernel size and a depth code into one integer key:
// (ksize << FCV_KernelSize_SHIFT) + depth.
// Both arguments are parenthesized so expression arguments (e.g. `a | b`)
// are evaluated as a unit before the shift/add is applied.
#define FCV_MAKETYPE(ksize,depth) (((ksize)<<FCV_KernelSize_SHIFT) + (depth))
50+
// Approximate floating-point equality: evaluates to true when
// |val1 - val2| < FLT_EPSILON (an absolute tolerance).
// Both arguments are parenthesized so that expression arguments such as
// `a + b` are subtracted as a whole rather than term by term.
#define FCV_CMP_EQ(val1,val2) (fabs((val1) - (val2)) < FLT_EPSILON)
5051

5152
const char* getFastCVErrorString(int status);
5253
const char* borderToString(int border);

3rdparty/fastcv/src/fastcv_hal_core.cpp

Lines changed: 75 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ class ParallelTableLookup : public cv::ParallelLoopBody
3838
};
3939

4040
int fastcv_hal_lut(
41-
const uchar* src_data,
42-
size_t src_step,
43-
size_t src_type,
44-
const uchar* lut_data,
45-
size_t lut_channel_size,
46-
size_t lut_channels,
47-
uchar* dst_data,
48-
size_t dst_step,
49-
int width,
41+
const uchar* src_data,
42+
size_t src_step,
43+
size_t src_type,
44+
const uchar* lut_data,
45+
size_t lut_channel_size,
46+
size_t lut_channels,
47+
uchar* dst_data,
48+
size_t dst_step,
49+
int width,
5050
int height)
5151
{
5252
if((width*height)<=(320*240))
@@ -69,10 +69,10 @@ int fastcv_hal_lut(
6969
}
7070

7171
int fastcv_hal_normHammingDiff8u(
72-
const uchar* a,
73-
const uchar* b,
74-
int n,
75-
int cellSize,
72+
const uchar* a,
73+
const uchar* b,
74+
int n,
75+
int cellSize,
7676
int* result)
7777
{
7878
fcvStatus status;
@@ -169,15 +169,15 @@ int fastcv_hal_transpose2d(
169169
switch (element_size)
170170
{
171171
case 1:
172-
status = fcvTransposeu8_v2(src_data, src_width, src_height, src_step,
172+
status = fcvTransposeu8_v2(src_data, src_width, src_height, src_step,
173173
dst_data, dst_step);
174174
break;
175175
case 2:
176-
status = fcvTransposeu16_v2((const uint16_t*)src_data, src_width, src_height,
176+
status = fcvTransposeu16_v2((const uint16_t*)src_data, src_width, src_height,
177177
src_step, (uint16_t*)dst_data, dst_step);
178178
break;
179179
case 4:
180-
status = fcvTransposef32_v2((const float32_t*)src_data, src_width, src_height,
180+
status = fcvTransposef32_v2((const float32_t*)src_data, src_width, src_height,
181181
src_step, (float32_t*)dst_data, dst_step);
182182
break;
183183
default:
@@ -205,18 +205,18 @@ int fastcv_hal_meanStdDev(
205205
if(src_type != CV_8UC1)
206206
{
207207
CV_HAL_RETURN_NOT_IMPLEMENTED("src type not supported");
208-
}
208+
}
209209
else if(mask != nullptr)
210210
{
211211
CV_HAL_RETURN_NOT_IMPLEMENTED("mask not supported");
212-
}
212+
}
213213
else if(mean_val == nullptr && stddev_val == nullptr)
214214
{
215215
CV_HAL_RETURN_NOT_IMPLEMENTED("null ptr for mean and stddev");
216216
}
217-
217+
218218
float32_t mean, variance;
219-
219+
220220
fcvStatus status = fcvImageIntensityStats_v2(src_data, src_step, 0, 0, width, height,
221221
&mean, &variance, FASTCV_BIASED_VARIANCE_ESTIMATOR);
222222

@@ -278,7 +278,7 @@ int fastcv_hal_flip(
278278
status = fcvFlipRGB888u8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data, dst_step, dir);
279279
else
280280
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Data type:%d is not supported, Switching to default OpenCV solution!", src_type));
281-
281+
282282
CV_HAL_RETURN(status, hal_flip);
283283
}
284284

@@ -294,7 +294,7 @@ int fastcv_hal_rotate(
294294
{
295295
if((src_width*src_height)<(120*80))
296296
CV_HAL_RETURN_NOT_IMPLEMENTED("Switching to default OpenCV solution for lower resolution!");
297-
297+
298298
fcvStatus status;
299299
fcvRotateDegree degree;
300300

@@ -324,11 +324,63 @@ int fastcv_hal_rotate(
324324
status = fcvRotateImageu8(src_data, src_width, src_height, src_step, dst_data, dst_step, degree);
325325
break;
326326
case CV_8UC2:
327-
status = fcvRotateImageInterleavedu8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data,
327+
status = fcvRotateImageInterleavedu8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data,
328328
dst_step, degree);
329329
break;
330330
default:
331331
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("src_type:%d is not supported", src_type));
332332
}
333333
CV_HAL_RETURN(status, hal_rotate);
334+
}
335+
336+
int fastcv_hal_addWeighted8u(
337+
const uchar* src1_data,
338+
size_t src1_step,
339+
const uchar* src2_data,
340+
size_t src2_step,
341+
uchar* dst_data,
342+
size_t dst_step,
343+
int width,
344+
int height,
345+
const double scalars[3])
346+
{
347+
if( (scalars[0] < -128.0f) || (scalars[0] >= 128.0f) ||
348+
(scalars[1] < -128.0f) || (scalars[1] >= 128.0f) ||
349+
(scalars[2] < -(1<<23))|| (scalars[2] >= 1<<23))
350+
CV_HAL_RETURN_NOT_IMPLEMENTED(
351+
cv::format("Alpha:%f,Beta:%f,Gamma:%f is not supported because it's too large or too small\n",
352+
scalars[0],scalars[1],scalars[2]));
353+
354+
INITIALIZATION_CHECK;
355+
356+
fcvStatus status = FASTCV_SUCCESS;
357+
358+
if (height == 1)
359+
{
360+
src1_step = width*sizeof(uchar);
361+
src2_step = width*sizeof(uchar);
362+
dst_step = width*sizeof(uchar);
363+
364+
cv::parallel_for_(cv::Range(0, width), [&](const cv::Range &range){
365+
int rangeWidth = range.end - range.start;
366+
const uint8_t *src1 = src1_data + range.start;
367+
const uint8_t *src2 = src2_data + range.start;
368+
uint8_t *dst = dst_data + range.start;
369+
fcvAddWeightedu8_v2(src1, rangeWidth, height, src1_step, src2, src2_step,
370+
scalars[0], scalars[1], scalars[2], dst, dst_step);
371+
});
372+
}
373+
else
374+
{
375+
cv::parallel_for_(cv::Range(0, height), [&](const cv::Range &range){
376+
int rangeHeight = range.end - range.start;
377+
const uint8_t *src1 = src1_data + range.start * src1_step;
378+
const uint8_t *src2 = src2_data + range.start * src2_step;
379+
uint8_t *dst = dst_data + range.start * dst_step;
380+
fcvAddWeightedu8_v2(src1, width, rangeHeight, src1_step, src2, src2_step,
381+
scalars[0], scalars[1], scalars[2], dst, dst_step);
382+
});
383+
}
384+
385+
CV_HAL_RETURN(status, hal_addWeighted8u_v2);
334386
}

0 commit comments

Comments
 (0)