diff --git a/modules/cannops/CMakeLists.txt b/modules/cannops/CMakeLists.txt
index 0c16c5eb143..557fbe7f492 100644
--- a/modules/cannops/CMakeLists.txt
+++ b/modules/cannops/CMakeLists.txt
@@ -15,3 +15,9 @@ ocv_include_directories(${CMAKE_SOURCE_DIR}/modules/ts/include)
 ocv_add_accuracy_tests(DEPENDS_ON opencv_cannops)
 ocv_add_perf_tests(DEPENDS_ON opencv_cannops)
 ocv_add_samples(opencv_cannops)
+
+# Compile AscendC kernels.
+add_subdirectory(ascendc_kernels)
+ocv_include_directories(${CMAKE_BINARY_DIR}/include/ascendc_kernels)
+ocv_target_link_libraries(opencv_cannops PRIVATE ascendc_kernels)
+ocv_target_link_libraries(opencv_test_cannops PRIVATE ascendc_kernels)
diff --git a/modules/cannops/ascendc_kernels/CMakeLists.txt b/modules/cannops/ascendc_kernels/CMakeLists.txt
new file mode 100644
index 00000000000..c4198e8b8e6
--- /dev/null
+++ b/modules/cannops/ascendc_kernels/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(SOC_VERSION "ascend310p3" CACHE STRING "system on chip type")
+set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package installation directory")
+set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim/cpu")
+# Locate the AscendC CMake helper directory; two candidate paths under the CANN package are probed in order.
+if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
+    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
+elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
+    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
+else()
+    message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
+endif()
+# Pull in ascendc.cmake from the directory found above (presumably the provider of ascendc_library - confirm).
+include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
+# Declare the "ascendc_kernels" target that the parent CMakeLists.txt links into opencv_cannops.
+ascendc_library(ascendc_kernels STATIC
+    threshold_opencv_kernel.cpp
+)
diff --git a/modules/cannops/ascendc_kernels/kernel_tiling_types.h b/modules/cannops/ascendc_kernels/kernel_tiling_types.h
new file mode 100644
index 00000000000..3fbbdd06a63
--- /dev/null
+++ b/modules/cannops/ascendc_kernels/kernel_tiling_types.h
@@ -0,0 +1,22 @@
+#ifndef KERNEL_TILING_H
+#define KERNEL_TILING_H
+
+/*
+ * threshType:
+ *   THRESH_BINARY     = 0,
+ *   THRESH_BINARY_INV = 1,
+ *   THRESH_TRUNC      = 2,
+ *   THRESH_TOZERO     = 3,
+ *   THRESH_TOZERO_INV = 4,
+ */
+#pragma pack(push, 8)
+struct ThresholdOpencvTilingData
+{
+    float maxVal;         // Scalar handed to Select by the kernel for threshType 0 and 1.
+    float thresh;         // Threshold the input elements are compared against.
+    uint32_t totalLength; // Total element count; the kernel passes it to VectorTiling::calculate.
+    uint8_t threshType;   // One of the threshType values listed above (0..4).
+    uint8_t dtype;        // Kernel selector: 0 = CV_8U, 1 = CV_8S, 3 = CV_16S, 4 = CV_32S, 5 = CV_32F, 7 = CV_16F.
+};
+#pragma pack(pop)
+#endif // KERNEL_TILING_H
diff --git a/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp b/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp
new file mode 100644
index 00000000000..7fa1867c8b1
--- /dev/null
+++ b/modules/cannops/ascendc_kernels/threshold_opencv_kernel.cpp
@@ -0,0 +1,379 @@
+#include "kernel_operator.h"
+#include "vector_tiling.h"
+#include "kernel_tiling_types.h"
+
+using namespace AscendC;
+
+// Same-type Cast overloads: they only let KernelThreshold<T, T> compile; the branches calling them are skipped at runtime.
+__aicore__ static inline void Cast(const LocalTensor<half>& dstLocal,
+                                   const LocalTensor<half>& srcLocal, const RoundMode& round_mode,
+                                   const uint32_t calCount){};
+__aicore__ static inline void Cast(const LocalTensor<float>& dstLocal,
+                                   const LocalTensor<float>& srcLocal, const RoundMode& round_mode,
+                                   const uint32_t calCount){};
+
+/**
+ * T: element type of the input/output data in global memory.
+ * C: element type the threshold computation runs in.
+ * If T != C, input is cast from T to C before computing and the result is cast back to T.
+ */
+template <typename T, typename C>
+class KernelThreshold
+{
+public:
+    __aicore__ inline KernelThreshold() {}
+    __aicore__ inline void Init(ThresholdOpencvTilingData* tiling, GM_ADDR x, GM_ADDR y)
+    {
+        tilingData = tiling;
+
+        /**
+         * Buffer bytes needed per element:
+         * 1. inputQueue: sizeof(T) * BUFFER_NUM
+         * 2. outputQueue: sizeof(T) * BUFFER_NUM
+         * 3. maskBuffer: 1 byte at most.
+         */
+        uint64_t bytesPerElem = sizeof(T) * BUFFER_NUM * 2 + sizeof(uint8_t) * 1;
+
+        /**
+         * When a cast is needed, two extra cast buffers are initialized.
+         * Additional bytes per element:
+         * 1. InputCastBuffer: sizeof(C)
+         * 2. outputCastBuffer: sizeof(C)
+         */
+        if (!std::is_same<T, C>::value)
+        {
+            bytesPerElem += sizeof(C) * 2;
+        }
+
+        // Most AscendC APIs need 32-byte alignment, but Compare and Select need
+        // 256-byte alignment; 256 / sizeof(C) is the number of elements processed
+        // in one batch, and it is used as the loop-length alignment.
+        vecTiling.calculate(tilingData->totalLength, GetBlockNum(), GetBlockIdx(), bytesPerElem,
+                            256 / sizeof(C));
+
+        xGM.SetGlobalBuffer((__gm__ T*)x + vecTiling.blockOffset, vecTiling.blockLength);
+        yGM.SetGlobalBuffer((__gm__ T*)y + vecTiling.blockOffset, vecTiling.blockLength);
+
+        // Cast buffers, only needed when T != C.
+        if (!std::is_same<T, C>::value)
+        {
+            pipe.InitBuffer(InputCastBuffer, vecTiling.loopLength * sizeof(C));
+            pipe.InitBuffer(outputCastBuffer, vecTiling.loopLength * sizeof(C));
+        }
+
+        pipe.InitBuffer(inputQueue, BUFFER_NUM, vecTiling.loopLength * sizeof(T));
+        pipe.InitBuffer(outputQueue, BUFFER_NUM, vecTiling.loopLength * sizeof(T));
+        pipe.InitBuffer(maskBuffer, vecTiling.loopLength * sizeof(uint8_t));
+    }
+
+    __aicore__ inline void Run()
+    {
+        for (uint32_t loop = 0; loop < vecTiling.loopCount; loop++) // full-length iterations
+        {
+            uint32_t offset = loop * vecTiling.loopLength;
+            Compute(offset, vecTiling.loopLength);
+        }
+
+        if (vecTiling.loopTailLength != 0) // remainder shorter than loopLength
+        {
+            uint32_t offset = vecTiling.loopCount * vecTiling.loopLength;
+            Compute(offset, vecTiling.loopTailLength);
+        }
+    }
+
+private:
+    __aicore__ inline void Compute(uint32_t offset, uint32_t len)
+    {
+        CopyIn(offset, len);
+
+        // Get the local tensors. If a cast is needed they come from the cast
+        // buffers; otherwise they come from the input/output queues.
+        LocalTensor<C> xLocal = CastInput(inputQueue, InputCastBuffer, len);
+        LocalTensor<C> yLocal = GetOutput(outputQueue, outputCastBuffer);
+
+        Threshold(xLocal, yLocal, len);
+
+        // Free the local input tensor if it did not come from the cast buffer.
+        FreeInput(inputQueue, xLocal);
+        // Cast the output tensor into the output queue if it came from the cast buffer.
+        CastOutput(outputQueue, yLocal, len);
+
+        CopyOut(offset, len);
+    }
+
+    /**
+     * If a cast is needed:
+     * 1. Dequeue the T-typed data from the input queue; it cannot be computed on directly.
+     * 2. Get the C-typed cast buffer, the type the computation runs in.
+     * 3. Cast the data from T to C and free the dequeued tensor.
+     *
+     * If no cast is needed:
+     * 1. Just dequeue the tensor from the input queue.
+     */
+    __aicore__ inline LocalTensor<C> CastInput(TQue<QuePosition::VECIN, BUFFER_NUM>& queue,
+                                               TBuf<TPosition::VECCALC>& buffer, uint32_t len)
+    {
+        LocalTensor<C> xLocal;
+        if (std::is_same<T, C>::value)
+        {
+            xLocal = queue.DeQue<C>();
+        }
+        else
+        {
+            xLocal = buffer.Get<C>();
+            LocalTensor<T> xCast = queue.DeQue<T>();
+            Cast(xLocal, xCast, RoundMode::CAST_NONE, len);
+            queue.FreeTensor(xCast);
+        }
+        return xLocal;
+    }
+
+    /**
+     * If a cast is needed:
+     * 1. Get the local tensor from the cast buffer.
+     *
+     * If no cast is needed:
+     * 1. Allocate the local tensor from the output queue.
+     */
+    __aicore__ inline LocalTensor<C> GetOutput(TQue<QuePosition::VECOUT, BUFFER_NUM>& queue,
+                                               TBuf<TPosition::VECCALC>& buffer)
+    {
+        if (std::is_same<T, C>::value)
+        {
+            return queue.AllocTensor<C>();
+        }
+        else
+        {
+            return buffer.Get<C>();
+        }
+    }
+
+    /**
+     * If a cast is needed:
+     * 1. The input local tensor came from the cast buffer and does not need to be freed.
+     *
+     * If no cast is needed:
+     * 1. The input local tensor came from the input queue and must be freed.
+     */
+    __aicore__ inline void FreeInput(TQue<QuePosition::VECIN, BUFFER_NUM>& queue,
+                                     LocalTensor<C>& xLocal)
+    {
+        if (std::is_same<T, C>::value)
+        {
+            queue.FreeTensor(xLocal);
+        }
+    }
+
+    /**
+     * If a cast is needed:
+     * 1. Allocate a local tensor from the output queue.
+     * 2. Cast the result from C to T.
+     * 3. Enqueue the cast tensor.
+     *
+     * If no cast is needed:
+     * 1. Just enqueue the local tensor.
+     *
+     */
+    __aicore__ inline void CastOutput(TQue<QuePosition::VECOUT, BUFFER_NUM>& queue,
+                                      LocalTensor<C>& yLocal, uint32_t len)
+    {
+        if (std::is_same<T, C>::value)
+        {
+            queue.EnQue(yLocal);
+        }
+        else
+        {
+            LocalTensor<T> yCast = queue.AllocTensor<T>();
+            RoundMode roundMode = RoundMode::CAST_NONE;
+            // Rounding mode depends on the target type; refer to the AscendC Cast API.
+            if (std::is_same<T, int16_t>::value)
+            {
+                roundMode = RoundMode::CAST_RINT;
+            }
+            else if (std::is_same<T, int32_t>::value)
+            {
+                roundMode = RoundMode::CAST_ROUND;
+            }
+            Cast(yCast, yLocal, roundMode, len);
+            queue.EnQue(yCast);
+        }
+    }
+
+    __aicore__ inline void CopyIn(uint32_t offset, uint32_t len)
+    {
+        LocalTensor<T> xLocal = inputQueue.AllocTensor<T>();
+        DataCopy(xLocal, xGM[offset], len);
+        inputQueue.EnQue(xLocal);
+    }
+
+    __aicore__ inline void CopyOut(uint32_t offset, uint32_t len)
+    {
+        LocalTensor<T> yLocal = outputQueue.DeQue<T>();
+        DataCopy(yGM[offset], yLocal, len);
+        outputQueue.FreeTensor(yLocal);
+    }
+
+    /**
+     * Wrapper around the AscendC Compare API.
+     * The level-2 Compare API needs the input length aligned to 256 bytes, so the
+     * tail data is processed with the level-0 API.
+     */
+    __aicore__ inline void CompareWrap(const LocalTensor<uint8_t>& dstLocal,
+                                       const LocalTensor<C>& src0Local,
+                                       const LocalTensor<C>& src1Local, CMPMODE cmpMode,
+                                       uint32_t calCount)
+    {
+        // Number of elements in one 256-byte batch of Compare.
+        uint32_t batchCount = 256 / sizeof(C);
+
+        // Tail elements count.
+        uint32_t tailCount = calCount % batchCount;
+
+        // Level-2 API: the element count passed here is a multiple of batchCount.
+        Compare(dstLocal, src0Local, src1Local, cmpMode, calCount - tailCount);
+
+        // Loop lengths are already aligned to 256 bytes, so the tail count is 0
+        // for every processing loop except the last one.
+        if (tailCount != 0)
+        {
+            BinaryRepeatParams repeatParams = {1, 1, 1, 8, 8, 8};
+            uint32_t tailIdx = calCount - tailCount;
+            uint32_t maskIdx = tailIdx / sizeof(uint8_t); // same tail mask offset as used in SelectWrap
+            Compare(dstLocal[maskIdx], src0Local[tailIdx], src1Local[tailIdx], cmpMode, tailCount,
+                    1, repeatParams);
+        }
+    }
+
+    /**
+     * Wrapper around the AscendC Select API.
+     * The level-2 Select API needs the input length aligned to 256 bytes, so the
+     * tail data is processed with the level-0 API.
+     */
+    __aicore__ inline void SelectWrap(const LocalTensor<C>& dstLocal,
+                                      const LocalTensor<uint8_t>& selMask,
+                                      const LocalTensor<C>& src0Local, C src1Local, SELMODE selMode,
+                                      uint32_t calCount)
+    {
+        uint32_t batchCount = 256 / sizeof(C);
+        uint32_t tailCount = calCount % batchCount;
+
+        Select(dstLocal, selMask, src0Local, src1Local, selMode, calCount - tailCount);
+        if (tailCount != 0)
+        {
+            BinaryRepeatParams repeatParams = {1, 1, 1, 8, 8, 8};
+            uint32_t tailIdx = calCount - tailCount;
+            uint32_t maskIdx = tailIdx / sizeof(uint8_t); // same tail mask offset as written by CompareWrap
+            Select(dstLocal[tailIdx], selMask[maskIdx], src0Local[tailIdx], src1Local, selMode,
+                   tailCount, 1, repeatParams);
+        }
+    }
+
+    __aicore__ inline void Threshold(LocalTensor<C>& xLocal, LocalTensor<C>& yLocal, uint32_t len)
+    {
+        LocalTensor<uint8_t> mask = maskBuffer.Get<uint8_t>();
+        Duplicate(yLocal, static_cast<C>(tilingData->thresh), len); // yLocal holds thresh for the compare
+        switch (tilingData->threshType)
+        {
+            case 0: // THRESH_BINARY
+                CompareWrap(mask, xLocal, yLocal, CMPMODE::LE, len);
+                Duplicate(yLocal, static_cast<C>(0), len);
+                SelectWrap(yLocal, mask, yLocal, static_cast<C>(tilingData->maxVal),
+                           SELMODE::VSEL_TENSOR_SCALAR_MODE, len);
+                break;
+            case 1: // THRESH_BINARY_INV
+                CompareWrap(mask, xLocal, yLocal, CMPMODE::GT, len);
+                Duplicate(yLocal, static_cast<C>(0), len);
+                SelectWrap(yLocal, mask, yLocal, static_cast<C>(tilingData->maxVal),
+                           SELMODE::VSEL_TENSOR_SCALAR_MODE, len);
+                break;
+            case 2: // THRESH_TRUNC
+                CompareWrap(mask, xLocal, yLocal, CMPMODE::LE, len);
+                SelectWrap(yLocal, mask, xLocal, static_cast<C>(tilingData->thresh),
+                           SELMODE::VSEL_TENSOR_SCALAR_MODE, len);
+                break;
+            case 3: // THRESH_TOZERO
+                CompareWrap(mask, xLocal, yLocal, CMPMODE::GT, len);
+                SelectWrap(yLocal, mask, xLocal, static_cast<C>(0),
+                           SELMODE::VSEL_TENSOR_SCALAR_MODE, len);
+                break;
+            case 4: // THRESH_TOZERO_INV
+                CompareWrap(mask, xLocal, yLocal, CMPMODE::LE, len);
+                SelectWrap(yLocal, mask, xLocal, static_cast<C>(0),
+                           SELMODE::VSEL_TENSOR_SCALAR_MODE, len);
+                break;
+            default: // unknown threshType: yLocal keeps the duplicated thresh value
+                break;
+        }
+    }
+
+    TPipe pipe;
+    TQue<QuePosition::VECIN, BUFFER_NUM> inputQueue;
+    TQue<QuePosition::VECOUT, BUFFER_NUM> outputQueue;
+    TBuf<TPosition::VECCALC> InputCastBuffer, outputCastBuffer, maskBuffer;
+
+    GlobalTensor<T> xGM, yGM;
+    VectorTiling vecTiling;
+    ThresholdOpencvTilingData* tilingData;
+};
+
+#define LAUNCH_THRESHOLD_KERNEL(NAME, T, C)                                                      \
+    __aicore__ inline void launch_threshold_kernel_##NAME(ThresholdOpencvTilingData* tilingData, \
+                                                          GM_ADDR x, GM_ADDR y)                  \
+    {                                                                                            \
+        KernelThreshold<T, C> op;                                                                \
+        op.Init(tilingData, x, y);                                                               \
+        op.Run();                                                                                \
+    }
+
+LAUNCH_THRESHOLD_KERNEL(CV_8U, uint8_t, half)   // CV_8U, computed in half
+LAUNCH_THRESHOLD_KERNEL(CV_8S, int8_t, half)    // CV_8S, computed in half
+                                                // CV_16U: no kernel is instantiated
+LAUNCH_THRESHOLD_KERNEL(CV_16S, int16_t, half)  // CV_16S, computed in half
+LAUNCH_THRESHOLD_KERNEL(CV_32S, int32_t, float) // CV_32S, computed in float
+LAUNCH_THRESHOLD_KERNEL(CV_32F, float, float)   // CV_32F, no cast
+                                                // CV_64F: no kernel is instantiated
+LAUNCH_THRESHOLD_KERNEL(CV_16F, half, half)     // CV_16F, no cast
+
+#undef LAUNCH_THRESHOLD_KERNEL
+
+#define CALL_THRESHOLD_KERNEL(NAME) launch_threshold_kernel_##NAME
+// Kernel entry: copies the tiling struct out of tilingGM, then dispatches on tilingData.dtype.
+extern "C" __global__ __aicore__ void threshold_opencv(GM_ADDR tilingGM, GM_ADDR x, GM_ADDR y)
+{
+    ThresholdOpencvTilingData tilingData;
+    auto tempTilingGM = (__gm__ uint8_t*)tilingGM;
+    auto tempTiling = (uint8_t*)&tilingData;
+    for (uint32_t i = 0; i < sizeof(ThresholdOpencvTilingData) / sizeof(uint8_t);
+         ++i, ++tempTilingGM, ++tempTiling)
+    {
+        *tempTiling = *tempTilingGM; // byte-wise copy; the counter is unsigned to match the sizeof bound
+    }
+
+    // AscendC can only call inline functions, so function pointers cannot be used here.
+    // A macro plus a switch statement is used for the dispatch instead.
+    switch (tilingData.dtype)
+    {
+        case 0:
+            CALL_THRESHOLD_KERNEL(CV_8U)(&tilingData, x, y);
+            break;
+        case 1:
+            CALL_THRESHOLD_KERNEL(CV_8S)(&tilingData, x, y);
+            break;
+        case 3:
+            CALL_THRESHOLD_KERNEL(CV_16S)(&tilingData, x, y);
+            break;
+        case 4:
+            CALL_THRESHOLD_KERNEL(CV_32S)(&tilingData, x, y);
+            break;
+        case 5:
+            CALL_THRESHOLD_KERNEL(CV_32F)(&tilingData, x, y);
+            break;
+        case 7:
+            CALL_THRESHOLD_KERNEL(CV_16F)(&tilingData, x, y);
+            break;
+        case 2: case 6: default: // CV_16U, CV_64F and unknown values: no kernel is run
+            break;
+    }
+    // Clear tiling GM cache manually. (cce compiler bug)
+    dcci(tilingGM, 1);
+}
diff --git a/modules/cannops/ascendc_kernels/vector_tiling.h b/modules/cannops/ascendc_kernels/vector_tiling.h
new file mode 100644
index 00000000000..e00dd423c38
--- /dev/null
+++ b/modules/cannops/ascendc_kernels/vector_tiling.h
@@ -0,0 +1,77 @@
+#ifndef TILING_KERNEL_H
+#define TILING_KERNEL_H
+
+#ifdef __CCE_KT_TEST__
+#define __aicore__
+#else
+#define __aicore__ [aicore]
+#endif
+// Returns the multiple of align strictly above n (n + align when n is already aligned); assumes align is a power of two and n >= 0.
+inline __aicore__ int32_t AlignNCeil(int32_t n, int32_t align) { return ((n + align) & ~(align-1)); }
+// Rounds n down to a multiple of align; assumes align is a power of two.
+inline __aicore__ int32_t AlignNFloor(int32_t n, int32_t align) { return (n & ~(align-1)); }
+
+constexpr int32_t BUFFER_NUM = 2;          // Buffer count given to the kernels' TQue queues.
+constexpr int32_t UB_BUF_LEN = 248 * 1024; // Byte budget that GetLoopLengthAndCount splits per element.
+
+struct VectorTiling {
+  __aicore__ inline void calculate(uint64_t _totalLength, uint64_t _blockNum,
+                                   uint64_t _blockIdx, uint64_t _variableBytesPerElem, uint32_t _align) {
+    totalLength = _totalLength;
+    blockNum = _blockNum;
+    blockIdx = _blockIdx;
+    variableBytesPerElem = _variableBytesPerElem;
+    blockLength = 0;  // Stays 0 for blocks that receive no data.
+    blockOffset = 0;
+    align = _align;
+    GetBlockLengthAndOffset();
+    GetLoopLengthAndCount();
+#ifdef __CCE_KT_TEST__
+    std::cout << "Block(" << blockIdx << "): BlockLength = " << blockLength
+              << ", BlockOffset = " << blockOffset
+              << ", LoopLength = " << loopLength
+              << ", LoopCount = " << loopCount
+              << ", LoopTailLength = " << loopTailLength << std::endl;
+#endif
+  }
+
+  __aicore__ inline void GetBlockLengthAndOffset() {
+    // Per-block length: totalLength / blockNum moved up to the next multiple of 32 elements (never 0).
+    uint32_t fullBlockLength = AlignNCeil(totalLength / blockNum, 32);
+    // After rounding up, fewer than blockNum blocks are full; some blocks may get no data.
+    uint32_t fullBlockNum = totalLength / fullBlockLength;
+    uint32_t blockTailLength = totalLength % fullBlockLength;
+
+    if (blockIdx < fullBlockNum) {
+      blockLength = fullBlockLength;
+      blockOffset = blockIdx * blockLength;
+      // The block right after the last full one takes the remainder, if any.
+    } else if (blockTailLength != 0 && blockIdx == fullBlockNum) {
+      blockLength = blockTailLength;
+      blockOffset = blockIdx * fullBlockLength;
+    }
+  }
+
+  /**
+   * @brief Compute the length of one loop, the loop count and the tail length.
+   * Uses as much of UB_BUF_LEN as possible, rounded down to a multiple of align.
+   */
+  __aicore__ inline void GetLoopLengthAndCount() {
+    loopLength = AlignNFloor(UB_BUF_LEN / variableBytesPerElem, align);
+    loopCount = blockLength / loopLength;
+    loopTailLength = blockLength - (loopLength * loopCount);
+  }
+
+  uint64_t totalLength;           // Total number of elements to process.
+  uint64_t blockNum;              // Number of blocks the work is split across.
+  uint64_t blockIdx;              // Index of the block these results are computed for.
+  uint64_t variableBytesPerElem;  // Buffer bytes required per element.
+  uint32_t blockLength;           // Elements assigned to this block.
+  uint32_t blockOffset;           // Element offset of this block's data.
+  uint32_t loopLength;            // Elements processed per full loop.
+  uint32_t loopCount;             // Number of full loops.
+  uint32_t loopTailLength;        // Elements left after the full loops.
+  uint32_t align;                 // Element alignment applied to loopLength.
+};
+
+#endif  // TILING_KERNEL_H
diff --git a/modules/cannops/include/opencv2/ascendc_kernels.hpp b/modules/cannops/include/opencv2/ascendc_kernels.hpp
new file mode 100644
index 00000000000..b030920a62f
--- /dev/null
+++ b/modules/cannops/include/opencv2/ascendc_kernels.hpp
@@ -0,0 +1,7 @@
+#ifndef ASCENDC_KERNELS_H
+#define ASCENDC_KERNELS_H
+
+#include "../../ascendc_kernels/kernel_tiling_types.h"
+#include "aclrtlaunch_threshold_opencv.h"
+
+#endif //ASCENDC_KERNELS_H
diff --git a/modules/cannops/include/opencv2/cann.hpp b/modules/cannops/include/opencv2/cann.hpp
index bd351481624..4f4f3a7d8d2 100644
--- a/modules/cannops/include/opencv2/cann.hpp
+++ b/modules/cannops/include/opencv2/cann.hpp
@@ -318,6 +318,21 @@ CV_EXPORTS_W void initAcl();
  */
 CV_EXPORTS_W void finalizeAcl();
 
+/**
+ * @brief Initialize the DVPP system.
+ * @note The DVPP interfaces used are all version V2.
+ * Supported devices: Atlas Inference Series products, Atlas 200/500 A2 Inference products and
+ * Atlas A2 Training Series products/Atlas 300I A2 Inference products
+ */
+CV_EXPORTS_W void initDvpp();
+
+/**
+ * @brief Finalize the DVPP system.
+ * @note Supported devices: Atlas Inference Series products, Atlas 200/500 A2 Inference products and
+ * Atlas A2 Training Series products/Atlas 300I A2 Inference products
+ */
+CV_EXPORTS_W void finalizeDvpp();
+
 //! @} cann_init
 
 } // namespace cann
diff --git a/modules/cannops/include/opencv2/cann_call.hpp b/modules/cannops/include/opencv2/cann_call.hpp
index 651bff8bba0..e21f339db96 100644
--- a/modules/cannops/include/opencv2/cann_call.hpp
+++ b/modules/cannops/include/opencv2/cann_call.hpp
@@ -9,7 +9,9 @@
 #include <set>
 #include <string>
 #include <acl/acl_base.h>
-#include "opencv2/cann.hpp"
+#include "cann.hpp"
+#include "stream_accessor.hpp"
+#include "ascendc_kernels.hpp"
 
 class aclopAttr;
 
@@ -17,6 +19,15 @@ namespace cv
 {
 namespace cann
 {
+CV_EXPORTS void checkAclError(aclError err, const char* file, const int line, const char* func); // receives an ACL status plus the call site
+void checkAclPtr(void* ptr, const char* file, const int line, const char* func); // NOTE(review): not CV_EXPORTS, unlike checkAclError - confirm CV_ACL_SAFE_CALL_PTR is only used inside the module
+#define CV_ACL_SAFE_CALL(expr) checkAclError((expr), __FILE__, __LINE__, CV_Func)
+#define CV_ACL_SAFE_CALL_PTR(expr)                     \
+    ({                                                 \
+        auto ptr = (expr);                             \
+        checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \
+        ptr;                                           \
+    }) // ({ ... }) is a GNU statement expression (compiler extension); the macro evaluates to ptr
 // Warpper for functions in CANN, callers should not call CANN's api directly, but should call the
 // function provided in cann_call.
 void aclrtMallocWarpper(void** data, size_t size);
@@ -39,7 +50,7 @@ void aclrtMemsetWarpper(std::shared_ptr<uchar>& ptr, int32_t value, size_t count
 //! Type mapping between opencv and cann.
 aclDataType getACLType(int opencvdepth);
 //! Malloc and upload raw data to devices.
-std::shared_ptr<uchar> mallocAndUpload(const void* data, size_t size, AscendStream& stream,
+CV_EXPORTS std::shared_ptr<uchar> mallocAndUpload(const void* data, size_t size, AscendStream& stream,
                                        AscendMat::Allocator* allocator);
 /**
  * @brief Warpper of CANN streams.
@@ -151,6 +162,19 @@ class OperatorRunner
     OperatorRunner& run(AscendStream& stream);
 };
 
+template <typename KERNEL_TYPE, typename TILING_TYPE, typename... ARGS> // Uploads `tiling` to the device and invokes `kernel` with it.
+void kernel_launch(KERNEL_TYPE kernel, AscendStream& stream, TILING_TYPE& tiling, ARGS... args)
+{
+    std::shared_ptr<uchar> tilingDevice =
+        mallocAndUpload(&tiling, sizeof(TILING_TYPE), stream, AscendMat::defaultAllocator());
+    aclrtStream rawStream = AscendStreamAccessor::getStream(stream);
+    CV_ACL_SAFE_CALL(kernel(1, rawStream, tilingDevice.get(), args...)); // first argument is fixed to 1 (presumably the block count - TODO confirm)
+    if (rawStream == nullptr) // no raw stream handle: wait for completion before returning
+    {
+        stream.waitForCompletion();
+    }
+} // NOTE(review): tilingDevice is released on return; confirm the device buffer outlives the launch when rawStream != nullptr
+
 } // namespace cann
 } // namespace cv
 
diff --git a/modules/cannops/include/opencv2/cann_interface.hpp b/modules/cannops/include/opencv2/cann_interface.hpp
index 6b13090f4f1..8d7c90a1cc1 100644
--- a/modules/cannops/include/opencv2/cann_interface.hpp
+++ b/modules/cannops/include/opencv2/cann_interface.hpp
@@ -111,6 +111,7 @@ CV_EXPORTS_W void subtract(const Scalar& src1, const AscendMat& src2, CV_OUT Asc
  * @param scale Optional scale factor.
  * @param dtype Optional depth of the output array.
  * @param stream AscendStream for the asynchronous version.
+ * @note when scale != 1, src must be one of the following types: float16, float32, int32
  * @sa cv::multiply cuda::multiply
  */
 CV_EXPORTS_W void multiply(const InputArray src1, const InputArray src2, OutputArray dst,
@@ -145,6 +146,9 @@ CV_EXPORTS_W void multiply(const Scalar& src1, const AscendMat& src2, CV_OUT Asc
  * @param scale Optional scale factor.
  * @param dtype Optional depth of the output array.
  * @param stream AscendStream for the asynchronous version.
+ * @note when scale == 1, src must be one of the following types: float16, float32, double, uint16,
+ * int8, uint8, int16, int32, int64; when scale != 1, src must be one of the following types:
+ * int32, int16, float16, float32.
  * @sa cv::divide cuda::divide
  */
 CV_EXPORTS_W void divide(const InputArray src1, const InputArray src2, OutputArray dst,
@@ -178,6 +182,7 @@ CV_EXPORTS_W void divide(const Scalar& src1, const AscendMat& src2, CV_OUT Ascen
  * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
  * destination array to be changed. The mask can be used only with single channel images.
  * @param stream AscendStream for the asynchronous version.
+ * @note src must be one of the following types: int32, int16, uint16
  * @sa cv::bitwise_and cuda::bitwise_and
  */
 CV_EXPORTS_W void bitwise_and(const InputArray src1, const InputArray src2, OutputArray dst,
@@ -211,6 +216,7 @@ CV_EXPORTS_W void bitwise_and(const Scalar& src1, const AscendMat& src2, CV_OUT
  * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
  * destination array to be changed. The mask can be used only with single channel images.
  * @param stream AscendStream for the asynchronous version.
+ * @note src must be one of the following types: int32, int16, uint16
  * @sa cv::bitwise_or cuda::bitwise_or
  */
 CV_EXPORTS_W void bitwise_or(const InputArray src1, const InputArray src2, OutputArray dst,
@@ -245,6 +251,7 @@ CV_EXPORTS_W void bitwise_or(const Scalar& src1, const AscendMat& src2, CV_OUT A
  * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
  * destination array to be changed. The mask can be used only with single channel images.
  * @param stream AscendStream for the asynchronous version.
+ * @note src must be one of the following types: int32, int16, uint16
  * @sa cv::bitwise_xor cuda::bitwise_xor
  */
 CV_EXPORTS_W void bitwise_xor(const InputArray src1, const InputArray src2, OutputArray dst,
@@ -277,6 +284,7 @@ CV_EXPORTS_W void bitwise_xor(const Scalar& src1, const AscendMat& src2, CV_OUT
  * @param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
  * destination array to be changed. The mask can be used only with single channel images.
  * @param stream AscendStream for the asynchronous version.
+ * @note src must be one of the following types: int32, int16, uint16
  * @sa cv::bitwise_not cuda::bitwise_not
  */
 CV_EXPORTS_W void bitwise_not(const InputArray src, OutputArray dst,
@@ -306,6 +314,7 @@ The function addWeighted calculates the weighted sum of two arrays as follows:
 
 where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each
 channel is processed independently.
+@note src must be one of the following types: int32, int16, float16, float32.
 
 @sa cv::addWeighted cv::cuda::addWeighted
  */
@@ -326,6 +335,7 @@ CV_EXPORTS_W void addWeighted(const AscendMat& src1, double alpha, const AscendM
 @param type Threshold type. For details, see threshold . The THRESH_MASK, THRESH_OTSU and
 THRESH_TRIANGLE threshold types are not supported.
 @param stream AscendStream for the asynchronous version.
+@note src must be one of the following types: float16, float32.
 
 @sa cv::threshold cv::cuda::threshold
 */
@@ -346,11 +356,13 @@ CV_EXPORTS_W double threshold(const AscendMat& src, CV_OUT AscendMat& dst, doubl
 @param n Number of source matrices.
 @param dst Destination matrix.
 @param stream AscendStream for the asynchronous version.
+@note src must be one of the following types: float16, float32, double, int32, int16, int8, int64,
+uint8, uint16, uint32, uint64.
 
 @sa cv::merge cv::cuda::merge
  */
 CV_EXPORTS_W void merge(const AscendMat* src, size_t n, CV_OUT AscendMat& dst,
-                      AscendStream& stream = AscendStream::Null());
+                        AscendStream& stream = AscendStream::Null());
 /** @overload */
 CV_EXPORTS_W void merge(const std::vector<AscendMat>& src, CV_OUT AscendMat& dst,
                         AscendStream& stream = AscendStream::Null());
@@ -366,11 +378,13 @@ CV_EXPORTS_W void merge(const std::vector<AscendMat>& src, OutputArray& dst,
 @param src Source matrix.
 @param dst Destination array/vector of single-channel matrices.
 @param stream AscendStream for the asynchronous version.
+@note src must be one of the following types: float16, float32, double, int64, int32, uint8, uint16,
+uint32, uint64, int8, int16, bool.
 
 @sa cv::split cv::cuda::split
  */
 CV_EXPORTS_W void split(const AscendMat& src, AscendMat* dst,
-                      AscendStream& stream = AscendStream::Null());
+                        AscendStream& stream = AscendStream::Null());
 /** @overload */
 CV_EXPORTS_W void split(const AscendMat& src, CV_OUT std::vector<AscendMat>& dst,
                         AscendStream& stream = AscendStream::Null());
@@ -386,6 +400,8 @@ CV_EXPORTS_W void split(const InputArray src, CV_OUT std::vector<AscendMat>& dst
 @param src Source matrix.
 @param dst Destination matrix.
 @param stream AscendStream for the asynchronous version.
+@note src must be one of the following types:
+float16, float, int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool
 
 @sa cv::transpose cv::cuda::transpose
  */
@@ -403,6 +419,7 @@ CV_EXPORTS_W void transpose(const AscendMat& src, CV_OUT AscendMat& dst,
 -   \> 0 Flips around y-axis.
 -   \< 0 Flips around both axes.
 @param stream AscendStream for the asynchronous version.
+@note src must be one of the following types: float16, float, int64, int32, int16, uint16
 
 @sa cv::flip cv::cuda::flip
  */
@@ -421,6 +438,7 @@ The function cv::rotate rotates the array in one of three different ways:
 and the rows and cols are switched for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE.
 @param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags
 @param stream AscendStream for the asynchronous version.
+@note src must be one of the following types: float16, float, int64, int32, int16, uint16
 
 @sa cv::rotate
 */
@@ -445,21 +463,6 @@ CV_EXPORTS_W AscendMat crop(InputArray src, const Rect& rect,
 /** @overload */
 CV_EXPORTS_W AscendMat crop(const AscendMat& src, const Rect& rect,
                             AscendStream& stream = AscendStream::Null());
-/** @brief Resizes an image src down to or up to the specified size.
-@param src    input image
-@param dst    output image; it has the size dsize (when it is non-zero) or the size computed from
-src.size(), fx, and fy; the type of dst is the same as of src.
-@param dsize  output image size; if it equals zero, it is computed as:
-     \f[𝚍𝚜𝚒𝚣𝚎 = 𝚂𝚒𝚣𝚎(𝚛𝚘𝚞𝚗𝚍(𝚏𝚡*𝚜𝚛𝚌.𝚌𝚘𝚕𝚜), 𝚛𝚘𝚞𝚗𝚍(𝚏𝚢*𝚜𝚛𝚌.𝚛𝚘𝚠𝚜))\f]
-     Either dsize or both fx and fy must be non-zero.
-@param fx     scale factor along the horizontal axis; when it equals 0, it is computed as
-\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚠𝚒𝚍𝚝𝚑/𝚜𝚛𝚌.𝚌𝚘𝚕𝚜\f]
-
-@param fy     scale factor along the vertical axis; when it equals 0, it is computed as
-\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚑𝚎𝚒𝚐𝚑𝚝/𝚜𝚛𝚌.𝚛𝚘𝚠𝚜\f]
-@param interpolation    interpolation method(see **cv.cann.InterpolationFlags**)
-@sa cv::resize
-*/
 
 //! interpolation algorithm
 enum InterpolationFlags
@@ -478,14 +481,121 @@ enum InterpolationFlags
     INTER_MAX = 7,
 };
 
-CV_EXPORTS_W void resize(InputArray _src, OutputArray _dst, Size dsize, double inv_scale_x,
-                         double inv_scale_y, int interpolation,
-                         AscendStream& stream = AscendStream::Null());
+/** @brief Resizes an image src down to or up to the specified size.
+@param src    input image
+@param dst    output image; it has the size dsize (when it is non-zero) or the size computed from
+src.size(), fx, and fy; the type of dst is the same as of src.
+@param dsize  output image size; if it equals zero, it is computed as:
+     \f[𝚍𝚜𝚒𝚣𝚎 = 𝚂𝚒𝚣𝚎(𝚛𝚘𝚞𝚗𝚍(𝚏𝚡*𝚜𝚛𝚌.𝚌𝚘𝚕𝚜), 𝚛𝚘𝚞𝚗𝚍(𝚏𝚢*𝚜𝚛𝚌.𝚛𝚘𝚠𝚜))\f]
+     Either dsize or both fx and fy must be non-zero.
+@param fx     scale factor along the horizontal axis; when it equals 0, it is computed as
+\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚠𝚒𝚍𝚝𝚑/𝚜𝚛𝚌.𝚌𝚘𝚕𝚜\f]
+
+@param fy     scale factor along the vertical axis; when it equals 0, it is computed as
+\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚑𝚎𝚒𝚐𝚑𝚝/𝚜𝚛𝚌.𝚛𝚘𝚠𝚜\f]
+@param interpolation    interpolation method(see **cv.cann.InterpolationFlags**)
+@param stream AscendStream for the asynchronous version.
+@note There are some constraints on the input datatype, depending on the interpolation method:
+  - nearest neighbor or bilinear interpolation: input images must be uint8, and only GRAY and
+    BGR images are supported. The resolution of the input and output images must be in the
+    range [10*6, 4096*4096].
+  - bicubic interpolation: input images can be of different types; output images must be
+    float or uint8.
+  - pixel area interpolation: input images can be of different types, but output images
+    are always float.
+
+@note Only the following devices are supported when resampling using nearest neighbor or bilinear
+interpolation: Atlas Inference Series products, Atlas 200/500 A2 Inference products and
+Atlas A2 Training Series products/Atlas 300I A2 Inference products.
+@sa cv::resize
+*/
+CV_EXPORTS_W void resize(InputArray src, OutputArray dst, Size dsize, double fx, double fy,
+                         int interpolation, AscendStream& stream = AscendStream::Null());
 /** @overload */
-CV_EXPORTS_W void resize(const AscendMat& src, CV_OUT AscendMat& dst, Size dsize, double inv_scale_x,
-                         double inv_scale_y, int interpolation,
-                         AscendStream& stream = AscendStream::Null());
+CV_EXPORTS_W void resize(const AscendMat& src, CV_OUT AscendMat& dst, Size dsize, double fx,
+                         double fy, int interpolation, AscendStream& stream = AscendStream::Null());
+
+/** @brief Crops a sub-image from a larger one and resizes it to the specified size.
+@param src input array.
+@param dst output array; it has the size dsize (when it is non-zero) or the size computed from
+src.size(), fx, and fy; the type of dst is the same as of src.
+@param rect the rectangle to crop from the input array.
+@param dsize  output image size; if it equals zero, it is computed the same way as in cv::resize.
+@param fx     scale factor along the horizontal axis; when it equals 0, it is computed as
+\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚠𝚒𝚍𝚝𝚑/𝚜𝚛𝚌.𝚌𝚘𝚕𝚜\f]
+@param fy     scale factor along the vertical axis; when it equals 0, it is computed as
+\f[(𝚍𝚘𝚞𝚋𝚕𝚎)𝚍𝚜𝚒𝚣𝚎.𝚑𝚎𝚒𝚐𝚑𝚝/𝚜𝚛𝚌.𝚛𝚘𝚠𝚜\f]
+@param interpolation    interpolation method, only support INTER_NEAREST and INTER_LINEAR here.
+    (see **cv.cann.InterpolationFlags**)
+@note  The input images must be uint8, and only GRAY and BGR images are supported. The resolution of
+input and output images must be in the range [10*6, 4096*4096].
+@note Only the following devices are supported: Atlas Inference Series products, Atlas 200/500 A2
+Inference products and Atlas A2 Training Series products/Atlas 300I A2 Inference products.
+@sa cv::gapi::crop, cv::resize, cv::cann::resize
+*/
+CV_EXPORTS_W void cropResize(const InputArray src, OutputArray dst, const Rect& rect, Size dsize,
+                             double fx, double fy, int interpolation);
+/** @overload */
+CV_EXPORTS_W void cropResize(const AscendMat& src, CV_OUT AscendMat& dst, const Rect& rect,
+                             Size dsize, double fx, double fy, int interpolation);
 
+/** @brief Crops a sub-image from a larger one, resizes it to the specified size, and forms a
+top/left border filled according to the specified border type.
+@param src input array.
+@param dst output array; it has the size Size(dsize.width + left, dsize.height + top).
+@param rect the rectangle to crop from the input array.
+@param dsize  size the cropped image is resized to, before the border is added.
+@param fx     scale factor along the horizontal axis;
+@param fy     scale factor along the vertical axis;
+@param interpolation    interpolation method, only INTER_NEAREST and INTER_LINEAR are supported.
+    (see **cv.cann.InterpolationFlags**)
+@param borderType border extrapolate method, only cv::BorderTypes::BORDER_CONSTANT and
+cv::BorderTypes::BORDER_REPLICATE are supported.
+@param value Border BGR or YUV value if borderType==BORDER_CONSTANT.
+@param top Number of pixels for top padding
+@param left Number of pixels for left padding
+@note  The input images must be uint8, and only GRAY and BGR images are supported. The resolution of
+input and output images must be in the range [10*6, 4096*4096].
+@note Only the following devices are supported: Atlas Inference Series products, Atlas 200/500 A2
+Inference products and Atlas A2 Training Series products/Atlas 300I A2 Inference products.
+@sa cv::gapi::crop, cv::resize, cv::cann::resize, cv::BorderTypes
+*/
+
+CV_EXPORTS_W void cropResizeMakeBorder(const InputArray src, OutputArray dst, const Rect& rect,
+                                       Size dsize, double fx, double fy, int interpolation, int top,
+                                       int left, const int borderType, Scalar value = Scalar());
+/** @overload */
+CV_EXPORTS_W void cropResizeMakeBorder(const AscendMat& src, CV_OUT AscendMat& dst,
+                                       const Rect& rect, Size dsize, double fx, double fy,
+                                       int interpolation, int top, int left, const int borderType,
+                                       Scalar value = Scalar());
+/** @brief Forms a border and fills it with specified bordertype around the copy of input image.
+@param src Source image.
+@param dst Destination image of the same type as src and the size Size(src.cols+left+right,
+src.rows+top+bottom).
+@param top Number of pixels for top padding
+@param bottom Number of pixels for bottom padding
+@param left Number of pixels for left padding
+@param right Number of pixels for right padding
+Parameter specifying how many pixels in each direction from the source image rectangle to
+extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs to be
+built.
+@param borderType Border type. only cv::BorderTypes::BORDER_CONSTANT and
+cv::BorderTypes::BORDER_REPLICATE are supported.
+@param value Border BGR or YUV value if borderType==BORDER_CONSTANT.
+@note  The input images must be uint8, and only GRAY and BGR images are supported. The resolution of
+input and output images must be in the range [10*6, 4096*4096].
+@note Only the following devices are supported: Atlas Inference Series products, Atlas 200/500 A2
+Inference products and Atlas A2 Training Series products/Atlas 300I A2 Inference products.
+@sa  cv::copyMakeBorder, cv::borderInterpolate
+*/
+CV_EXPORTS_W void copyMakeBorder(const InputArray src, OutputArray dst, int top, int bottom,
+                                 int left, int right, int borderType,
+                                 const Scalar& value = Scalar());
+/** @overload */
+CV_EXPORTS_W void copyMakeBorder(const AscendMat& src, CV_OUT AscendMat& dst, int top, int bottom,
+                                 int left, int right, int borderType,
+                                 const Scalar& value = Scalar());
 //! @} cannops_core
 
 //! @addtogroup cannimgproc
@@ -495,10 +605,17 @@ CV_EXPORTS_W void resize(const AscendMat& src, CV_OUT AscendMat& dst, Size dsize
 
 @param src Source image with CV_8U , CV_16U , or CV_32F depth and 1, 3, or 4 channels.
 @param dst Destination image.
-@param code Color space conversion code. For details, see cvtColor .
+@param code Color space conversion code. For details, see cv::ColorConversionCodes .
 @param dstCn Number of channels in the destination image. If the parameter is 0, the number of the
 channels is derived automatically from src and the code .
 @param stream AscendStream for the asynchronous version.
+@note The supported conversion types are as follows:
+  { CV_BGR2BGRA, CV_BGRA2BGR, CV_BGR2RGBA, CV_RGBA2BGR,
+  CV_BGR2RGB, CV_BGRA2RGBA, CV_BGR2GRAY, CV_RGB2GRAY,
+  CV_GRAY2BGR, CV_GRAY2BGRA, CV_BGRA2GRAY, CV_RGBA2GRAY,
+  CV_BGR2XYZ, CV_RGB2XYZ, CV_XYZ2BGR, CV_XYZ2RGB,
+  CV_BGR2YCrCb, CV_RGB2YCrCb, CV_YCrCb2BGR, CV_YCrCb2RGB,
+  CV_BGR2YUV, CV_RGB2YUV, CV_YUV2BGR, CV_YUV2RGB }
 
 @sa cv::cvtColor cv::cuda::cvtColor
  */
diff --git a/modules/cannops/include/opencv2/dvpp_call.hpp b/modules/cannops/include/opencv2/dvpp_call.hpp
new file mode 100644
index 00000000000..e70d56ea801
--- /dev/null
+++ b/modules/cannops/include/opencv2/dvpp_call.hpp
@@ -0,0 +1,134 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CANNOPS_DVPP_CALL_HPP
+#define OPENCV_CANNOPS_DVPP_CALL_HPP
+
+// Defined ahead of the DVPP include below; presumably consumed by hi_dvpp.h -- TODO confirm.
+#ifndef ENABLE_DVPP_INTERFACE
+    #define ENABLE_DVPP_INTERFACE
+#endif // ENABLE_DVPP_INTERFACE
+
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <vector>
+#include <string>
+#include <acl/acl.h>
+#include <acl/acl_op_compiler.h>
+#include <acl/dvpp/hi_dvpp.h>
+#include "acl/acl_op.h"
+#include "cann_call.hpp"
+
+namespace cv
+{
+namespace cann
+{
+/**
+ * @brief Picture descriptor handed to the DVPP VPC wrappers: a shared buffer pointer, alignment
+ * settings and the hi_vpc_pic_info structure itself.
+ */
+struct AscendPicDesc
+{
+    const char* name = nullptr;
+    std::shared_ptr<hi_void> data;
+    std::vector<int64_t> batchNum;
+
+    size_t widthAlignment = 16;
+    size_t heightAlignment = 1;
+    size_t sizeAlignment = 3;
+    size_t sizeNum = 3;
+
+    // Value-initialized so that a default-constructed descriptor holds no indeterminate fields.
+    hi_vpc_pic_info Pic{};
+    AscendPicDesc& setMemAlign();
+    AscendPicDesc& setPic(hi_pixel_format _picture_format);
+    std::shared_ptr<hi_void> allocate();
+    AscendPicDesc() {}
+    AscendPicDesc(const AscendMat& ascendMat, hi_pixel_format _picture_format);
+    AscendPicDesc(const Mat& mat, hi_pixel_format _picture_format);
+};
+
+/*
+ ***************************** hi_mpi_vpc wrappers ***************************
+ The DVPP VPC interfaces here are all version v2. Only the following devices are supported: Atlas
+ Inference Series products, Atlas 200/500 A2 Inference products and Atlas A2 Training Series
+ products/Atlas 300I A2 Inference products.
+*/
+
+/**
+ * @brief Calls hi_mpi_vpc_resize on channel chnId and reports a failure through CV_Error.
+ * @param chnId VPC channel the call is issued on.
+ * @param inPic source picture info.
+ * @param outPic destination picture info.
+ * @param interpolation interpolation code, forwarded unchanged to hi_mpi_vpc_resize.
+ * @param taskID forwarded to hi_mpi_vpc_resize (presumably receives the task id -- TODO confirm).
+ */
+inline void vpcResizeWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic,
+                             int interpolation, uint32_t* taskID)
+{
+    const auto ret = hi_mpi_vpc_resize(chnId, &inPic, &outPic, 0, 0, interpolation, taskID, -1);
+    if (ret != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to resize image");
+}
+void vpcCropResizeWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic,
+                          int cnt, uint32_t* taskID, const Rect& rect, Size dsize,
+                          int interpolation);
+
+void vpcCropResizeMakeBorderWarpper(hi_vpc_chn chnId, std::vector<AscendPicDesc>& inPicDesc,
+                                    std::vector<AscendPicDesc>& outPicDesc, int cnt,
+                                    uint32_t* taskID, const Rect& rect, Size dsize,
+                                    int interpolation, const int borderType, Scalar scalarV,
+                                    int top, int left);
+void vpcCopyMakeBorderWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic,
+                              uint32_t* taskID, int* offsets, int bordertype, Scalar value);
+/*****************************************************************************/
+
+/**
+ * @brief Interface for calling DVPP operator descriptors.
+ * The DVPP VPC interfaces here are all version v2. Supported devices: Atlas Inference Series
+ * products, Atlas 200/500 A2 Inference products and Atlas A2 Training Series products/Atlas 300I A2
+ * Inference products.
+ *
+ * NOTE(review): the constructor calls createChannel() and the destructor calls reset(), yet the
+ * class is implicitly copyable. If reset() releases the channel, a copy would release it twice;
+ * confirm, and consider deleting the copy operations.
+ */
+class DvppOperatorDesc
+{
+private:
+    DvppOperatorDesc& addInput(AscendPicDesc& picDesc);
+    DvppOperatorDesc& addOutput(AscendPicDesc& picDesc);
+    std::set<std::shared_ptr<hi_void>> holder;
+
+public:
+    DvppOperatorDesc()
+    {
+        chnId = 0;
+        stChnAttr = {};
+        createChannel();
+    }
+    virtual ~DvppOperatorDesc() { reset(); }
+    DvppOperatorDesc& addInput(const AscendMat& mat);
+    DvppOperatorDesc& addOutput(AscendMat& mat);
+    DvppOperatorDesc& addInput(const Mat& mat);
+    DvppOperatorDesc& addOutput(Mat& mat);
+
+    DvppOperatorDesc& getResult(Mat& dst, uint32_t& taskIDResult);
+    DvppOperatorDesc& getResult(AscendMat& dst, uint32_t& taskIDResult);
+
+    DvppOperatorDesc& reset();
+    DvppOperatorDesc& createChannel();
+
+    std::vector<AscendPicDesc> inputDesc_;
+    std::vector<AscendPicDesc> outputDesc_;
+
+    hi_vpc_chn chnId;
+    hi_vpc_chn_attr stChnAttr;
+};
+
+} // namespace cann
+} // namespace cv
+
+#endif // OPENCV_CANNOPS_DVPP_CALL_HPP
diff --git a/modules/cannops/misc/python/test/test_cannops.py b/modules/cannops/misc/python/test/test_cannops.py
index f1b53bc192c..48d4ff18d11 100644
--- a/modules/cannops/misc/python/test/test_cannops.py
+++ b/modules/cannops/misc/python/test/test_cannops.py
@@ -24,6 +24,7 @@ def genMask(mask, listx, listy):
 class cannop_test(NewOpenCVTests):
     def test_ascend(self):
         cv.cann.initAcl()
+        cv.cann.initDvpp()
         cv.cann.getDevice()
         cv.cann.setDevice(0)
         stream = cv.cann.AscendStream_Null()
@@ -275,6 +276,50 @@ def test_imgproc(self):
                 aclMat, 127, 255, tType)
             self.assertTrue(np.allclose(cvThresh, cannThresh.download()))
             self.assertTrue(np.allclose(cvRet, cannRet))
+
+        npMat = (np.random.random((1280, 1024, 3)) * 255).astype(np.uint8)
+        w_off, h_off, crop_w, crop_h = 0, 0, 512, 384
+        roi = [w_off, h_off, crop_w, crop_h]
+        aclMat = cv.cann.AscendMat()
+        aclMat.upload(npMat)
+
+        # resize: host (numpy) and device (AscendMat) overloads are both compared with cv.resize.
+        dstSize = np.array([crop_w, crop_h])
+        self.assertTrue(np.allclose(cv.cann.resize(npMat, dstSize, 0, 0, 1),
+                        cv.resize(npMat, dstSize, 0, 0, 1)))
+        self.assertTrue(np.allclose(cv.cann.resize(aclMat, dstSize, 0, 0, 1).download(),
+                        cv.resize(npMat, dstSize, 0, 0, 1)))
+        # cropResize. NOTE(review): the trailing ", 0" is assertTrue's msg, not a tolerance.
+        self.assertTrue(np.allclose(cv.cann.cropResize(npMat, roi, dstSize, 0, 0, 1),
+                        cv.resize(npMat[h_off:crop_h, w_off:crop_w], dstSize, 0, 0, 1)), 0)
+        self.assertTrue(np.allclose(cv.cann.cropResize(aclMat, roi, dstSize, 0, 0, 1).download(),
+                        cv.resize(npMat[h_off:crop_h, w_off:crop_w], dstSize, 0, 0, 1)), 0)
+
+        # cropResizeMakeBorder. NOTE(review): the trailing ", 1" below is np.allclose's rtol.
+        # TODO cv.copyMakeBorder ignores borderColorValue param (unused below); find out why, fix
+        borderColorValue = (100, 0, 255)
+        top, bottom, left, right = 32, 0, 10, 0
+        borderTypes = [0, 1]
+
+        for borderType in borderTypes:
+            self.assertTrue(np.allclose(cv.cann.cropResizeMakeBorder(npMat, roi, dstSize,
+                                0, 0, 1, top, left, borderType),
+                            cv.copyMakeBorder(cv.resize(npMat[h_off:crop_h, w_off:crop_w],
+                                dstSize, 0, 0, 1), top, bottom, left, right, borderType), 1))
+            self.assertTrue(np.allclose(cv.cann.cropResizeMakeBorder(aclMat, roi, dstSize,
+                                0, 0, 1, top, left, borderType).download(),
+                            cv.copyMakeBorder(cv.resize(npMat[h_off:crop_h, w_off:crop_w],
+                                dstSize, 0, 0, 1), top, bottom, left, right, borderType), 1))
+
+        # copyMakeBorder: checked for every entry of borderTypes, on host and device inputs.
+        for borderType in borderTypes:
+            self.assertTrue(np.allclose(cv.cann.copyMakeBorder(npMat, top, bottom, left, right,
+                                                               borderType),
+                            cv.copyMakeBorder(npMat, top, bottom, left, right, borderType)))
+            self.assertTrue(np.allclose(cv.cann.copyMakeBorder(aclMat, top, bottom, left, right,
+                                                               borderType).download(),
+                            cv.copyMakeBorder(npMat, top, bottom, left, right, borderType)))
+
         cv.cann.resetDevice()
 
 if __name__ == '__main__':
diff --git a/modules/cannops/perf/perf_core.cpp b/modules/cannops/perf/perf_core.cpp
index a9d86fca881..914a122d287 100644
--- a/modules/cannops/perf/perf_core.cpp
+++ b/modules/cannops/perf/perf_core.cpp
@@ -11,6 +11,7 @@ namespace
 {
 #define TYPICAL_ASCEND_MAT_SIZES \
     Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz4320p)
+#define DVPP_ASCEND_MAT_SIZES Values(::perf::sz1080p, ::perf::sz2K, ::perf::sz2160p, ::perf::sz5MP)
 #define DEF_PARAM_TEST(name, ...) \
     typedef ::perf::TestBaseWithParam<testing::tuple<__VA_ARGS__>> name
 
@@ -157,5 +158,176 @@ PERF_TEST_P(NPU, CROP_OVERLOAD, TYPICAL_ASCEND_MAT_SIZES)
     cv::cann::resetDevice();
     SANITY_CHECK_NOTHING();
 }
+
+PERF_TEST_P(CPU, RESIZE, DVPP_ASCEND_MAT_SIZES)
+{
+    // CPU baseline: cv::resize of a CV_8UC3 image to 256x256 with interpolation flag 1.
+    Mat input(GET_PARAM(0), CV_8UC3), output;
+    declare.in(input, WARMUP_RNG);
+    const Size targetSize(256, 256);
+    TEST_CYCLE_N(10) { cv::resize(input, output, targetSize, 0, 0, 1); }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(NPU, RESIZE, DVPP_ASCEND_MAT_SIZES)
+{
+    Mat mat(GET_PARAM(0), CV_32FC3);
+    AscendMat dst, src;
+    // Fill the host matrix before uploading, so the device does not process uninitialized data.
+    declare.in(mat, WARMUP_RNG);
+    src.upload(mat);
+    Size dsize = Size(256, 256);
+    TEST_CYCLE_N(10) { cv::cann::resize(src, dst, dsize, 0, 0, 3); }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(NPU, THRESHOLD, TYPICAL_ASCEND_MAT_SIZES)
+{
+    Mat mat(GET_PARAM(0), CV_32FC3);
+    AscendMat dst, src;
+    // Fill the host matrix before uploading, so the device does not process uninitialized data.
+    declare.in(mat, WARMUP_RNG);
+    src.upload(mat);
+    TEST_CYCLE_N(10) { cv::cann::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY); }
+    SANITY_CHECK_NOTHING();
+}
+PERF_TEST_P(CPU, THRESHOLD, TYPICAL_ASCEND_MAT_SIZES)
+{
+    // CPU baseline for the NPU THRESHOLD benchmark: binary threshold on a CV_32FC3 image.
+    Mat input(GET_PARAM(0), CV_32FC3), output;
+    declare.in(input, WARMUP_RNG);
+    TEST_CYCLE_N(10) { cv::threshold(input, output, 100.0, 255.0, cv::THRESH_BINARY); }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(NPU, RESIZE_INTER_NEAREST, DVPP_ASCEND_MAT_SIZES)
+{
+    // Host-matrix overload of cv::cann::resize, called with interpolation flag 0.
+    Mat input(GET_PARAM(0), CV_8UC3), output;
+    declare.in(input, WARMUP_RNG);
+    const Size targetSize(256, 256);
+    TEST_CYCLE_N(10) { cv::cann::resize(input, output, targetSize, 0, 0, 0); }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(NPU, COPY_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES)
+{
+    // Only the source and the destination matrices are needed by this benchmark.
+    Mat cpuMat(GET_PARAM(0), CV_8UC3);
+    Mat checker;
+    declare.in(cpuMat, WARMUP_RNG);
+
+    // 20-pixel border on every side.
+    const int top = 20, bottom = 20, left = 20, right = 20;
+    // 1 is the value of cv::BORDER_REPLICATE.
+    const int borderType = 1;
+    const Scalar value(0, 0, 255);
+
+    TEST_CYCLE_N(10)
+    {
+        cv::cann::copyMakeBorder(cpuMat, checker, top, bottom, left, right, borderType, value);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+PERF_TEST_P(CPU, COPY_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES)
+{
+    // CPU baseline for the NPU COPY_MAKE_BORDER benchmark; same arguments, cv::copyMakeBorder.
+    Mat cpuMat(GET_PARAM(0), CV_8UC3);
+    Mat checker;
+    declare.in(cpuMat, WARMUP_RNG);
+
+    // 20-pixel border on every side.
+    const int top = 20, bottom = 20, left = 20, right = 20;
+    // 1 is the value of cv::BORDER_REPLICATE.
+    const int borderType = 1;
+    const Scalar value(0, 0, 255);
+
+    TEST_CYCLE_N(10)
+    {
+        cv::copyMakeBorder(cpuMat, checker, top, bottom, left, right, borderType, value);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(NPU, CROP_RESIZE_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES)
+{
+    const Size size = GET_PARAM(0);
+    Mat cpuMat(size, CV_8UC3);
+    Mat checker;
+    declare.in(cpuMat, WARMUP_RNG);
+
+    // Crop a full-height, half-width region starting at x = 1, then halve each dimension.
+    const Rect b(1, 0, size.width / 2, size.height);
+    const Size dsize(size.width / 4, size.height / 2);
+    const int top = 20, left = 20;
+    // 0 is the value of cv::BORDER_CONSTANT, so the border is filled with `value`.
+    const int borderType = 0;
+    const Scalar value(1, 1, 1);
+
+    TEST_CYCLE_N(10)
+    {
+        cv::cann::cropResizeMakeBorder(cpuMat, checker, b, dsize, 0, 0, 1, top, left, borderType,
+                                       value);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(CPU, CROP_RESIZE_MAKE_BORDER, DVPP_ASCEND_MAT_SIZES)
+{
+    const Size size = GET_PARAM(0);
+    Mat cpuMat(size, CV_8UC3);
+    Mat resized_cv, cpuOpRet;
+    declare.in(cpuMat, WARMUP_RNG);
+
+    const Rect b(1, 0, size.width / 2, size.height);
+    const Size dsize(size.width / 4, size.height / 2);
+    // Border on the top and left only, matching the NPU CROP_RESIZE_MAKE_BORDER benchmark.
+    const int top = 20, bottom = 0, left = 20, right = 0;
+    const int borderType = 0;
+    const Scalar value(1, 1, 1);
+
+    // CPU equivalent of the fused NPU call: crop (ROI view), resize, then add the border.
+    TEST_CYCLE_N(10)
+    {
+        Mat cropped_cv(cpuMat, b);
+        cv::resize(cropped_cv, resized_cv, dsize, 0, 0, 1);
+        cv::copyMakeBorder(resized_cv, cpuOpRet, top, bottom, left, right, borderType, value);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(NPU, CROP_RESIZE, DVPP_ASCEND_MAT_SIZES)
+{
+    const Size size = GET_PARAM(0);
+    Mat cpuMat(size, CV_8UC3);
+    Mat checker;
+    declare.in(cpuMat, WARMUP_RNG);
+    const Rect b(1, 0, size.width / 2, size.height);
+    const Size dsize(size.width / 4, size.height / 2);
+
+    TEST_CYCLE_N(10) { cv::cann::cropResize(cpuMat, checker, b, dsize, 0, 0, 1); }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(CPU, CROP_RESIZE, DVPP_ASCEND_MAT_SIZES)
+{
+    const Size size = GET_PARAM(0);
+    Mat cpuMat(size, CV_8UC3);
+    Mat resized_cv;
+    declare.in(cpuMat, WARMUP_RNG);
+    const Rect b(1, 0, size.width / 2, size.height);
+    const Size dsize(size.width / 4, size.height / 2);
+    TEST_CYCLE_N(10)
+    {
+        Mat cropped_cv(cpuMat, b);
+        cv::resize(cropped_cv, resized_cv, dsize, 0, 0, 1);
+    }
+    SANITY_CHECK_NOTHING();
+}
+
 } // namespace
 } // namespace opencv_test
diff --git a/modules/cannops/perf/perf_element_operations.cpp b/modules/cannops/perf/perf_element_operations.cpp
index 0612abe6085..4527346e190 100644
--- a/modules/cannops/perf/perf_element_operations.cpp
+++ b/modules/cannops/perf/perf_element_operations.cpp
@@ -207,5 +207,25 @@ PERF_TEST_P(CPU, MAT_BITWISE_NOT_MAT, testing::Combine(TYPICAL_ASCEND_MAT_SIZES,
     SANITY_CHECK_NOTHING();
 }
 
+PERF_TEST_P(NPU, THRESHOLD_ASCENDC, testing::Combine(TYPICAL_ASCEND_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F)))
+{
+    Mat mat(GET_PARAM(0), GET_PARAM(1));
+    AscendMat dst, src;
+    // Fill the host matrix before uploading, so the device does not process uninitialized data.
+    declare.in(mat, WARMUP_RNG);
+    src.upload(mat);
+    TEST_CYCLE_N(10) { cv::cann::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY); }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P(CPU, THRESHOLD_ASCENDC, testing::Combine(TYPICAL_ASCEND_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F)))
+{
+    // CPU baseline for NPU/THRESHOLD_ASCENDC; perf_core.cpp already defines a CPU/THRESHOLD test.
+    Mat mat(GET_PARAM(0), GET_PARAM(1)), dst;
+    declare.in(mat, WARMUP_RNG);
+    TEST_CYCLE_N(10) { cv::threshold(mat, dst, 100.0, 255.0, cv::THRESH_BINARY); }
+    SANITY_CHECK_NOTHING();
+}
+
 } // namespace
 } // namespace opencv_test
diff --git a/modules/cannops/perf/perf_main.cpp b/modules/cannops/perf/perf_main.cpp
index 33503ac4158..9e03d48904f 100644
--- a/modules/cannops/perf/perf_main.cpp
+++ b/modules/cannops/perf/perf_main.cpp
@@ -10,8 +10,18 @@ class CannEnvironment : public ::testing::Environment
 {
 public:
     virtual ~CannEnvironment() = default;
-    virtual void SetUp() CV_OVERRIDE { cv::cann::initAcl(); }
-    virtual void TearDown() CV_OVERRIDE { cv::cann::finalizeAcl(); }
+    virtual void SetUp() CV_OVERRIDE
+    {
+        initAcl();
+        cv::cann::setDevice(DEVICE_ID);
+        initDvpp();
+    }
+    virtual void TearDown() CV_OVERRIDE
+    {
+        finalizeDvpp();          // release in the reverse order of SetUp():
+        cv::cann::resetDevice(); // DVPP first, then the device,
+        finalizeAcl();           // and ACL last.
+    }
 };
 
 static void initTests()
diff --git a/modules/cannops/src/ascend_mat.cpp b/modules/cannops/src/ascend_mat.cpp
index ba17a545bb7..dde838c8d37 100644
--- a/modules/cannops/src/ascend_mat.cpp
+++ b/modules/cannops/src/ascend_mat.cpp
@@ -23,7 +23,11 @@ std::shared_ptr<uchar> DefaultAllocator::allocate(size_t size)
 
 bool DefaultAllocator::allocate(cv::cann::AscendMat* mat, int rows, int cols, size_t elemSize)
 {
-    mat->data = allocate(elemSize * cols * rows);
+    size_t totalBytes = elemSize * cols * rows;
+
+    // Round up to a multiple of 32 bytes; adding 32 rather than 31 always pads by 1..32 bytes.
+    totalBytes = ((totalBytes + 32) & ~31);
+    mat->data = allocate(totalBytes);
     mat->step = cols * elemSize;
 
     return true;
diff --git a/modules/cannops/src/cann_call.cpp b/modules/cannops/src/cann_call.cpp
index 3b83052ccbe..97d49d66fd1 100644
--- a/modules/cannops/src/cann_call.cpp
+++ b/modules/cannops/src/cann_call.cpp
@@ -11,7 +11,7 @@ namespace cv
 namespace cann
 {
 /*******************************Acl Error Checker*****************************/
-static inline void checkAclError(aclError err, const char* file, const int line, const char* func)
+void checkAclError(aclError err, const char* file, const int line, const char* func)
 {
     if (ACL_SUCCESS != err)
     {
@@ -20,7 +20,7 @@ static inline void checkAclError(aclError err, const char* file, const int line,
     }
 }
 
-static inline void checkAclPtr(void* ptr, const char* file, const int line, const char* func)
+void checkAclPtr(void* ptr, const char* file, const int line, const char* func)
 {
     if (nullptr == ptr)
     {
@@ -29,14 +29,6 @@ static inline void checkAclPtr(void* ptr, const char* file, const int line, cons
     }
 }
 
-#define CV_ACL_SAFE_CALL(expr) checkAclError((expr), __FILE__, __LINE__, CV_Func)
-#define CV_ACL_SAFE_CALL_PTR(expr)                     \
-    ({                                                 \
-        auto ptr = (expr);                             \
-        checkAclPtr(ptr, __FILE__, __LINE__, CV_Func); \
-        ptr;                                           \
-    })
-
 /******************************Acl Runtime Warpper****************************/
 void aclrtMallocWarpper(void** data, size_t size)
 {
diff --git a/modules/cannops/src/core.cpp b/modules/cannops/src/core.cpp
index 7d328915ef9..027cd119f10 100644
--- a/modules/cannops/src/core.cpp
+++ b/modules/cannops/src/core.cpp
@@ -241,6 +241,56 @@ AscendMat crop(InputArray _src, const Rect& rect, AscendStream& stream)
     return crop(src, rect, stream);
 }
 
+/************************** resize **************************/
+// Validates the arguments of a resize call and, when dsize is empty, derives it from the source
+// size and the scale factors.
+// NOTE: interpolation is an in/out parameter: INTER_LINEAR and INTER_NEAREST are exchanged before
+// returning (presumably to match the numbering expected by DVPP -- TODO confirm). INTER_CUBIC and
+// INTER_AREA are left unchanged.
+void checkResize(Size& ssize, Size& dsize, double inv_scale_x, double inv_scale_y,
+                 int& interpolation)
+{
+    CV_Assert(!ssize.empty());
+
+    // Supported modes: bilinear, nearest neighbor, bicubic and pixel area relation.
+    CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST ||
+              interpolation == INTER_CUBIC || interpolation == INTER_AREA);
+    if (interpolation == INTER_LINEAR)
+        interpolation = INTER_NEAREST;
+    else if (interpolation == INTER_NEAREST)
+        interpolation = INTER_LINEAR;
+
+    float_t scaleX = (float_t)inv_scale_x;
+    float_t scaleY = (float_t)inv_scale_y;
+    if (!dsize.empty())
+    {
+        // An explicit destination size takes precedence over the scale factors.
+        scaleX = (float_t)dsize.width / ssize.width;
+        scaleY = (float_t)dsize.height / ssize.height;
+        CV_Assert(scaleX > 0);
+        CV_Assert(scaleY > 0);
+        return;
+    }
+
+    // No destination size: both scale factors must be positive after conversion to float_t.
+    CV_Assert(scaleX > 0);
+    CV_Assert(scaleY > 0);
+    dsize = Size(saturate_cast<int>(ssize.width * inv_scale_x),
+                 saturate_cast<int>(ssize.height * inv_scale_y));
+    CV_Assert(!dsize.empty());
+}
+
+// Resizes src into dst (which the caller has already allocated) through vpcResizeWarpper.
+template <typename inMat, typename outMat>
+void resize(const inMat& src, outMat& dst, int interpolation)
+{
+    DvppOperatorDesc op;
+    op.addInput(src).addOutput(dst);
+
+    uint32_t taskID = 0;
+    vpcResizeWarpper(op.chnId, op.inputDesc_[0].Pic, op.outputDesc_[0].Pic, interpolation, &taskID);
+    op.getResult(dst, taskID);
+}
 void resize(const AscendMat& src, AscendMat& dst, int32_t* dstSize, int interpolation,
             AscendStream& stream)
 {
@@ -258,7 +308,6 @@ void resize(const AscendMat& src, AscendMat& dst, int32_t* dstSize, int interpol
         default:
             break;
     }
-
     runner.setOp(mode)
         .addInput(src, "images")
         .addInput<int32_t>(dstSize, dims, 1, ACL_INT32, "size")
@@ -271,30 +320,18 @@ void resize(const AscendMat& src, AscendMat& dst, Size dsize, double inv_scale_x
             double inv_scale_y, int interpolation, AscendStream& stream)
 {
     Size ssize = src.size();
-    CV_Assert(!ssize.empty());
-    float_t scaleX = (float_t)inv_scale_x;
-    float_t scaleY = (float_t)inv_scale_y;
-    CV_Assert(interpolation == INTER_CUBIC || interpolation == INTER_AREA);
+    checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation);
+    int32_t dstSize[] = {dsize.height, dsize.width};
+    dst.create(dstSize[0], dstSize[1], src.type());
 
-    if (dsize.empty())
+    if (interpolation == INTER_CUBIC || interpolation == INTER_AREA)
     {
-        CV_Assert(scaleX > 0);
-        CV_Assert(scaleY > 0);
-        dsize = Size(saturate_cast<int>(ssize.width * inv_scale_x),
-                     saturate_cast<int>(ssize.height * inv_scale_y));
-        CV_Assert(!dsize.empty());
+        resize(src, dst, dstSize, interpolation, stream);
     }
     else
     {
-        scaleX = (float_t)dsize.width / ssize.width;
-        scaleY = (float_t)dsize.height / ssize.height;
-        CV_Assert(scaleX > 0);
-        CV_Assert(scaleY > 0);
+        resize(src, dst, interpolation);
     }
-
-    int32_t dstSize[] = {dsize.width, dsize.height};
-    dst.create(dstSize[0], dstSize[1], src.type());
-    resize(src, dst, dstSize, interpolation, stream);
 }
 
 void resize(InputArray _src, OutputArray _dst, Size dsize, double inv_scale_x, double inv_scale_y,
@@ -302,8 +339,138 @@ void resize(InputArray _src, OutputArray _dst, Size dsize, double inv_scale_x, d
 {
     AscendMat src, dst;
     src.upload(_src, stream);
-    resize(src, dst, dsize, inv_scale_x, inv_scale_y, interpolation, stream);
-    dst.download(_dst, stream);
+    if (interpolation == INTER_CUBIC || interpolation == INTER_AREA)
+    {
+        resize(src, dst, dsize, inv_scale_x, inv_scale_y, interpolation, stream);
+        dst.download(_dst, stream);
+    }
+    else
+    {
+        Mat srcCV = _src.getMat();
+        Size ssize = srcCV.size();
+        checkResize(ssize, dsize, inv_scale_x, inv_scale_y, interpolation);
+        _dst.create(dsize, srcCV.type());
+        Mat dstCV = _dst.getMat();
+        resize(srcCV, dstCV, interpolation);
+    }
+}
+
+/************************** CropResize **************************/
+template <typename inMat, typename outMat>
+void cropResize(const inMat& src, outMat& dst, const Rect& rect, Size dsize, int interpolation)
+{
+    // Stage src and dst on a DVPP operator, then crop `rect` out of src and resize it to dsize.
+    DvppOperatorDesc op;
+    op.addInput(src);
+    op.addOutput(dst);
+    const int regionCount = 1;
+    uint32_t submittedTask = 0;
+    vpcCropResizeWarpper(op.chnId, op.inputDesc_[0].Pic, op.outputDesc_[0].Pic, regionCount,
+                         &submittedTask, rect, dsize, interpolation);
+    uint32_t taskIDResult = submittedTask; // getResult() takes the id by non-const reference
+    op.getResult(dst, taskIDResult);
+}
+
+void cropResize(const AscendMat& src, AscendMat& dst, const Rect& rect, Size dsize,
+                double inv_scale_x, double inv_scale_y, int interpolation)
+{
+    Size inputSize(src.size()); // named local: handed to checkResize() together with dsize
+    checkResize(inputSize, dsize, inv_scale_x, inv_scale_y, interpolation);
+    dst.create(dsize.height, dsize.width, src.type()); // rows = height, cols = width
+    cropResize(src, dst, rect, dsize, interpolation);  // five-argument call: templated helper
+}
+
+void cropResize(const InputArray _src, OutputArray _dst, const Rect& rect, Size dsize,
+                double inv_scale_x, double inv_scale_y, int interpolation)
+{
+    // Host-side overload: run checkResize(), allocate the output, then call the DVPP helper.
+    Size inputSize(_src.size());
+    checkResize(inputSize, dsize, inv_scale_x, inv_scale_y, interpolation);
+    const Mat hostSrc = _src.getMat();
+    _dst.create(dsize.height, dsize.width, hostSrc.type());
+    // Take the header only after create() so that it refers to the buffer allocated just above.
+    Mat hostDst = _dst.getMat();
+    cropResize(hostSrc, hostDst, rect, dsize, interpolation);
+}
+
+/************************** CopyMakeBorder **************************/
+template <typename inMat, typename outMat>
+void copyMakeBorder(const inMat& src, outMat& dst, int* offsets, int borderType,
+                    const Scalar& value)
+{
+    // Checked here so every overload is covered; the AscendMat one has no guard of its own.
+    CV_Assert(borderType < 2);
+    DvppOperatorDesc op;
+    op.addInput(src).addOutput(dst);
+    uint32_t taskID = 0;
+    vpcCopyMakeBorderWarpper(op.chnId, op.inputDesc_[0].Pic, op.outputDesc_[0].Pic, &taskID,
+                             offsets, borderType, value); // offsets: top, bottom, left, right
+    op.getResult(dst, taskID);
+}
+
+void copyMakeBorder(const AscendMat& src, AscendMat& dst, int top, int bottom, int left, int right,
+                    int borderType, const Scalar& value)
+{
+    int borderOffsets[] = {top, bottom, left, right}; // order read by the VPC wrapper
+    dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); // padded size
+    copyMakeBorder(src, dst, borderOffsets, borderType, value);
+}
+
+void copyMakeBorder(const InputArray _src, OutputArray _dst, int top, int bottom, int left,
+                    int right, int borderType, const Scalar& value)
+{
+    // Host-side overload; border types with a value of 2 or more are rejected up front.
+    CV_Assert(borderType < 2);
+    int borderOffsets[] = {top, bottom, left, right};
+    const Mat hostSrc = _src.getMat();
+    _dst.create(hostSrc.rows + top + bottom, hostSrc.cols + left + right, hostSrc.type());
+    Mat hostDst = _dst.getMat();
+    copyMakeBorder(hostSrc, hostDst, borderOffsets, borderType, value);
+}
+
+/************************** CropResizeMakeBorder **************************/
+
+template <typename inMat, typename outMat>
+void cropResizeMakeBorder(const inMat& src, outMat& dst, const Rect& rect, Size dsize,
+                          int interpolation, int top, int left, const int borderType,
+                          Scalar scalarV)
+{
+    // One DVPP task: crop `rect`, resize it to dsize, with destination offsets (top, left).
+    DvppOperatorDesc op;
+    op.addInput(src).addOutput(dst);
+    const int regionCount = 1;
+    uint32_t submittedTask = 0;
+    vpcCropResizeMakeBorderWarpper(op.chnId, op.inputDesc_, op.outputDesc_, regionCount,
+                                   &submittedTask, rect, dsize, interpolation, borderType,
+                                   scalarV, top, left);
+    op.getResult(dst, submittedTask);
+}
+
+void cropResizeMakeBorder(const AscendMat& src, AscendMat& dst, const Rect& rect, Size dsize,
+                          double inv_scale_x, double inv_scale_y, int interpolation, int top,
+                          int left, const int borderType, Scalar scalarV)
+{
+    CV_Assert(borderType < 2);
+    Size inputSize(src.size());
+    checkResize(inputSize, dsize, inv_scale_x, inv_scale_y, interpolation);
+    // The output holds the resized patch plus `top` extra rows and `left` extra columns.
+    dst.create(top + dsize.height, left + dsize.width, src.type());
+    cropResizeMakeBorder(src, dst, rect, dsize, interpolation, top, left, borderType, scalarV);
+}
+
+void cropResizeMakeBorder(const InputArray _src, OutputArray _dst, const Rect& rect, Size dsize,
+                          double inv_scale_x, double inv_scale_y, int interpolation, int top,
+                          int left, const int borderType, Scalar scalarV)
+{
+    // Host-side overload: check arguments, allocate the output, then run the DVPP helper.
+    CV_Assert(borderType < 2);
+    Size inputSize(_src.size());
+    checkResize(inputSize, dsize, inv_scale_x, inv_scale_y, interpolation);
+    const Mat inp = _src.getMat();
+    _dst.create(top + dsize.height, left + dsize.width, inp.type());
+    // Take the header after create() so that it refers to the buffer allocated just above.
+    Mat out = _dst.getMat();
+    cropResizeMakeBorder(inp, out, rect, dsize, interpolation, top, left, borderType, scalarV);
 }
 
 } // namespace cann
diff --git a/modules/cannops/src/dvpp_call.cpp b/modules/cannops/src/dvpp_call.cpp
new file mode 100644
index 00000000000..f81604dc258
--- /dev/null
+++ b/modules/cannops/src/dvpp_call.cpp
@@ -0,0 +1,310 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include <acl/acl.h>
+#include <acl/dvpp/hi_dvpp.h>
+#include "opencv2/dvpp_call.hpp"
+#include <iostream>
+#include <memory>
+#include <cstdarg>
+#include <string>
+
+#define unlikely(expr) __builtin_expect(!!(expr), 0) // branch hint: expr is expected to be false
+#define likely(expr) __builtin_expect(!!(expr), 1)   // branch hint: expr is expected to be true
+
+namespace cv
+{
+namespace cann
+{
+
+/******************************AscendPicDesc****************************/
+AscendPicDesc& AscendPicDesc::setMemAlign()
+{
+    // Pick the per-pixel size factor for the current format (presumably bytes per pixel). For a
+    // format that is not listed below, none of the alignment members is modified.
+    int perPixel = 0;
+    switch (Pic.picture_format)
+    {
+        case HI_PIXEL_FORMAT_YUV_400:
+            perPixel = 1;
+            break;
+        case HI_PIXEL_FORMAT_BGR_888:
+        case HI_PIXEL_FORMAT_RGB_888:
+        case HI_PIXEL_FORMAT_YUV_PACKED_444:
+            perPixel = 3;
+            break;
+        case HI_PIXEL_FORMAT_ARGB_8888:
+        case HI_PIXEL_FORMAT_ABGR_8888:
+        case HI_PIXEL_FORMAT_RGBA_8888:
+        case HI_PIXEL_FORMAT_BGRA_8888:
+            perPixel = 4;
+            break;
+        default:
+            return *this;
+    }
+    widthAlignment = 16;
+    heightAlignment = 1;
+    sizeAlignment = perPixel;
+    sizeNum = perPixel;
+    return *this;
+}
+
+AscendPicDesc& AscendPicDesc::setPic(hi_pixel_format _picture_format)
+{
+    // Record the pixel format, then derive strides and buffer size from the picture dimensions.
+    Pic.picture_format = _picture_format;
+    setMemAlign(); // refreshes widthAlignment / heightAlignment / sizeAlignment / sizeNum
+    Pic.picture_width_stride = ALIGN_UP(Pic.picture_width, widthAlignment) * sizeAlignment;
+    Pic.picture_height_stride = ALIGN_UP(Pic.picture_height, heightAlignment);
+    Pic.picture_buffer_size = // setMemAlign() sets sizeAlignment == sizeNum for handled formats
+        Pic.picture_width_stride * Pic.picture_height_stride * sizeAlignment / sizeNum;
+    return *this;
+}
+
+std::shared_ptr<hi_void> AscendPicDesc::allocate()
+{
+    Pic.picture_address = nullptr; // cleared before the allocation call fills it in
+    const uint32_t status = hi_mpi_dvpp_malloc(0, &Pic.picture_address, Pic.picture_buffer_size);
+    if (status != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to malloc mem on dvpp");
+    auto releaseBuffer = [](void* ptr) { hi_mpi_dvpp_free(ptr); };
+    return std::shared_ptr<hi_void>(Pic.picture_address, releaseBuffer); // freed by last owner
+}
+
+AscendPicDesc::AscendPicDesc(const AscendMat& ascendMat, hi_pixel_format _picture_format)
+{
+    // Dimensions must be in place first: setPic() derives strides and buffer size from them.
+    Pic.picture_height = ascendMat.rows;
+    Pic.picture_width = ascendMat.cols;
+    data = setPic(_picture_format).allocate(); // `data` shares ownership of the DVPP buffer
+}
+
+AscendPicDesc::AscendPicDesc(const Mat& mat, hi_pixel_format _picture_format)
+{
+    // Only the geometry of mat is read here; its pixel data is not touched by the constructor.
+    Pic.picture_height = mat.rows;
+    Pic.picture_width = mat.cols;
+    data = setPic(_picture_format).allocate(); // `data` shares ownership of the DVPP buffer
+}
+
+/******************************hi_mpi_vpc warppers****************************/
+void vpcCropResizeWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic,
+                          int cnt, uint32_t* taskID, const Rect& rect, Size dsize,
+                          int interpolation)
+{
+    // Crop `rect` out of inPic and resize it to dsize into outPic; taskID is passed to the VPC.
+    CV_Assert(cnt == 1); // crop_resize_info below has room for exactly one region
+    hi_vpc_crop_region cropRegion = {.top_offset = static_cast<hi_u32>(rect.y),
+                                     .left_offset = static_cast<hi_u32>(rect.x),
+                                     .crop_width = static_cast<hi_u32>(rect.width),
+                                     .crop_height = static_cast<hi_u32>(rect.height)};
+    hi_vpc_resize_info resize_info = {.resize_width = static_cast<hi_u32>(dsize.width),
+                                      .resize_height = static_cast<hi_u32>(dsize.height),
+                                      .interpolation = static_cast<hi_u32>(interpolation)};
+    hi_vpc_crop_resize_region crop_resize_info[1];
+    crop_resize_info[0].dest_pic_info = outPic; // descriptor is copied by value
+    crop_resize_info[0].crop_region = cropRegion;
+    crop_resize_info[0].resize_info = resize_info;
+    uint32_t ret = hi_mpi_vpc_crop_resize(chnId, &inPic, crop_resize_info, cnt, taskID, -1);
+    if (ret != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to crop and resize image");
+}
+
+void vpcCopyMakeBorderWarpper(hi_vpc_chn chnId, hi_vpc_pic_info& inPic, hi_vpc_pic_info& outPic,
+                              uint32_t* taskID, int* offsets, int bordertype, Scalar value)
+{
+    hi_vpc_make_border_info make_border_info; // offsets is read as {top, bottom, left, right}
+    make_border_info = {.top = static_cast<hi_u32>(offsets[0]),
+                        .bottom = static_cast<hi_u32>(offsets[1]),
+                        .left = static_cast<hi_u32>(offsets[2]),
+                        .right = static_cast<hi_u32>(offsets[3]),
+                        .border_type = saturate_cast<hi_vpc_bord_type>(bordertype)};
+    if (outPic.picture_format == HI_PIXEL_FORMAT_BGR_888)
+    {
+        make_border_info.scalar_value.val[0] = value[2]; // first three channels are reversed
+        make_border_info.scalar_value.val[1] = value[1];
+        make_border_info.scalar_value.val[2] = value[0];
+    }
+    else if (outPic.picture_format == HI_PIXEL_FORMAT_YUV_400)
+    {
+        make_border_info.scalar_value.val[0] = value[0]; // single-plane output: order is kept
+        make_border_info.scalar_value.val[1] = value[1];
+        make_border_info.scalar_value.val[2] = value[2];
+    }
+    make_border_info.scalar_value.val[3] = value[3];
+    uint32_t ret = hi_mpi_vpc_copy_make_border(chnId, &inPic, &outPic, make_border_info, taskID,
+                                               -1);
+    if (ret != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to copy and make border of image");
+}
+
+void setBatchCropResizeMakeBorder(std::vector<AscendPicDesc>& outPicDesc,
+                                  hi_vpc_crop_resize_border_region crop_resize_make_border_info[],
+                                  const Rect& rect, Size dsize, int interpolation,
+                                  const int borderType, Scalar scalarV, int top, int left,
+                                  int batchSize)
+{
+    hi_vpc_crop_region cropRegion = {.top_offset = static_cast<hi_u32>(rect.y),
+                                     .left_offset = static_cast<hi_u32>(rect.x),
+                                     .crop_width = static_cast<hi_u32>(rect.width),
+                                     .crop_height = static_cast<hi_u32>(rect.height)};
+    // Every entry gets the same crop / resize settings; only the destination picture differs.
+    hi_vpc_resize_info resize_info = {.resize_width = static_cast<hi_u32>(dsize.width),
+                                      .resize_height = static_cast<hi_u32>(dsize.height),
+                                      .interpolation = static_cast<hi_u32>(interpolation)};
+    for (int i = 0; i < batchSize; i++)
+    {
+        hi_vpc_crop_resize_border_region& entry = crop_resize_make_border_info[i];
+        entry.dest_pic_info = outPicDesc[i].Pic;
+        entry.crop_region = cropRegion;
+        entry.resize_info = resize_info;
+        entry.dest_top_offset = top;
+        entry.dest_left_offset = left;
+        entry.border_type = static_cast<hi_vpc_bord_type>(borderType);
+        if (entry.dest_pic_info.picture_format == HI_PIXEL_FORMAT_BGR_888)
+        {
+            entry.scalar_value.val[0] = scalarV[2]; // first three channels are reversed
+            entry.scalar_value.val[1] = scalarV[1];
+            entry.scalar_value.val[2] = scalarV[0];
+        }
+        else if (entry.dest_pic_info.picture_format == HI_PIXEL_FORMAT_YUV_400)
+        {
+            entry.scalar_value.val[0] = scalarV[0];
+            entry.scalar_value.val[1] = scalarV[1];
+            entry.scalar_value.val[2] = scalarV[2];
+        }
+        entry.scalar_value.val[3] = scalarV[3];
+    }
+}
+
+void vpcCropResizeMakeBorderWarpper(hi_vpc_chn chnId, std::vector<AscendPicDesc>& inPicDesc,
+                                    std::vector<AscendPicDesc>& outPicDesc, int cnt,
+                                    uint32_t* taskID, const Rect& rect, Size dsize,
+                                    int interpolation, const int borderType, Scalar scalarV,
+                                    int top, int left)
+{
+    // The region array has one slot, so exactly one input/output pair is handled per call.
+    CV_Assert(cnt == 1 && !inPicDesc.empty() && !outPicDesc.empty());
+    hi_vpc_crop_resize_border_region crop_resize_make_border_info[1];
+    setBatchCropResizeMakeBorder(outPicDesc, crop_resize_make_border_info, rect, dsize,
+                                 interpolation, borderType, scalarV, top, left, 1);
+    uint32_t ret = hi_mpi_vpc_crop_resize_make_border(
+        chnId, &inPicDesc[0].Pic, crop_resize_make_border_info, cnt, taskID, -1);
+    if (ret != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to crop, resize and make border of image");
+}
+
+/******************************DvppOperatorDesc****************************/
+DvppOperatorDesc& DvppOperatorDesc::reset()
+{
+    // Destroy the channel first; descriptor lists and the buffer holder are emptied afterwards.
+    if (hi_mpi_vpc_destroy_chn(chnId) != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to destory DVPP vpc channel");
+    inputDesc_.clear();
+    outputDesc_.clear();
+    holder.clear(); // cleared last, as in the descriptor-then-holder order used when adding
+    return *this;
+}
+void initDvpp() { hi_mpi_sys_init(); } // result of the call, if any, is ignored
+
+void finalizeDvpp() { hi_mpi_sys_exit(); } // result of the call, if any, is ignored
+
+DvppOperatorDesc& DvppOperatorDesc::createChannel()
+{
+    // chnId is passed as the output slot for the channel id, stChnAttr as its attributes.
+    if (hi_mpi_vpc_sys_create_chn(&chnId, &stChnAttr) != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to create DVPP vpc channel");
+    return *this;
+}
+
+// Register an already prepared picture descriptor as an input; no pixel data is copied here.
+DvppOperatorDesc& DvppOperatorDesc::addInput(AscendPicDesc& picDesc)
+{
+    inputDesc_.emplace_back(picDesc); // stores a copy of the descriptor
+    holder.insert(picDesc.data);      // keeps the descriptor's buffer handle in `holder`
+    return *this;
+}
+
+template <typename inMat>
+hi_pixel_format setPixelFormat(const inMat& mat)
+{
+    // Map the channel count of mat to a DVPP pixel format:
+    //   3 channels -> HI_PIXEL_FORMAT_BGR_888
+    //   1 channel  -> HI_PIXEL_FORMAT_YUV_400
+    // Any other channel count fails the assertion below.
+    CV_Assert(mat.channels() == 3 || mat.channels() == 1);
+    if (mat.channels() == 3)
+    {
+        return HI_PIXEL_FORMAT_BGR_888;
+    }
+    // After the assertion a single channel is the only remaining case.
+    return HI_PIXEL_FORMAT_YUV_400;
+}
+
+DvppOperatorDesc& DvppOperatorDesc::addInput(const AscendMat& mat)
+{
+    Mat hostCopy; // the device matrix is downloaded first, then staged like any host Mat
+    mat.download(hostCopy);
+    return addInput(hostCopy);
+}
+
+DvppOperatorDesc& DvppOperatorDesc::addInput(const Mat& mat)
+{
+    hi_pixel_format _picture_format = setPixelFormat(mat);
+    AscendPicDesc picDesc(mat, _picture_format);
+    // Copy only each row's payload: step[0] may include padding and exceed the device pitch.
+    const size_t rowBytes = mat.cols * mat.elemSize();
+    aclrtMemcpy2d(picDesc.Pic.picture_address, picDesc.Pic.picture_width_stride, mat.data,
+                  mat.step[0], rowBytes, picDesc.Pic.picture_height, ACL_MEMCPY_HOST_TO_DEVICE);
+    return addInput(picDesc);
+}
+
+// Register a picture descriptor as an output of this operator.
+DvppOperatorDesc& DvppOperatorDesc::addOutput(AscendPicDesc& picDesc)
+{
+    outputDesc_.emplace_back(picDesc); // stores a copy of the descriptor
+    holder.insert(picDesc.data);       // keeps the descriptor's buffer handle in `holder`
+    return *this;
+}
+
+DvppOperatorDesc& DvppOperatorDesc::addOutput(AscendMat& mat)
+{
+    // Only the geometry and channel count of mat are used; the descriptor owns a new buffer.
+    AscendPicDesc outDesc(mat, setPixelFormat(mat));
+    return addOutput(outDesc);
+}
+
+DvppOperatorDesc& DvppOperatorDesc::addOutput(Mat& mat)
+{
+    // Only the geometry and channel count of mat are used; the descriptor owns a new buffer.
+    AscendPicDesc outDesc(mat, setPixelFormat(mat));
+    return addOutput(outDesc);
+}
+
+// Query the result of the given VPC task, then copy the first output picture into host Mat dst.
+DvppOperatorDesc& DvppOperatorDesc::getResult(Mat& dst, uint32_t& taskIDResult)
+{
+    uint32_t ret = hi_mpi_vpc_get_process_result(chnId, taskIDResult, -1);
+    if (ret != HI_SUCCESS)
+        CV_Error(Error::StsBadFlag, "failed to get process result.");
+    // The width of the 2D copy is the payload of one row. dst.step[0] would also count row
+    // padding (e.g. when dst is a ROI view) and overwrite bytes outside of the image.
+    const size_t rowBytes = static_cast<size_t>(CV_ELEM_SIZE(dst.type())) * dst.cols;
+    aclrtMemcpy2d(dst.data, dst.step[0], outputDesc_[0].Pic.picture_address,
+                  outputDesc_[0].Pic.picture_width_stride, rowBytes,
+                  outputDesc_[0].Pic.picture_height, ACL_MEMCPY_DEVICE_TO_HOST);
+    return *this;
+}
+
+DvppOperatorDesc& DvppOperatorDesc::getResult(AscendMat& dst, uint32_t& taskIDResult)
+{
+    // The result is fetched into a host staging matrix of dst's geometry, then uploaded to dst.
+    Mat staging(dst.rows, dst.cols, dst.type());
+    getResult(staging, taskIDResult);
+    dst.upload(staging);
+    return *this;
+}
+
+} // namespace cann
+} // namespace cv
diff --git a/modules/cannops/src/element_operations.cpp b/modules/cannops/src/element_operations.cpp
index 402658369b5..cacf6e6cff1 100644
--- a/modules/cannops/src/element_operations.cpp
+++ b/modules/cannops/src/element_operations.cpp
@@ -3,6 +3,7 @@
 // of this distribution and at http://opencv.org/license.html.
 
 #include "precomp.hpp"
+
 namespace cv
 {
 namespace cann
@@ -110,8 +111,8 @@ static void convert(const Scalar& src, Scalar& dst, AscendStream& stream)
 }
 
 template <typename T1, typename T2>
-static void arithm_op(const T1& src1, const T2& src2, AscendMat& dst, const AscendMat& mask, float scale,
-                      int dtype, const char* op, AscendStream& stream)
+static void arithm_op(const T1& src1, const T2& src2, AscendMat& dst, const AscendMat& mask,
+                      float scale, int dtype, const char* op, AscendStream& stream)
 {
     T1 castedSrc1;
     T2 castedSrc2;
@@ -170,8 +171,9 @@ static void arithm_op(const T1& src1, const T2& src2, AscendMat& dst, const Asce
     }
 }
 
-static void arithm_op(const InputArray _src1, const InputArray _src2, OutputArray _dst, const InputArray _mask,
-                      float scale, int dtype, const char* op, AscendStream& stream)
+static void arithm_op(const InputArray _src1, const InputArray _src2, OutputArray _dst,
+                      const InputArray _mask, float scale, int dtype, const char* op,
+                      AscendStream& stream)
 {
     const bool isScalar1 = (_src1.kind() == _InputArray::MATX);
     const bool isScalar2 = (_src2.kind() == _InputArray::MATX);
@@ -213,56 +215,54 @@ static void arithm_op(const InputArray _src1, const InputArray _src2, OutputArra
 }
 
 // In order to supply more interfaces, differnet function declaration shoule be done.
-void add(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, int dtype,
-         AscendStream& stream)
+void add(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask,
+         int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream);
 }
 
-void add(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype,
-         AscendStream& stream)
+void add(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask,
+         int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream);
 }
 
-void add(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask, int dtype,
-         AscendStream& stream)
+void add(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask,
+         int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream);
 }
 
-void add(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype,
-         AscendStream& stream)
+void add(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask,
+         int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Add", stream);
 }
 
-
-void subtract(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask, int dtype,
-              AscendStream& stream)
+void subtract(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask,
+              int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream);
 }
 
-void subtract(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype,
-              AscendStream& stream)
+void subtract(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask,
+              int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream);
 }
 
-void subtract(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask, int dtype,
-              AscendStream& stream)
+void subtract(const AscendMat& src1, const Scalar& src2, AscendMat& dst, const AscendMat& mask,
+              int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream);
 }
 
-void subtract(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask, int dtype,
-              AscendStream& stream)
+void subtract(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask,
+              int dtype, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, dtype, "Sub", stream);
 }
 
-
 void multiply(const InputArray src1, const InputArray src2, OutputArray dst, float scale, int dtype,
               AscendStream& stream)
 {
@@ -287,7 +287,6 @@ void multiply(const Scalar& src1, const AscendMat& src2, AscendMat& dst, float s
     arithm_op(src1, src2, dst, AscendMat(), scale, dtype, "Mul", stream);
 }
 
-
 void divide(const InputArray src1, const InputArray src2, OutputArray dst, float scale, int dtype,
             AscendStream& stream)
 {
@@ -312,15 +311,14 @@ void divide(const Scalar& src1, const AscendMat& src2, AscendMat& dst, float sca
     arithm_op(src1, src2, dst, AscendMat(), scale, dtype, "RealDiv", stream);
 }
 
-
-void bitwise_and(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask,
-                 AscendStream& stream)
+void bitwise_and(const InputArray src1, const InputArray src2, OutputArray dst,
+                 const InputArray mask, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream);
 }
 
-void bitwise_and(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask,
-                 AscendStream& stream)
+void bitwise_and(const AscendMat& src1, const AscendMat& src2, AscendMat& dst,
+                 const AscendMat& mask, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream);
 }
@@ -337,9 +335,8 @@ void bitwise_and(const Scalar& src1, const AscendMat& src2, AscendMat& dst, cons
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseAnd", stream);
 }
 
-
-void bitwise_or(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask,
-                AscendStream& stream)
+void bitwise_or(const InputArray src1, const InputArray src2, OutputArray dst,
+                const InputArray mask, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream);
 }
@@ -362,15 +359,14 @@ void bitwise_or(const Scalar& src1, const AscendMat& src2, AscendMat& dst, const
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseOr", stream);
 }
 
-
-void bitwise_xor(const InputArray src1, const InputArray src2, OutputArray dst, const InputArray mask,
-                 AscendStream& stream)
+void bitwise_xor(const InputArray src1, const InputArray src2, OutputArray dst,
+                 const InputArray mask, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream);
 }
 
-void bitwise_xor(const AscendMat& src1, const AscendMat& src2, AscendMat& dst, const AscendMat& mask,
-                 AscendStream& stream)
+void bitwise_xor(const AscendMat& src1, const AscendMat& src2, AscendMat& dst,
+                 const AscendMat& mask, AscendStream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream);
 }
@@ -387,7 +383,6 @@ void bitwise_xor(const Scalar& src1, const AscendMat& src2, AscendMat& dst, cons
     arithm_op(src1, src2, dst, mask, 1, -1, "BitwiseXor", stream);
 }
 
-
 void bitwise_not(const InputArray src, OutputArray dst, const InputArray mask, AscendStream& stream)
 {
     arithm_op(src, noArray(), dst, mask, 1, -1, "Invert", stream);
@@ -398,9 +393,8 @@ void bitwise_not(const AscendMat& src, AscendMat& dst, const AscendMat& mask, As
     arithm_op(src, AscendMat(), dst, mask, 1, -1, "Invert", stream);
 }
 
-
-void addWeighted(const AscendMat& src1, double alpha, const AscendMat& src2, double beta, double gamma,
-                 AscendMat& dst, int dtype, AscendStream& stream)
+void addWeighted(const AscendMat& src1, double alpha, const AscendMat& src2, double beta,
+                 double gamma, AscendMat& dst, int dtype, AscendStream& stream)
 {
     if (dtype < 0)
         dtype = src1.depth();
@@ -421,8 +415,8 @@ void addWeighted(const AscendMat& src1, double alpha, const AscendMat& src2, dou
     arithm_op(srcWeightedSumRet, (float)gamma, dst, "Adds", stream);
 }
 
-void addWeighted(const InputArray _src1, double alpha, const InputArray _src2, double beta, double gamma,
-                 OutputArray _dst, int dtype, AscendStream& stream)
+void addWeighted(const InputArray _src1, double alpha, const InputArray _src2, double beta,
+                 double gamma, OutputArray _dst, int dtype, AscendStream& stream)
 {
     AscendMat src1, src2, dst;
     src1.upload(_src1, stream);
@@ -442,45 +436,23 @@ double threshold(const AscendMat& src, AscendMat& dst, double thresh, double max
 
     dst.create(src.rows, src.cols, src.type());
 
-    OperatorRunner runner;
-    runner.setOp("Threshold")
-        .addInput(src, "x")
-        .addOutput(threshMat, "y")
-        .addAttr((float)thresh, "threshold")
-        .run(stream);
-
-    // THRESH_*_INV, THRESH_TRUNC need a inverse threshMat.
-    // THRESH_BINARY_INV = 1, THRESH_TRUNC = 2, THRESH_TOZERO_INV = 4,
-    if (type == 1 || type == 2 || type == 4)
+    if (src.depth() == CV_8U || src.depth() == CV_8S || src.depth() == CV_16S ||
+        src.depth() == CV_32S || src.depth() == CV_32F || src.depth() == CV_16F)
     {
-        AscendMat threshInvMat(src.size(), src.type());
-        AscendMat ones(src.size(), src.type());
-        Scalar s(1, 1, 1, 1);
-        ones.setTo(s, stream);
-        arithm_op(ones, threshMat, threshInvMat, "Sub", stream);
-
-        if (type == 1)
-            arithm_op(threshInvMat, (float)maxval, dst, "Muls", stream);
-        else if (type == 2)
-        {
-            AscendMat ToZeroInvMat(src.size(), src.type());
-            AscendMat TruncMat(src.size(), src.type());
-            arithm_op(threshInvMat, src, ToZeroInvMat, "Mul", stream);
-            arithm_op(threshMat, (float)thresh, TruncMat, "Muls", stream);
-            arithm_op(ToZeroInvMat, TruncMat, dst, "Add", stream);
-        }
-        else
-            arithm_op(threshInvMat, src, dst, "Mul", stream);
+        ThresholdOpencvTilingData tiling;
+        tiling.maxVal = maxval;
+        tiling.thresh = thresh;
+        // AscendMat memory is aligned to 32 bytes, so rounding totalLength up a little is safe.
+        size_t totalBytes = src.rows * src.cols * src.channels();
+        tiling.totalLength = ALIGN_UP(totalBytes, 32);
+        tiling.threshType = type;
+        tiling.dtype = src.depth();
+
+        kernel_launch(aclrtlaunch_threshold_opencv, stream, tiling, src.data.get(), dst.data.get());
     }
     else
-    {
-        if (type == 0) /* THRESH_BINARY = 0 */
-            arithm_op(threshMat, (float)maxval, dst, "Muls", stream);
-        else if (type == 3) /* THRESH_TOZERO = 3 */
-            arithm_op(threshMat, src, dst, "Mul", stream);
-        else
-            CV_Error(Error::StsError, "Unknown/unsupported threshold type");
-    }
+        CV_Error(Error::StsUnsupportedFormat, "");
+
     return thresh;
 }
 
diff --git a/modules/cannops/src/precomp.hpp b/modules/cannops/src/precomp.hpp
index 8411cc40407..fe81c8a42cb 100644
--- a/modules/cannops/src/precomp.hpp
+++ b/modules/cannops/src/precomp.hpp
@@ -10,5 +10,8 @@
 #include "opencv2/cann_call.hpp"
 #include "opencv2/cann_interface.hpp"
 #include "opencv2/cann_private.hpp"
+#include "opencv2/dvpp_call.hpp"
+#include "opencv2/ascendc_kernels.hpp"
+#define ALIGN_UP(num, align) (((num) + (align) - 1) & ~((align) - 1)) // align: power of two only
 
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/cannops/test/test_core.cpp b/modules/cannops/test/test_core.cpp
index 6b63a8cf061..98d554335aa 100644
--- a/modules/cannops/test/test_core.cpp
+++ b/modules/cannops/test/test_core.cpp
@@ -212,6 +212,128 @@ TEST(CORE, RESIZE)
     cv::cann::resetDevice();
 }
 
+TEST(CORE, RESIZE_NEW)
+{
+    Mat expected, actual;
+    Mat hostSrc = randomMat(1280, 1706, CV_8UC3, 100.0, 255.0);
+    const Size targetSize(768, 832);
+    // Modes 0 (INTER_NEAREST) and 1 (INTER_LINEAR) were added to cann::resize;
+    // only the INTER_LINEAR result is close to cv::resize, so that mode is checked.
+    const int interp = 1;
+    cv::resize(hostSrc, expected, targetSize, 0, 0, interp);
+    cv::cann::resize(hostSrc, actual, targetSize, 0, 0, interp);
+    EXPECT_MAT_NEAR(expected, actual, 1);
+    // Host input, scale factors instead of an explicit size.
+    cv::resize(hostSrc, expected, Size(), 0.5, 0.5, interp);
+    cv::cann::resize(hostSrc, actual, Size(), 0.5, 0.5, interp);
+    EXPECT_MAT_NEAR(expected, actual, 1);
+    // Device (AscendMat) input, explicit size.
+    AscendMat deviceSrc, deviceDst;
+    deviceSrc.upload(hostSrc);
+    cv::resize(hostSrc, expected, targetSize, 0, 0, interp);
+    cv::cann::resize(deviceSrc, deviceDst, targetSize, 0, 0, interp);
+    deviceDst.download(actual);
+    EXPECT_MAT_NEAR(expected, actual, 1);
+    // Device (AscendMat) input, scale factors.
+    cv::resize(hostSrc, expected, Size(), 0.5, 0.5, interp);
+    cv::cann::resize(deviceSrc, deviceDst, Size(), 0.5, 0.5, interp);
+    deviceDst.download(actual);
+    EXPECT_MAT_NEAR(expected, actual, 1);
+}
+
+TEST(CORE, CROP_RESIZE)
+{
+    // Reference result: crop the ROI on the CPU, then cv::resize with interpolation 1.
+    const Rect roi(300, 500, 224, 256);
+    const Size outSize(496, 512);
+    Mat hostSrc = randomMat(1280, 1706, CV_8UC1, 100.0, 255.0);
+    Mat actual, expected;
+    cv::cann::cropResize(hostSrc, actual, roi, outSize, 0, 0, 1);
+    Mat hostCrop(hostSrc, roi);
+    cv::resize(hostCrop, expected, outSize, 0, 0, 1);
+    EXPECT_MAT_NEAR(actual, expected, 1);
+
+    AscendMat deviceSrc, deviceDst;
+    deviceSrc.upload(hostSrc);
+    cv::cann::cropResize(deviceSrc, deviceDst, roi, outSize, 0, 0, 1);
+    deviceDst.download(actual);
+    EXPECT_MAT_NEAR(expected, actual, 1);
+}
+TEST(CORE, CROP_RESIZE_MAKE_BORDER)
+{
+    // Compares cann::cropResizeMakeBorder with the CPU chain
+    // crop -> cv::resize -> cv::copyMakeBorder for Mat and AscendMat inputs.
+    Mat cpuMat = randomMat(1024, 896, CV_8UC1, 100.0, 255.0);
+
+    Mat resized_cv, checker, cpuOpRet;
+    Size dsize = Size(320, 256);
+    const Rect b(300, 500, 496, 512);
+    // The NPU call receives only top/left, so the CPU reference pads with
+    // bottom = right = 0.
+    const int top = 54, bottom = 0, left = 32, right = 0;
+    const int interpolation = 1; // INTER_LINEAR
+    const Scalar value(0, 0, 255, 0);
+
+    // Border types 0 (BORDER_CONSTANT) and 1 (BORDER_REPLICATE), host input.
+    for (int borderType = 0; borderType < 2; borderType++)
+    {
+        cv::cann::cropResizeMakeBorder(cpuMat, checker, b, dsize, 0, 0, interpolation, top, left,
+                                       borderType, value);
+        Mat cropped_cv(cpuMat, b);
+        cv::resize(cropped_cv, resized_cv, dsize, 0, 0, interpolation);
+        cv::copyMakeBorder(resized_cv, cpuOpRet, top, bottom, left, right, borderType, value);
+        EXPECT_MAT_NEAR(checker, cpuOpRet, 1e-10);
+    }
+    // Same border types with the source uploaded to the device.
+    AscendMat npuMat, npuChecker;
+    npuMat.upload(cpuMat);
+    for (int borderType = 0; borderType < 2; borderType++)
+    {
+        cv::cann::cropResizeMakeBorder(npuMat, npuChecker, b, dsize, 0, 0, interpolation, top, left,
+                                       borderType, value);
+        npuChecker.download(checker);
+        Mat cropped_cv(cpuMat, b);
+        cv::resize(cropped_cv, resized_cv, dsize, 0, 0, interpolation);
+        cv::copyMakeBorder(resized_cv, cpuOpRet, top, bottom, left, right, borderType, value);
+        EXPECT_MAT_NEAR(checker, cpuOpRet, 1e-10);
+    }
+}
+
+TEST(CORE, COPY_MAKE_BORDER)
+{
+    Mat hostSrc = randomMat(1280, 1706, CV_8UC3, 100, 255);
+
+    // Border colour drawn from a fixed-seed generator, one draw per channel.
+    RNG colorRng(12345);
+    const double c0 = static_cast<double>(colorRng.uniform(0, 255));
+    const double c1 = static_cast<double>(colorRng.uniform(0, 255));
+    const double c2 = static_cast<double>(colorRng.uniform(0, 255));
+    const Scalar value(c0, c1, c2);
+    const int top = 20, bottom = 30, left = 30, right = 20;
+    Mat expected, actual;
+
+    // Host (Mat) input, border types 0 and 1.
+    for (int borderType = 0; borderType < 2; ++borderType)
+    {
+        cv::cann::copyMakeBorder(hostSrc, actual, top, bottom, left, right, borderType, value);
+
+        cv::copyMakeBorder(hostSrc, expected, top, bottom, left, right, borderType, value);
+        EXPECT_MAT_NEAR(actual, expected, 1e-10);
+    }
+
+    // Device (AscendMat) input, same border types.
+    AscendMat deviceSrc, deviceDst;
+    deviceSrc.upload(hostSrc);
+    for (int borderType = 0; borderType < 2; ++borderType)
+    {
+        cv::cann::copyMakeBorder(deviceSrc, deviceDst, top, bottom, left, right, borderType,
+                                 value);
+        deviceDst.download(actual);
+
+        cv::copyMakeBorder(hostSrc, expected, top, bottom, left, right, borderType, value);
+        EXPECT_MAT_NEAR(actual, expected, 1e-10);
+    }
+}
 
 } // namespace
 } // namespace opencv_test
diff --git a/modules/cannops/test/test_element_operations.cpp b/modules/cannops/test/test_element_operations.cpp
index 76c103a65f4..730d2912d95 100644
--- a/modules/cannops/test/test_element_operations.cpp
+++ b/modules/cannops/test/test_element_operations.cpp
@@ -678,7 +678,6 @@ TEST(ELEMENTWISE_OP, MAT_THRESHOLD)
     for (int i = 0; i <= 4; i++)
     {
         cv::threshold(cpuMat, cpuOpRet, 128, 250, i);
-        // TODO find the reason empty AscendMat is not continuous.
         cv::cann::threshold(ascendMat16F, aclOpRet, 128, 250, i);
         aclOpRet.convertTo(aclOpRet16S, CV_16S);
         aclOpRet16S.download(checker);
@@ -693,5 +692,37 @@ TEST(ELEMENTWISE_OP, MAT_THRESHOLD)
     cv::cann::resetDevice();
 }
 
+TEST(ELEMENTWISE_OP, MAT_THRESHOLD_ASCENDC)
+{
+    cv::cann::setDevice(DEVICE_ID);
+    // Depths usable on both sides: OpenCV does not support CV_8S, CV_32S and
+    // CV_16F here; Ascend does not support CV_16U and CV_64F.
+    const uint8_t depths[] = {CV_8U, CV_16S, CV_32F};
+    const double thresh = 90.5;
+    const double maxVal = 85.2;
+
+    Mat expected, actual;
+    AscendMat deviceSrc, deviceDst;
+
+    // Threshold type codes 0..4 are each run against every depth.
+    for (int threshType = 0; threshType < 5; ++threshType)
+    {
+        for (const uint8_t depth : depths)
+        {
+            Mat hostSrc = randomMat(10, 10, CV_MAKETYPE(depth, 3), 0.0f, 128.0f);
+            deviceSrc.upload(hostSrc);
+            deviceDst.create(deviceSrc.rows, deviceSrc.cols, deviceSrc.type());
+
+            cv::threshold(hostSrc, expected, thresh, maxVal, threshType);
+            cv::cann::threshold(deviceSrc, deviceDst, thresh, maxVal, threshType);
+
+            deviceDst.download(actual);
+            EXPECT_MAT_NEAR(expected, actual, 10.0f);
+        }
+    }
+
+    cv::cann::resetDevice();
+}
+
 } // namespace
 } // namespace opencv_test
diff --git a/modules/cannops/test/test_kernel.cpp b/modules/cannops/test/test_kernel.cpp
new file mode 100644
index 00000000000..ac0996a27b6
--- /dev/null
+++ b/modules/cannops/test/test_kernel.cpp
@@ -0,0 +1,51 @@
+#include "test_precomp.hpp"
+#include "opencv2/cann_call.hpp"
+
+namespace opencv_test
+{
+namespace
+{
+
+TEST(ASCENDC_KERNEL, THRESHOLD)
+{
+    cv::cann::setDevice(DEVICE_ID);
+    Mat cpuRet, npuRet;
+    AscendMat npuImg, npuTmpMat;
+
+    // OpenCV does not support CV_8S, CV_32S, CV_16F here;
+    // Ascend does not support CV_16U, CV_64F. Only the common depths are tested.
+    uint8_t dtypes[] = {CV_8U, CV_16S, CV_32F};
+
+    for (uint i = 0; i <= 4; i++)
+    {
+        for (uint j = 0; j < sizeof(dtypes) / sizeof(dtypes[0]); j++)
+        {
+            double thresh = 90.5;
+            double maxVal = 85.2;
+
+            Mat img = randomMat(10, 10, CV_MAKETYPE(dtypes[j], 3), 0.0f, 128.0f);
+            npuImg.upload(img);
+            npuTmpMat.create(npuImg.rows, npuImg.cols, npuImg.type());
+
+            cv::threshold(img, cpuRet, thresh, maxVal, i);
+            ThresholdOpencvTilingData tiling;
+            tiling.maxVal = maxVal;
+            tiling.thresh = thresh;
+            size_t totalElems = img.rows * img.cols * img.channels(); // scalar element count, not bytes
+            // Round up to a multiple of 32 with +31 (same as ALIGN_UP); +32 would grow aligned sizes.
+            tiling.totalLength = ((totalElems + 31) & ~static_cast<size_t>(31));
+            tiling.threshType = i;
+            tiling.dtype = dtypes[j];
+            kernel_launch(aclrtlaunch_threshold_opencv, AscendStream::Null(), tiling,
+                          npuImg.data.get(), npuTmpMat.data.get());
+
+            npuTmpMat.download(npuRet);
+            EXPECT_MAT_NEAR(cpuRet, npuRet, 10.0f);
+        }
+    }
+
+    cv::cann::resetDevice();
+}
+
+} // namespace
+} // namespace opencv_test
diff --git a/modules/cannops/test/test_main.cpp b/modules/cannops/test/test_main.cpp
index 202c6af27ee..d14f2a2869e 100644
--- a/modules/cannops/test/test_main.cpp
+++ b/modules/cannops/test/test_main.cpp
@@ -8,8 +8,18 @@ class CannEnvironment : public ::testing::Environment
 {
 public:
     virtual ~CannEnvironment() = default;
-    virtual void SetUp() CV_OVERRIDE { initAcl(); }
-    virtual void TearDown() CV_OVERRIDE { finalizeAcl(); }
+    virtual void SetUp() CV_OVERRIDE // global fixture: ACL, then device selection, then DVPP
+    {
+        initAcl(); // runs first, before any device is selected
+        cv::cann::setDevice(DEVICE_ID);
+        initDvpp(); // runs last, once DEVICE_ID has been selected
+    }
+    virtual void TearDown() CV_OVERRIDE // releases resources in the reverse order of SetUp()
+    {
+        finalizeDvpp();          // DVPP first: it was initialised last
+        cv::cann::resetDevice(); // then release the device selected in SetUp()
+        finalizeAcl();           // ACL last: nothing ACL-backed may run after it
+    }
 };
 
 static void initTests()
diff --git a/modules/cannops/test/test_precomp.hpp b/modules/cannops/test/test_precomp.hpp
index f7bdbea0b08..74cfcb11ee9 100644
--- a/modules/cannops/test/test_precomp.hpp
+++ b/modules/cannops/test/test_precomp.hpp
@@ -9,6 +9,7 @@
 #include "opencv2/cann.hpp"
 #include "opencv2/ts/cuda_test.hpp"
 #include "opencv2/cann_interface.hpp"
+#include "opencv2/ascendc_kernels.hpp"
 
 using namespace cv;
 using namespace cv::cann;
diff --git a/modules/cannops/tutorials/ascend_npu_image_processing.markdown b/modules/cannops/tutorials/ascend_npu_image_processing.markdown
index ed905831d31..80e54b4cc23 100644
--- a/modules/cannops/tutorials/ascend_npu_image_processing.markdown
+++ b/modules/cannops/tutorials/ascend_npu_image_processing.markdown
@@ -108,23 +108,4 @@ Results
 
 4. Upon applying the flip operation with a flip code of 0 (flipping around the x-axis), we achieve the final result:
 
-   ![puppy_processed_normalized](./puppy_processed.jpg)
-
-
-
-## Usage Limitations
-
-While Ascend supports most commonly used operators, there are still some limitations that need to be addressed.
-
-- There is no strict limit on the size of the input image used for encoding; however, it depends on the available RAM size of your device.
-- Please note that not all data types (dtypes) are supported by every operator. The current dtype limitations are outlined in the following table. We are actively working on addressing these limitations through automatic dtype conversion in an upcoming commit.
-
-
-| Operator               | Supported Dtype                                              |
-| ---------------------- | ------------------------------------------------------------ |
-| multiply (with scale)  | float16,float32,int32                                        |
-| divide (with scale)    | float16,float,int32,int8,uint8                               |
-| bitwise add/or/xor/not | int32,int16,uint16                                           |
-| flip                   | float16,float,int64,int32,int16,uint16                       |
-| transpose              | float16,float,int64,int32,int16,int8,uint64,uint32,uint16,uint8,bool |
-| rotate                 | float16,float,int64,int32,int16,uint16                       |
+   ![puppy_processed_normalized](./puppy_processed.jpg)
\ No newline at end of file
diff --git a/modules/cudev/test/test_nd.cu b/modules/cudev/test/test_nd.cu
index fc41f4b1e9d..327b14f7e3b 100644
--- a/modules/cudev/test/test_nd.cu
+++ b/modules/cudev/test/test_nd.cu
@@ -95,7 +95,7 @@ public:
 
     static void doTest3(const SizeArray& size)
     {
-        if (std::is_same<CnType, float16_t>::value) // GpuMat::convertTo is not implemented for CV_16F
+        if (std::is_same<CnType, hfloat>::value) // GpuMat::convertTo is not implemented for CV_16F
             return;
 
         const MatType gold = RandomMat(size);
@@ -134,7 +134,7 @@ public:
 
     static void doTest4(const SizeArray& size)
     {
-        if (std::is_same<CnType, float16_t>::value) // GpuMat::convertTo is not implemented for CV_16F
+        if (std::is_same<CnType, hfloat>::value) // GpuMat::convertTo is not implemented for CV_16F
             return;
 
         const MatType gold = RandomMat(size);
@@ -163,7 +163,7 @@ public:
 
     static void doTest5(const SizeArray& size)
     {
-        if (std::is_same<CnType, float16_t>::value) // GpuMat::convertTo is not implemented for CV_16F
+        if (std::is_same<CnType, hfloat>::value) // GpuMat::convertTo is not implemented for CV_16F
             return;
 
         const MatType gold = RandomMat(size);
@@ -198,7 +198,7 @@ using ElemTypes = ::testing::Types<
     Vec<int, 1>, Vec<int, 2>, Vec<int, 3>, Vec<int, 4>, // CV_32S
     Vec<float, 1>, Vec<float, 2>, Vec<float, 3>, Vec<float, 4>, // CV_32F
     Vec<double, 1>, Vec<double, 2>, Vec<double, 3>, Vec<double, 4>, //CV_64F
-    Vec<float16_t, 1>, Vec<float16_t, 2>, Vec<float16_t, 3>, Vec<float16_t, 4> // CV_16F
+    Vec<hfloat, 1>, Vec<hfloat, 2>, Vec<hfloat, 3>, Vec<hfloat, 4> // CV_16F
 >;
 
 using SizeArray = GpuMatND::SizeArray;
diff --git a/modules/mcc/include/opencv2/mcc/checker_model.hpp b/modules/mcc/include/opencv2/mcc/checker_model.hpp
index 0768c691e05..5552ea4030b 100644
--- a/modules/mcc/include/opencv2/mcc/checker_model.hpp
+++ b/modules/mcc/include/opencv2/mcc/checker_model.hpp
@@ -89,6 +89,15 @@ class CV_EXPORTS_W CChecker
 
     CV_WRAP virtual TYPECHART getTarget() = 0;
     CV_WRAP virtual std::vector<Point2f> getBox() = 0;
+
+    /** @brief Computes and returns the corner coordinates of the central part of each chart patch.
+     *
+     * The perspective transform from the chart model to the checker's box (`cv::mcc::CChecker::getBox()`)
+     * is applied to every patch, which is then shrunk by half around its centre; four consecutive
+     * points form one patch. It is used in `cv::mcc::CCheckerDraw::draw()` and in `ChartsRGB` calculation.
+     */
+    CV_WRAP virtual std::vector<Point2f> getColorCharts() = 0;
+
     CV_WRAP virtual Mat getChartsRGB() = 0;
     CV_WRAP virtual Mat getChartsYCbCr() = 0;
     CV_WRAP virtual float getCost() = 0;
diff --git a/modules/mcc/perf/perf_main.cpp b/modules/mcc/perf/perf_main.cpp
new file mode 100644
index 00000000000..c6d28db59f8
--- /dev/null
+++ b/modules/mcc/perf/perf_main.cpp
@@ -0,0 +1,3 @@
+#include "perf_precomp.hpp"
+
+CV_PERF_TEST_MAIN(mcc)
diff --git a/modules/mcc/perf/perf_mcc.cpp b/modules/mcc/perf/perf_mcc.cpp
new file mode 100644
index 00000000000..f5e721074b7
--- /dev/null
+++ b/modules/mcc/perf/perf_mcc.cpp
@@ -0,0 +1,51 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test
+{
+namespace
+{
+
+using namespace std;
+
+PERF_TEST(CV_mcc_perf, detect) {
+    // Measures CCheckerDetector::process on the sample image with chart type MCC24.
+    const string imagePath = cvtest::findDataFile("cv/mcc/mcc_ccm_test.jpg");
+    Mat frame = imread(imagePath, IMREAD_COLOR);
+    Ptr<CCheckerDetector> checkerDetector = CCheckerDetector::create();
+
+    TEST_CYCLE() {
+        ASSERT_TRUE(checkerDetector->process(frame, MCC24, 1, false));
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST(CV_mcc_perf, infer) {
+    // Load the reference chart colours stored under the "chartsRGB" key.
+    const string goldPath = cvtest::findDataFile("cv/mcc/mcc_ccm_test.yml");
+    Mat chartsRGB;
+    FileStorage storage(goldPath, FileStorage::READ);
+    storage["chartsRGB"] >> chartsRGB;
+    storage.release();
+
+    // Build and fit the colour correction model from column 1, divided by 255.
+    Mat srcColors = chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255.;
+    ColorCorrectionModel model(srcColors, COLORCHECKER_Macbeth);
+    model.run();
+
+    // 1000x4000 random 8-bit image converted to CV_64F and scaled by 1/255.
+    Mat img(1000, 4000, CV_8UC3);
+    randu(img, 0, 255);
+    img.convertTo(img, CV_64F, 1. / 255.);
+
+    TEST_CYCLE() {
+        model.infer(img);
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
+} // namespace opencv_test
diff --git a/modules/mcc/perf/perf_precomp.hpp b/modules/mcc/perf/perf_precomp.hpp
new file mode 100644
index 00000000000..5ef54694959
--- /dev/null
+++ b/modules/mcc/perf/perf_precomp.hpp
@@ -0,0 +1,14 @@
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/mcc.hpp"
+
+namespace opencv_test
+{
+using namespace cv::mcc;
+using namespace cv::ccm;
+using namespace perf;
+}
+
+#endif
diff --git a/modules/mcc/src/ccm.cpp b/modules/mcc/src/ccm.cpp
index 7e26d164124..aec535e05b4 100644
--- a/modules/mcc/src/ccm.cpp
+++ b/modules/mcc/src/ccm.cpp
@@ -289,14 +289,13 @@ Mat ColorCorrectionModel::infer(const Mat& img, bool islinear)
         CV_Error(Error::StsBadArg, "No CCM values!" );
     }
     Mat img_lin = (p->linear)->linearize(img);
-    Mat img_ccm(img_lin.size(), img_lin.type());
-    Mat ccm_ = p->ccm.reshape(0, p->shape / 3);
-    img_ccm = multiple(p->prepare(img_lin), ccm_);
+    Mat ccm = p->ccm.reshape(0, p->shape / 3);
+    Mat img_ccm = multiple(p->prepare(img_lin), ccm);
     if (islinear == true)
     {
         return img_ccm;
     }
-    return p->cs.fromL(img_ccm);
+    return p->cs.fromLFunc(img_ccm, img_lin);
 }
 
 void ColorCorrectionModel::Impl::getColor(CONST_COLOR constcolor)
diff --git a/modules/mcc/src/checker_detector.cpp b/modules/mcc/src/checker_detector.cpp
index dcff0a25f3b..2de276d87fc 100644
--- a/modules/mcc/src/checker_detector.cpp
+++ b/modules/mcc/src/checker_detector.cpp
@@ -511,7 +511,7 @@ void CCheckerDetectorImpl::
     if (params->minImageSize > min_size)
     {
         aspOut = (float)params->minImageSize / min_size;
-        cv::resize(bgr, bgrOut, cv::Size(int(size.width * aspOut), int(size.height * aspOut)));
+        cv::resize(bgr, bgrOut, cv::Size(int(size.width * aspOut), int(size.height * aspOut)), 0, 0, INTER_LINEAR_EXACT); // fx = fy = 0 (size comes from dsize); the flag must be the 6th argument
     }
 
     // Convert to grayscale
@@ -539,17 +539,18 @@ void CCheckerDetectorImpl::
     // number of window sizes (scales) to apply adaptive thresholding
     int nScales = (params->adaptiveThreshWinSizeMax - params->adaptiveThreshWinSizeMin) / params->adaptiveThreshWinSizeStep + 1;
     thresholdImgs.create(nScales, 1, CV_8U);
-    std::vector<cv::Mat> _thresholdImgs;
-    for (int i = 0; i < nScales; i++)
-    {
-        int currScale = params->adaptiveThreshWinSizeMin + i * params->adaptiveThreshWinSizeStep;
-
-        cv::Mat tempThresholdImg;
-        cv::adaptiveThreshold(grayscaleImg, tempThresholdImg, 255, cv::ADAPTIVE_THRESH_MEAN_C,
-                              cv::THRESH_BINARY_INV, currScale, params->adaptiveThreshConstant);
-
-        _thresholdImgs.push_back(tempThresholdImg);
-    }
+    std::vector<cv::Mat> _thresholdImgs(nScales);
+    parallel_for_(Range(0, nScales),[&](const Range& range) {
+        const int start = range.start;
+        const int end = range.end;
+        for (int i = start; i < end; i++) {
+            int currScale = params->adaptiveThreshWinSizeMin + i * params->adaptiveThreshWinSizeStep;
+            cv::Mat tempThresholdImg;
+            cv::adaptiveThreshold(grayscaleImg, tempThresholdImg, 255, ADAPTIVE_THRESH_MEAN_C,
+                                  THRESH_BINARY_INV, currScale, params->adaptiveThreshConstant);
+            _thresholdImgs[i] = tempThresholdImg;
+        }
+    });
 
     thresholdImgs.assign(_thresholdImgs);
 }
@@ -1176,31 +1177,6 @@ void CCheckerDetectorImpl::
         x_new.insert(x_new.begin() + idx + 1, (x_new[idx] + x_new[idx + 1]) / 2);
 }
 
-void CCheckerDetectorImpl::
-    transform_points_forward(InputArray T, const std::vector<cv::Point2f> &X, std::vector<cv::Point2f> &Xt)
-{
-    size_t N = X.size();
-    if (N == 0)
-        return;
-
-    Xt.clear();
-    Xt.resize(N);
-    cv::Matx31f p, xt;
-    cv::Point2f pt;
-
-    cv::Matx33f _T = T.getMat();
-    for (int i = 0; i < (int)N; i++)
-    {
-        p(0, 0) = X[i].x;
-        p(1, 0) = X[i].y;
-        p(2, 0) = 1;
-        xt = _T * p;
-        pt.x = xt(0, 0) / xt(2, 0);
-        pt.y = xt(1, 0) / xt(2, 0);
-        Xt[i] = pt;
-    }
-}
-
 void CCheckerDetectorImpl::
     transform_points_inverse(InputArray T, const std::vector<cv::Point2f> &X, std::vector<cv::Point2f> &Xt)
 {
diff --git a/modules/mcc/src/checker_detector.hpp b/modules/mcc/src/checker_detector.hpp
index 75b1644a51b..4c922b5d1d4 100644
--- a/modules/mcc/src/checker_detector.hpp
+++ b/modules/mcc/src/checker_detector.hpp
@@ -171,11 +171,6 @@ class CCheckerDetectorImpl : public CCheckerDetector
         std::vector<float> &x_new,
         float tol);
 
-    void transform_points_forward(
-        InputArray T,
-        const std::vector<cv::Point2f> &X,
-        std::vector<cv::Point2f> &Xt);
-
     void transform_points_inverse(
         InputArray T,
         const std::vector<cv::Point2f> &X,
diff --git a/modules/mcc/src/checker_model.cpp b/modules/mcc/src/checker_model.cpp
index 2062e7705e5..310ed8baf96 100644
--- a/modules/mcc/src/checker_model.cpp
+++ b/modules/mcc/src/checker_model.cpp
@@ -411,6 +411,39 @@ std::vector<Point2f> CCheckerImpl::getBox()
 {
     return box;
 }
+std::vector<Point2f> CCheckerImpl::getColorCharts()
+{
+    // Colour chart classic model for this checker's target type.
+    CChartModel cccm(getTarget());
+    const std::vector<Point2f>& fbox = cccm.box;
+    const std::vector<Point2f>& cellchart = cccm.cellchart;
+    // One output point per model point; every 4 consecutive points form a patch.
+    std::vector<Point2f> charts(cellchart.size());
+
+    // Perspective transformation from model coordinates to the detected box.
+    const Matx33f ccT = getPerspectiveTransform(fbox, getBox());
+
+    std::vector<Point2f> bch(4), bcht(4);
+    const size_t N = cellchart.size() / 4;
+    for (size_t i = 0; i < N; i++)
+    {
+        const size_t k = 4 * i;
+        for (size_t j = 0; j < 4; j++)
+            bch[j] = cellchart[k + j];
+
+        polyanticlockwise(bch);
+        transform_points_forward(ccT, bch, bcht);
+
+        // Shrink the transformed patch to half size around its centroid.
+        Point2f c(0, 0);
+        for (size_t j = 0; j < 4; j++)
+            c += bcht[j];
+        c /= 4;
+        for (size_t j = 0; j < 4; j++)
+            charts[k+j] = ((bcht[j] - c) * 0.50) + c;
+    }
+    return charts;
+}
 Mat CCheckerImpl::getChartsRGB()
 {
     return chartsRGB;
@@ -435,70 +468,40 @@ Ptr<CCheckerDraw> CCheckerDraw::create(Ptr<CChecker> pChecker, cv::Scalar color
     return makePtr<CCheckerDrawImpl>(pChecker, color, thickness);
 }
 
-void CCheckerDrawImpl::
-    draw(InputOutputArray img)
+void CCheckerDrawImpl::draw(InputOutputArray img) // outlines every chart patch returned by getColorCharts() on img
 {
-
-    // color chart classic model
-    CChartModel cccm(m_pChecker->getTarget());
-    cv::Mat lab;
-    size_t N;
-    std::vector<cv::Point2f> fbox = cccm.box;
-    std::vector<cv::Point2f> cellchart = cccm.cellchart;
-
-    // tranformation
-    cv::Matx33f ccT = cv::getPerspectiveTransform(fbox, m_pChecker->getBox());
-
-    std::vector<cv::Point2f> bch(4), bcht(4);
-    N = cellchart.size() / 4;
+    std::vector<Point2f> charts = m_pChecker->getColorCharts(); // 4 consecutive points per patch
+    size_t N = charts.size() / 4; // number of complete patches
     for (size_t i = 0, k; i < N; i++)
     {
         k = 4 * i;
-        bch[0] = cellchart[k + 0];
-        bch[1] = cellchart[k + 1];
-        bch[2] = cellchart[k + 2];
-        bch[3] = cellchart[k + 3];
-
-        polyanticlockwise(bch);
-        transform_points_forward(ccT, bch, bcht);
-
-        cv::Point2f c(0, 0);
         for (size_t j = 0; j < 4; j++)
-            c += bcht[j];
-        c /= 4;
-        for (size_t j = 0; j < 4; j++)
-            bcht[j] = ((bcht[j] - c) * 0.50) + c;
-
-        cv::line(img, bcht[0], bcht[1], m_color, m_thickness, LINE_AA);
-        cv::line(img, bcht[1], bcht[2], m_color, m_thickness, LINE_AA);
-        cv::line(img, bcht[2], bcht[3], m_color, m_thickness, LINE_AA);
-        cv::line(img, bcht[3], bcht[0], m_color, m_thickness, LINE_AA);
+            cv::line(img, charts[k+j], charts[k+((j + 1) % 4)], m_color, m_thickness, LINE_AA); // edge j -> j+1; the modulo closes the quad
     }
 }
 
-void CCheckerDrawImpl::
-    transform_points_forward(InputArray T, const std::vector<cv::Point2f> &X, std::vector<cv::Point2f> &Xt)
+void transform_points_forward(const Matx33f& T, const std::vector<Point2f> &X, std::vector<Point2f> &Xt) // maps each point of X through the 3x3 matrix T into Xt
 {
-
-    cv::Matx33f _T = T.getMat();
     size_t N = X.size();
-    Xt.clear();
-    Xt.resize(N);
+    if (Xt.size() != N)
+        Xt.resize(N); // resize only when the caller's buffer has a different length
+    std::fill(Xt.begin(), Xt.end(), Point2f(0.f, 0.f)); // reset contents; each entry is overwritten in the loop below
     if (N == 0)
         return;
 
-    cv::Matx31f p, xt;
-    cv::Point2f pt;
+    Matx31f p, xt;
+    Point2f pt;
     for (size_t i = 0; i < N; i++)
     {
         p(0, 0) = X[i].x;
         p(1, 0) = X[i].y;
         p(2, 0) = 1;
-        xt = _T * p;
+        xt = T * p; // homogeneous result; divided by xt(2, 0) on the next lines
         pt.x = xt(0, 0) / xt(2, 0);
         pt.y = xt(1, 0) / xt(2, 0);
         Xt[i] = pt;
     }
 }
+
 } // namespace mcc
 } // namespace cv
diff --git a/modules/mcc/src/checker_model.hpp b/modules/mcc/src/checker_model.hpp
index 31b85a5a144..4f116a8bdf5 100644
--- a/modules/mcc/src/checker_model.hpp
+++ b/modules/mcc/src/checker_model.hpp
@@ -137,6 +137,7 @@ class CCheckerImpl : public CChecker
 
     TYPECHART getTarget() CV_OVERRIDE;
     std::vector<Point2f> getBox() CV_OVERRIDE;
+    std::vector<Point2f> getColorCharts() CV_OVERRIDE;
     Mat getChartsRGB() CV_OVERRIDE;
     Mat getChartsYCbCr() CV_OVERRIDE;
     float getCost() CV_OVERRIDE;
@@ -173,16 +174,11 @@ class CCheckerDrawImpl : public CCheckerDraw
     Ptr<CChecker> m_pChecker;
     cv::Scalar m_color;
     int m_thickness;
-
-private:
-    /** \brief transformation perspetive*/
-    void transform_points_forward(
-        InputArray T,
-        const std::vector<cv::Point2f> &X,
-        std::vector<cv::Point2f> &Xt);
 };
 // @}
 
+void transform_points_forward(const Matx33f& T, const std::vector<Point2f> &X, std::vector<Point2f> &Xt);
+
 } // namespace mcc
 } // namespace cv
 
diff --git a/modules/mcc/src/colorspace.cpp b/modules/mcc/src/colorspace.cpp
index 40fa5364bef..59b7a068b53 100644
--- a/modules/mcc/src/colorspace.cpp
+++ b/modules/mcc/src/colorspace.cpp
@@ -83,9 +83,9 @@ Operations RGBBase_::relation(const ColorSpace& other) const
     }
     if (linear)
     {
-        return Operations({ Operation(fromL) });
+        return Operations({ Operation([this](Mat rgbl) -> Mat { return fromLFunc(rgbl); }) });
     }
-    return Operations({ Operation(toL) });
+    return Operations({ Operation([this](Mat rgb) -> Mat { return toLFunc(rgb); })});
 }
 
 /* @brief Initial operations.
@@ -135,12 +135,6 @@ void RGBBase_::calM()
  */
 void RGBBase_::calOperations()
 {
-    // rgb -> rgbl
-    toL = [this](Mat rgb) -> Mat { return toLFunc(rgb); };
-
-    // rgbl -> rgb
-    fromL = [this](Mat rgbl) -> Mat { return fromLFunc(rgbl); };
-
     if (linear)
     {
         to = Operations({ Operation(M_to.t()) });
@@ -148,23 +142,25 @@ void RGBBase_::calOperations()
     }
     else
     {
-        to = Operations({ Operation(toL), Operation(M_to.t()) });
-        from = Operations({ Operation(M_from.t()), Operation(fromL) });
+        // rgb -> rgbl
+        to = Operations({ Operation([this](Mat rgb) -> Mat { return toLFunc(rgb); }), Operation(M_to.t()) });
+        // rgbl -> rgb
+        from = Operations({ Operation(M_from.t()), Operation([this](Mat rgbl) -> Mat { return fromLFunc(rgbl); }) });
     }
 }
 
-Mat RGBBase_::toLFunc(Mat& /*rgb*/) { return Mat(); }
+Mat RGBBase_::toLFunc(Mat& /*rgb*/) const { return Mat(); } // base version ignores its input and returns an empty Mat
 
-Mat RGBBase_::fromLFunc(Mat& /*rgbl*/) { return Mat(); }
+Mat RGBBase_::fromLFunc(Mat& /*rgbl*/, Mat dst) const { return dst; } // base version ignores rgbl and hands back dst untouched
 
 /* @brief Base of Adobe RGB color space;
  */
 
-Mat AdobeRGBBase_::toLFunc(Mat& rgb) { return gammaCorrection(rgb, gamma); }
+Mat AdobeRGBBase_::toLFunc(Mat& rgb) const { return gammaCorrection(rgb, gamma); } // rgb -> rgbl: gamma correction with exponent gamma
 
-Mat AdobeRGBBase_::fromLFunc(Mat& rgbl)
+Mat AdobeRGBBase_::fromLFunc(Mat& rgbl, Mat dst) const // rgbl -> rgb; dst is passed on to gammaCorrection as the output argument
 {
-    return gammaCorrection(rgbl, 1. / gamma);
+    return gammaCorrection(rgbl, 1. / gamma, dst); // inverse exponent of toLFunc
 }
 
 /* @brief Base of sRGB color space;
@@ -180,7 +176,7 @@ void sRGBBase_::calLinear()
 
 /* @brief Used by toLFunc.
  */
-double sRGBBase_::toLFuncEW(double& x)
+double sRGBBase_::toLFuncEW(double& x) const
 {
     if (x > K0)
     {
@@ -200,7 +196,7 @@ double sRGBBase_::toLFuncEW(double& x)
  * @param rgb the input array, type of cv::Mat.
  * @return the output array, type of cv::Mat.
  */
-Mat sRGBBase_::toLFunc(Mat& rgb)
+Mat sRGBBase_::toLFunc(Mat& rgb) const
 {
     return elementWise(rgb,
             [this](double a_) -> double { return toLFuncEW(a_); });
@@ -208,7 +204,7 @@ Mat sRGBBase_::toLFunc(Mat& rgb)
 
 /* @brief Used by fromLFunc.
  */
-double sRGBBase_::fromLFuncEW(double& x)
+double sRGBBase_::fromLFuncEW(const double& x) const
 {
     if (x > beta)
     {
@@ -228,10 +224,9 @@ double sRGBBase_::fromLFuncEW(double& x)
  * @param rgbl the input array, type of cv::Mat.
  * @return the output array, type of cv::Mat.
  */
-Mat sRGBBase_::fromLFunc(Mat& rgbl)
+Mat sRGBBase_::fromLFunc(Mat& rgbl, Mat dst) const // dst is passed on to elementWise as the output argument
 {
-    return elementWise(rgbl,
-            [this](double a_) -> double { return fromLFuncEW(a_); });
+    return elementWise(rgbl, [this](double a_) -> double { return fromLFuncEW(a_); }, dst); // applies fromLFuncEW to every element
 }
 
 /* @brief sRGB color space.
diff --git a/modules/mcc/src/colorspace.hpp b/modules/mcc/src/colorspace.hpp
index 57b5bc2ff40..572fea38781 100644
--- a/modules/mcc/src/colorspace.hpp
+++ b/modules/mcc/src/colorspace.hpp
@@ -83,8 +83,6 @@ class RGBBase_ : public ColorSpace
     double yg;
     double xb;
     double yb;
-    MatFunc toL;
-    MatFunc fromL;
     Mat M_to;
     Mat M_from;
 
@@ -108,6 +106,9 @@ class RGBBase_ : public ColorSpace
     */
     void bind(RGBBase_& rgbl);
 
+    virtual Mat toLFunc(Mat& /*rgb*/) const;
+
+    virtual Mat fromLFunc(Mat& /*rgbl*/, Mat dst=Mat()) const;
 private:
     virtual void setParameter() {};
 
@@ -120,10 +121,6 @@ class RGBBase_ : public ColorSpace
     virtual void calOperations();
 
     virtual void calLinear() {};
-
-    virtual Mat toLFunc(Mat& /*rgb*/);
-
-    virtual Mat fromLFunc(Mat& /*rgbl*/);
 };
 
 /** @brief Base of Adobe RGB color space;
@@ -136,8 +133,8 @@ class AdobeRGBBase_ : public RGBBase_
     double gamma;
 
 private:
-    Mat toLFunc(Mat& rgb) CV_OVERRIDE;
-    Mat fromLFunc(Mat& rgbl) CV_OVERRIDE;
+    Mat toLFunc(Mat& rgb) const CV_OVERRIDE;
+    Mat fromLFunc(Mat& rgbl, Mat dst=Mat()) const CV_OVERRIDE;
 };
 
 /** @brief Base of sRGB color space;
@@ -160,23 +157,23 @@ class sRGBBase_ : public RGBBase_
     virtual void calLinear() CV_OVERRIDE;
     /** @brief Used by toLFunc.
     */
-    double toLFuncEW(double& x);
+    double toLFuncEW(double& x) const;
 
     /** @brief Linearization.
         @param rgb the input array, type of cv::Mat.
         @return the output array, type of cv::Mat.
     */
-    Mat toLFunc(Mat& rgb) CV_OVERRIDE;
+    Mat toLFunc(Mat& rgb) const CV_OVERRIDE;
 
     /** @brief Used by fromLFunc.
     */
-    double fromLFuncEW(double& x);
+    double fromLFuncEW(const double& x) const;
 
     /** @brief Delinearization.
         @param rgbl the input array, type of cv::Mat.
         @return the output array, type of cv::Mat.
     */
-    Mat fromLFunc(Mat& rgbl) CV_OVERRIDE;
+    Mat fromLFunc(Mat& rgbl, Mat dst=Mat()) const CV_OVERRIDE;
 };
 
 /** @brief sRGB color space.
diff --git a/modules/mcc/src/utils.cpp b/modules/mcc/src/utils.cpp
index 3a0128b6ef6..ceac095a5ab 100644
--- a/modules/mcc/src/utils.cpp
+++ b/modules/mcc/src/utils.cpp
@@ -30,14 +30,14 @@
 namespace cv {
 namespace ccm {
 
-double gammaCorrection_(const double& element, const double& gamma)
+double gammaCorrection_(const double& element, const double& gamma) // sign-preserving power; deliberately not `inline` because utils.hpp declares it as an ordinary external function
 {
     return (element >= 0 ? pow(element, gamma) : -pow((-element), gamma));
 }
 
-Mat gammaCorrection(const Mat& src, const double& gamma)
+Mat gammaCorrection(const Mat& src, const double& gamma, Mat dst) // applies gammaCorrection_ to every element of src
 {
-    return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); });
+    return elementWise(src, [gamma](double element) -> double { return gammaCorrection_(element, gamma); }, dst); // dst is forwarded as elementWise's output argument
 }
 
 Mat maskCopyTo(const Mat& src, const Mat& mask)
diff --git a/modules/mcc/src/utils.hpp b/modules/mcc/src/utils.hpp
index 02570ca0184..07ca65cb968 100644
--- a/modules/mcc/src/utils.hpp
+++ b/modules/mcc/src/utils.hpp
@@ -42,8 +42,9 @@ double gammaCorrection_(const double& element, const double& gamma);
             \f]
     @param src the input array,type of Mat.
     @param gamma a constant for gamma correction.
+    @param dst the output array, type of Mat.
  */
-Mat gammaCorrection(const Mat& src, const double& gamma);
+Mat gammaCorrection(const Mat& src, const double& gamma, Mat dst=Mat());
 
 /** @brief maskCopyTo a function to delete unsatisfied elementwise.
     @param src the input array, type of Mat.
@@ -77,10 +78,26 @@ Mat rgb2gray(const Mat& rgb);
     @param lambda a for operation
  */
 template <typename F>
-Mat elementWise(const Mat& src, F&& lambda)
+Mat elementWise(const Mat& src, F&& lambda, Mat dst=Mat())
 {
-    Mat dst = src.clone();
+    if (dst.empty() || !dst.isContinuous() || dst.total() != src.total() || dst.type() != src.type())
+        dst = Mat(src.rows, src.cols, src.type());
     const int channel = src.channels();
+    if (src.isContinuous()) {
+        const int num_elements = (int)src.total()*channel;
+        const double *psrc = (double*)src.data;
+        double *pdst = (double*)dst.data;
+        const int batch = getNumThreads() > 1 ? 128 : num_elements;
+        const int N = (num_elements / batch) + ((num_elements % batch) > 0);
+        parallel_for_(Range(0, N),[&](const Range& range) {
+            const int start = range.start * batch;
+            const int end = std::min(range.end*batch, num_elements);
+            for (int i = start; i < end; i++) {
+                pdst[i] = lambda(psrc[i]);
+            }
+        });
+        return dst;
+    }
     switch (channel)
     {
     case 1:
diff --git a/modules/mcc/test/test_mcc.cpp b/modules/mcc/test/test_mcc.cpp
index 4aa8ae31302..374b829b4b2 100644
--- a/modules/mcc/test/test_mcc.cpp
+++ b/modules/mcc/test/test_mcc.cpp
@@ -81,5 +81,101 @@ TEST(CV_mccRunCCheckerDetectorBasic, accuracy_VINYL18)
     runCCheckerDetectorBasic("VINYL18.png", VINYL18);
 }
 
+TEST(CV_mcc_ccm_test, detect_Macbeth)
+{
+    string path = cvtest::findDataFile("mcc/mcc_ccm_test.jpg");
+    Mat img = imread(path, IMREAD_COLOR);
+    Ptr<CCheckerDetector> detector = CCheckerDetector::create();
+
+    // detect the MCC24 board; the test stops here if process() reports failure
+    ASSERT_TRUE(detector->process(img, MCC24, 1, false));
+
+    // read the reference ("gold") Macbeth corners from the YAML fixture
+    path = cvtest::findDataFile("mcc/mcc_ccm_test.yml");
+    FileStorage fs(path, FileStorage::READ);
+    ASSERT_TRUE(fs.isOpened());
+    FileNode node = fs["Macbeth_corners"];
+    ASSERT_FALSE(node.empty());
+    vector<Point2f> gold_corners;
+    node >> gold_corners;
+    Ptr<CChecker> checker = detector->getBestColorChecker();
+
+    // compare the box corners of the best detected checker with the reference
+    vector<Point2f> corners = checker->getBox();
+    EXPECT_MAT_NEAR(gold_corners, corners, 3.6); // observed diff of 3.57385 on ARM only
+
+    // read the reference chartsRGB matrix
+    node = fs["chartsRGB"];
+    Mat goldChartsRGB;
+    node >> goldChartsRGB;
+    fs.release();
+
+    // compare only column 1 of chartsRGB with the reference
+    Mat chartsRGB = checker->getChartsRGB();
+    EXPECT_MAT_NEAR(goldChartsRGB.col(1), chartsRGB.col(1), 0.25); // observed diff of 0.240634 on ARM only
+}
+
+TEST(CV_mcc_ccm_test, compute_ccm)
+{
+    // read the reference chartsRGB matrix from the YAML fixture
+    string path = cvtest::findDataFile("mcc/mcc_ccm_test.yml");
+    FileStorage fs(path, FileStorage::READ);
+    Mat chartsRGB;
+    FileNode node = fs["chartsRGB"];
+    node >> chartsRGB;
+
+    // fit the CCM on column 1 of chartsRGB, reshaped to 3 channels and scaled by 1/255
+    ColorCorrectionModel model(chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255., COLORCHECKER_Macbeth);
+    model.run();
+
+    // read the reference CCM; the test stops here if the node is missing
+    node = fs["ccm"];
+    ASSERT_FALSE(node.empty());
+    Mat gold_ccm;
+    node >> gold_ccm;
+    fs.release();
+
+    // compare the computed CCM with the reference
+    Mat ccm = model.getCCM();
+    EXPECT_MAT_NEAR(gold_ccm, ccm, 1e-8);
+
+    const double gold_loss = 4.6386569120323129;
+    // compare the model loss with the hard-coded reference value above
+    const double loss = model.getLoss();
+    EXPECT_NEAR(gold_loss, loss, 1e-8);
+}
+
+TEST(CV_mcc_ccm_test, infer)
+{
+    string path = cvtest::findDataFile("mcc/mcc_ccm_test.jpg");
+    Mat img = imread(path, IMREAD_COLOR);
+    // read the reference ("gold") calibrated image
+    path = cvtest::findDataFile("mcc/mcc_ccm_test_res.png");
+    Mat gold_img = imread(path);
+
+    // read the reference chartsRGB matrix from the YAML fixture
+    path = cvtest::findDataFile("mcc/mcc_ccm_test.yml");
+    FileStorage fs(path, FileStorage::READ);
+    Mat chartsRGB;
+    FileNode node = fs["chartsRGB"];
+    node >> chartsRGB;
+    fs.release();
+
+    // fit the CCM on column 1 of chartsRGB, reshaped to 3 channels and scaled by 1/255
+    ColorCorrectionModel model(chartsRGB.col(1).clone().reshape(3, chartsRGB.rows/3) / 255., COLORCHECKER_Macbeth);
+    model.run();
+
+    // calibrate the image: BGR -> RGB, scale by 1/255 into CV_64F, apply the model, convert back to 8-bit BGR
+    Mat calibratedImage;
+    cvtColor(img, calibratedImage, COLOR_BGR2RGB);
+    calibratedImage.convertTo(calibratedImage, CV_64F, 1. / 255.);
+    calibratedImage = model.infer(calibratedImage);
+    calibratedImage.convertTo(calibratedImage, CV_8UC3, 255.);
+    cvtColor(calibratedImage, calibratedImage, COLOR_RGB2BGR);
+    // compare the calibrated image with the reference image
+    EXPECT_MAT_NEAR(gold_img, calibratedImage, 0.1);
+}
+
+
 } // namespace
 } // namespace opencv_test
diff --git a/modules/videostab/src/precomp.hpp b/modules/videostab/src/precomp.hpp
index 3d0615bf433..3c78087e0ce 100644
--- a/modules/videostab/src/precomp.hpp
+++ b/modules/videostab/src/precomp.hpp
@@ -59,7 +59,7 @@
 
 inline float sqr(float x) { return x * x; }
 
-inline float intensity(const cv::Point3_<uchar> &bgr)
+inline float intensity(const cv::Point3_<uint8_t> &bgr)
 {
     return 0.3f*bgr.x + 0.59f*bgr.y + 0.11f*bgr.z;
 }
diff --git a/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp b/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp
index fe00e0168b1..e074a9cb8f8 100644
--- a/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp
+++ b/modules/wechat_qrcode/perf/perf_wechat_qrcode_pipeline.cpp
@@ -17,7 +17,8 @@ std::string qrcode_images_name[] = {
     "version_5_down.jpg", "version_5_left.jpg", "version_5_up.jpg", "version_5_top.jpg",
     "russian.jpg", "kanji.jpg", "link_wiki_cv.jpg"};
 
-std::string qrcode_images_multiple[] = {"2_qrcodes.png", "3_qrcodes.png", "3_close_qrcodes.png",
+// NB: "2_qrcodes.png" is excluded because the image is too difficult and this test fails on it
+std::string qrcode_images_multiple[] = {/*"2_qrcodes.png",*/ "3_qrcodes.png", "3_close_qrcodes.png",
                                         "4_qrcodes.png", "5_qrcodes.png", "7_qrcodes.png"};
 
 WeChatQRCode createQRDetectorWithDNN(std::string& model_path)
diff --git a/modules/wechat_qrcode/src/decodermgr.cpp b/modules/wechat_qrcode/src/decodermgr.cpp
index 1e93aa15c0c..5083cc930bb 100644
--- a/modules/wechat_qrcode/src/decodermgr.cpp
+++ b/modules/wechat_qrcode/src/decodermgr.cpp
@@ -33,7 +33,7 @@ int DecoderMgr::decodeImage(cv::Mat src, bool use_nn_detector, vector<string>& r
     decode_hints_.setUseNNDetector(use_nn_detector);
 
     Ref<ImgSource> source;
-    qbarUicomBlock_ = new UnicomBlock(width, height);
+    qbarUicomBlock_ = new UnicomBlock(height, width);
 
     // Four Binarizers
     int tryBinarizeTime = 4;
@@ -89,4 +89,4 @@ vector<Ref<Result>> DecoderMgr::Decode(Ref<BinaryBitmap> image, DecodeHints hint
     return reader_->decode(image, hints);
 }
 }  // namespace wechat_qrcode
-}  // namespace cv
\ No newline at end of file
+}  // namespace cv
diff --git a/modules/wechat_qrcode/src/wechat_qrcode.cpp b/modules/wechat_qrcode/src/wechat_qrcode.cpp
index f4bec7c2b36..64aad73610b 100644
--- a/modules/wechat_qrcode/src/wechat_qrcode.cpp
+++ b/modules/wechat_qrcode/src/wechat_qrcode.cpp
@@ -35,7 +35,8 @@ class WeChatQRCode::Impl {
      * @param points succussfully decoded qrcode with bounding box points.
      * @return vector<string>
      */
-    std::vector<std::string> decode(const Mat& img, std::vector<Mat>& candidate_points,
+    std::vector<std::string> decode(const Mat& img,
+                                    const std::vector<Mat>& candidate_points,
                                     std::vector<Mat>& points);
     int applyDetector(const Mat& img, std::vector<Mat>& points);
     Mat cropObj(const Mat& img, const Mat& point, Align& aligner);
@@ -123,13 +124,14 @@ float WeChatQRCode::getScaleFactor() {
     return p->scaleFactor;
 };
 
-vector<string> WeChatQRCode::Impl::decode(const Mat& img, vector<Mat>& candidate_points,
+vector<string> WeChatQRCode::Impl::decode(const Mat& img,
+                                          const vector<Mat>& candidate_points,
                                           vector<Mat>& points) {
     if (candidate_points.size() == 0) {
         return vector<string>();
     }
     vector<string> decode_results;
-    for (auto& point : candidate_points) {
+    for (const auto& point : candidate_points) {
         Mat cropped_img;
         Align aligner;
         if (use_nn_detector_) {
@@ -155,9 +157,11 @@ vector<string> WeChatQRCode::Impl::decode(const Mat& img, vector<Mat>& candidate
 
                     if (use_nn_detector_)
                         points_qr = aligner.warpBack(points_qr);
+
+                    auto point_to_save = Mat(4, 2, CV_32FC1);
                     for (int j = 0; j < 4; ++j) {
-                        point.at<float>(j, 0) = points_qr[j].x;
-                        point.at<float>(j, 1) = points_qr[j].y;
+                        point_to_save.at<float>(j, 0) = points_qr[j].x;
+                        point_to_save.at<float>(j, 1) = points_qr[j].y;
                     }
                     // try to find duplicate qr corners
                     bool isDuplicate = false;
@@ -175,7 +179,7 @@ vector<string> WeChatQRCode::Impl::decode(const Mat& img, vector<Mat>& candidate
                         }
                     }
                     if (isDuplicate == false) {
-                        points.push_back(point);
+                        points.push_back(point_to_save);
                         check_points.push_back(points_qr);
                     }
                     else {
@@ -244,4 +248,4 @@ vector<float> WeChatQRCode::Impl::getScaleList(const int width, const int height
     return {0.5, 1.0};
 }
 }  // namespace wechat_qrcode
-}  // namespace cv
\ No newline at end of file
+}  // namespace cv
diff --git a/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp b/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp
index 38a79b378e1..3f879f47fc2 100644
--- a/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp
+++ b/modules/wechat_qrcode/src/zxing/common/binarizer/adaptive_threshold_mean_binarizer.cpp
@@ -88,7 +88,7 @@ int AdaptiveThresholdMeanBinarizer::TransMatToBuffer(cv::Mat mSrc, unsigned char
         unsigned char* pdi = ppBuffer + j * nWidth;
         for (int z = 0; z < nWidth; ++z) {
             int nj = nHeight - j - 1;
-            int value = *(uchar*)(mSrc.ptr<uchar>(nj) + z);
+            int value = *(uint8_t*)(mSrc.ptr<uint8_t>(nj) + z);
             if (value > 120)
                 pdi[z] = 0;
             else
@@ -96,4 +96,4 @@ int AdaptiveThresholdMeanBinarizer::TransMatToBuffer(cv::Mat mSrc, unsigned char
         }
     }
     return 0;
-}
\ No newline at end of file
+}
diff --git a/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp b/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp
index 652458e91c8..a7180d41116 100644
--- a/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp
+++ b/modules/wechat_qrcode/src/zxing/common/unicomblock.cpp
@@ -26,7 +26,7 @@ void UnicomBlock::Init() {
 
 void UnicomBlock::Reset(Ref<BitMatrix> poImage) {
     m_poImage = poImage;
-    memset(&m_vcIndex[0], 0, m_vcIndex.size() * sizeof(short));
+    memset(&m_vcIndex[0], 0, m_vcIndex.size() * sizeof(m_vcIndex[0]));
     m_iNowIdx = 0;
 }
 
diff --git a/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp b/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp
index 438928c093d..93017a7f515 100644
--- a/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp
+++ b/modules/wechat_qrcode/src/zxing/qrcode/detector/finder_pattern_finder.cpp
@@ -1066,8 +1066,6 @@ bool FinderPatternFinder::handlePossibleCenter(int* stateCount, size_t i, size_t
     }
     float estimatedHorizontalModuleSize = (float)stateCountTotal / 7.0f;
 
-    float estimatedVerticalModuleSize;
-
     // try different size according to the estimatedHorizontalModuleSize
     float tolerateModuleSize =
         estimatedHorizontalModuleSize > 4.0 ? estimatedHorizontalModuleSize / 2.0f : 1.0f;
@@ -1082,7 +1080,9 @@ bool FinderPatternFinder::handlePossibleCenter(int* stateCount, size_t i, size_t
     int image_width = image_->getWidth();
     for (int k = 0; k < CENTER_CHECK_TIME; k++) {
         float possibleCenterJ = possbileCenterJs[k];
-        if (possibleCenterJ < 0 || possibleCenterJ >= image_width) continue;
+        if (possibleCenterJ < 0 || possibleCenterJ >= image_width)
+            continue;
+        float estimatedVerticalModuleSize = 0;
         float centerI = crossCheckVertical(i, (size_t)possibleCenterJ, stateCount[2],
                                            stateCountTotal, estimatedVerticalModuleSize);
 
@@ -1505,4 +1505,4 @@ Ref<BitMatrix> FinderPatternFinder::getImage() { return image_; }
 vector<Ref<FinderPattern>>& FinderPatternFinder::getPossibleCenters() { return possibleCenters_; }
 
 }  // namespace qrcode
-}  // namespace zxing
\ No newline at end of file
+}  // namespace zxing
diff --git a/modules/wechat_qrcode/test/test_qrcode.cpp b/modules/wechat_qrcode/test/test_qrcode.cpp
index 5b11ea98d1e..ddc2828e716 100644
--- a/modules/wechat_qrcode/test/test_qrcode.cpp
+++ b/modules/wechat_qrcode/test/test_qrcode.cpp
@@ -303,7 +303,7 @@ TEST(Objdetect_QRCode_points_position, rotate45) {
     Ptr<QRCodeEncoder> qrcode_enc = cv::QRCodeEncoder::create(params);
     Mat qrImage;
     qrcode_enc->encode(expect_msg, qrImage);
-    Mat image(800, 800, CV_8UC1);
+    Mat image(800, 800, CV_8UC1, Scalar(0));
     const int pixInBlob = 4;
     Size qrSize = Size((21+(params.version-1)*4)*pixInBlob,(21+(params.version-1)*4)*pixInBlob);
     Rect2f rec(static_cast<float>((image.cols - qrSize.width)/2),
@@ -364,7 +364,7 @@ TEST(Objdetect_QRCode_Big, regression) {
     Ptr<QRCodeEncoder> qrcode_enc = cv::QRCodeEncoder::create(params);
     Mat qrImage;
     qrcode_enc->encode(expect_msg, qrImage);
-    Mat largeImage(4032, 3024, CV_8UC1);
+    Mat largeImage(4032, 3024, CV_8UC1, Scalar(0));
     const int pixInBlob = 4;
     Size qrSize = Size((21+(params.version-1)*4)*pixInBlob,(21+(params.version-1)*4)*pixInBlob);
     Mat roiImage = largeImage(Rect((largeImage.cols - qrSize.width)/2, (largeImage.rows - qrSize.height)/2,
@@ -395,7 +395,7 @@ TEST(Objdetect_QRCode_Tiny, regression) {
     Ptr<QRCodeEncoder> qrcode_enc = cv::QRCodeEncoder::create(params);
     Mat qrImage;
     qrcode_enc->encode(expect_msg, qrImage);
-    Mat tinyImage(80, 80, CV_8UC1);
+    Mat tinyImage(80, 80, CV_8UC1, Scalar(0));
     const int pixInBlob = 2;
     Size qrSize = Size((21+(params.version-1)*4)*pixInBlob,(21+(params.version-1)*4)*pixInBlob);
     Mat roiImage = tinyImage(Rect((tinyImage.cols - qrSize.width)/2, (tinyImage.rows - qrSize.height)/2,
diff --git a/modules/xfeatures2d/src/latch.cpp b/modules/xfeatures2d/src/latch.cpp
index da5041fa0ae..49cd8f58947 100644
--- a/modules/xfeatures2d/src/latch.cpp
+++ b/modules/xfeatures2d/src/latch.cpp
@@ -519,7 +519,7 @@ namespace cv
             switch (image.type())
             {
             case CV_8UC1:
-                grayImage = image;
+                grayImage = sigma_ ? image.clone() : image;
                 break;
             case CV_8UC3:
                 cvtColor(image, grayImage, COLOR_BGR2GRAY);
diff --git a/modules/ximgproc/perf/perf_thining.cpp b/modules/ximgproc/perf/perf_thining.cpp
new file mode 100644
index 00000000000..6703a01da80
--- /dev/null
+++ b/modules/ximgproc/perf/perf_thining.cpp
@@ -0,0 +1,36 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+typedef tuple<Size, int> ThinningPerfParam;
+typedef TestBaseWithParam<ThinningPerfParam> ThinningPerfTest;
+
+PERF_TEST_P(ThinningPerfTest, perf,
+    Combine(
+        Values(sz1080p, sz720p, szVGA),
+        Values(THINNING_ZHANGSUEN, THINNING_GUOHALL)
+    )
+)
+{
+    ThinningPerfParam params = GetParam();
+    Size size = get<0>(params);
+    int type  = get<1>(params);
+    // Synthetic input: zero image with circle outlines (value 255, thickness 5) whose centre and radius grow with x.
+    Mat src = Mat::zeros(size, CV_8UC1);
+    for (int x = 50; x < src.cols - 50; x += 50)
+        cv::circle(src, Point(x, x/2), 30 + x/2, Scalar(255), 5);
+    // Only the thinning() call sits inside the TEST_CYCLE() block.
+    Mat dst;
+    TEST_CYCLE()
+    {
+        thinning(src, dst, type);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+}} // namespace
diff --git a/modules/ximgproc/src/thinning.cpp b/modules/ximgproc/src/thinning.cpp
index b28784d2894..00017fe0acb 100644
--- a/modules/ximgproc/src/thinning.cpp
+++ b/modules/ximgproc/src/thinning.cpp
@@ -5,65 +5,175 @@ using namespace std;
 namespace cv {
 namespace ximgproc {
 
+// Look-up tables with one entry for each of the 2^8 = 256 possible combinations of the
+// 8 binary neighbors (index bit 0 = p9, bits 1..7 = p2..p8); 1 keeps the pixel, 0 removes it.
+static uint8_t lut_zhang_iter0[] = {
+    1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+    0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
+    1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
+    1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
+    1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+    1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1};
+
+static uint8_t lut_zhang_iter1[] = {
+    1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+    0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
+    0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
+    1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
+    0, 1, 1, 1};
+
+static uint8_t lut_guo_iter0[] = {
+    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+    0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
+    0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
+    0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
+    1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1};
+
+static uint8_t lut_guo_iter1[] = {
+    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
+    1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+    1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
+    1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+    1, 1, 1, 1};
+
 // Applies a thinning iteration to a binary image
 static void thinningIteration(Mat img, int iter, int thinningType){
     Mat marker = Mat::zeros(img.size(), CV_8UC1);
+    int rows = img.rows;
+    int cols = img.cols;
 
     if(thinningType == THINNING_ZHANGSUEN){
-        for (int i = 1; i < img.rows-1; i++)
-        {
-            for (int j = 1; j < img.cols-1; j++)
-            {
-                uchar p2 = img.at<uchar>(i-1, j);
-                uchar p3 = img.at<uchar>(i-1, j+1);
-                uchar p4 = img.at<uchar>(i, j+1);
-                uchar p5 = img.at<uchar>(i+1, j+1);
-                uchar p6 = img.at<uchar>(i+1, j);
-                uchar p7 = img.at<uchar>(i+1, j-1);
-                uchar p8 = img.at<uchar>(i, j-1);
-                uchar p9 = img.at<uchar>(i-1, j-1);
-
-                int A  = (p2 == 0 && p3 == 1) + (p3 == 0 && p4 == 1) +
-                         (p4 == 0 && p5 == 1) + (p5 == 0 && p6 == 1) +
-                         (p6 == 0 && p7 == 1) + (p7 == 0 && p8 == 1) +
-                         (p8 == 0 && p9 == 1) + (p9 == 0 && p2 == 1);
-                int B  = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9;
-                int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8);
-                int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8);
-
-                if (A == 1 && (B >= 2 && B <= 6) && m1 == 0 && m2 == 0)
-                    marker.at<uchar>(i,j) = 1;
-            }
-        }
+        marker.forEach<uchar>([=](uchar& value, const int postion[]) {
+            int i = postion[0];
+            int j = postion[1];
+            if (i == 0 || j == 0 || i == rows - 1 || j == cols - 1)
+                return;
+
+            auto ptr = img.ptr(i, j); // p1
+
+            // p9 p2 p3
+            // p8 p1 p4
+            // p7 p6 p5
+            uchar p2 = ptr[-cols];
+            uchar p3 = ptr[-cols + 1];
+            uchar p4 = ptr[1];
+            uchar p5 = ptr[cols + 1];
+            uchar p6 = ptr[cols];
+            uchar p7 = ptr[cols - 1];
+            uchar p8 = ptr[-1];
+            uchar p9 = ptr[-cols - 1];
+
+            int neighbors = p9 | (p2 << 1) | (p3 << 2) | (p4 << 3) | (p5 << 4) | (p6 << 5) | (p7 << 6) | (p8 << 7);
+
+            if (iter == 0)
+                value = lut_zhang_iter0[neighbors];
+            else
+                value = lut_zhang_iter1[neighbors];
+
+            //int A  = (p2 == 0 && p3 == 1) + (p3 == 0 && p4 == 1) +
+            //         (p4 == 0 && p5 == 1) + (p5 == 0 && p6 == 1) +
+            //         (p6 == 0 && p7 == 1) + (p7 == 0 && p8 == 1) +
+            //         (p8 == 0 && p9 == 1) + (p9 == 0 && p2 == 1);
+            //int B  = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9;
+            //int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8);
+            //int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8);
+            //if (A == 1 && (B >= 2 && B <= 6) && m1 == 0 && m2 == 0) value = 0;
+        });
     }
     if(thinningType == THINNING_GUOHALL){
-        for (int i = 1; i < img.rows-1; i++)
-        {
-            for (int j = 1; j < img.cols-1; j++)
-            {
-                uchar p2 = img.at<uchar>(i-1, j);
-                uchar p3 = img.at<uchar>(i-1, j+1);
-                uchar p4 = img.at<uchar>(i, j+1);
-                uchar p5 = img.at<uchar>(i+1, j+1);
-                uchar p6 = img.at<uchar>(i+1, j);
-                uchar p7 = img.at<uchar>(i+1, j-1);
-                uchar p8 = img.at<uchar>(i, j-1);
-                uchar p9 = img.at<uchar>(i-1, j-1);
-
-                int C  = ((!p2) & (p3 | p4)) + ((!p4) & (p5 | p6)) +
-                         ((!p6) & (p7 | p8)) + ((!p8) & (p9 | p2));
-                int N1 = (p9 | p2) + (p3 | p4) + (p5 | p6) + (p7 | p8);
-                int N2 = (p2 | p3) + (p4 | p5) + (p6 | p7) + (p8 | p9);
-                int N  = N1 < N2 ? N1 : N2;
-                int m  = iter == 0 ? ((p6 | p7 | (!p9)) & p8) : ((p2 | p3 | (!p5)) & p4);
-
-                if ((C == 1) && ((N >= 2) && ((N <= 3)) & (m == 0)))
-                    marker.at<uchar>(i,j) = 1;
-            }
-        }
+        marker.forEach<uchar>([=](uchar& value, const int postion[]) {
+            int i = postion[0];
+            int j = postion[1];
+            if (i == 0 || j == 0 || i == rows - 1 || j == cols - 1)
+                return;
+
+            auto ptr = img.ptr(i, j); // p1
+
+            // p9 p2 p3
+            // p8 p1 p4
+            // p7 p6 p5
+            uchar p2 = ptr[-cols];
+            uchar p3 = ptr[-cols + 1];
+            uchar p4 = ptr[1];
+            uchar p5 = ptr[cols + 1];
+            uchar p6 = ptr[cols];
+            uchar p7 = ptr[cols - 1];
+            uchar p8 = ptr[-1];
+            uchar p9 = ptr[-cols - 1];
+
+            int neighbors = p9 | (p2 << 1) | (p3 << 2) | (p4 << 3) | (p5 << 4) | (p6 << 5) | (p7 << 6) | (p8 << 7);
+
+            if (iter == 0)
+                value = lut_guo_iter0[neighbors];
+            else
+                value = lut_guo_iter1[neighbors];
+
+            //int C  = ((!p2) & (p3 | p4)) + ((!p4) & (p5 | p6)) +
+            //         ((!p6) & (p7 | p8)) + ((!p8) & (p9 | p2));
+            //int N1 = (p9 | p2) + (p3 | p4) + (p5 | p6) + (p7 | p8);
+            //int N2 = (p2 | p3) + (p4 | p5) + (p6 | p7) + (p8 | p9);
+            //int N  = N1 < N2 ? N1 : N2;
+            //int m  = iter == 0 ? ((p6 | p7 | (!p9)) & p8) : ((p2 | p3 | (!p5)) & p4);
+            //if ((C == 1) && ((N >= 2) && ((N <= 3)) & (m == 0))) value = 0;
+        });
     }
 
-    img &= ~marker;
+    img &= marker;
 }
 
 // Apply the thinning procedure to a given image
diff --git a/modules/xphoto/src/annf.hpp b/modules/xphoto/src/annf.hpp
index 111469fe1d0..53a9793baae 100644
--- a/modules/xphoto/src/annf.hpp
+++ b/modules/xphoto/src/annf.hpp
@@ -263,7 +263,7 @@ static void dominantTransforms(const cv::Mat &img, std::vector <cv::Point2i> &tr
     cv::GaussianBlur( annfHist, annfHist,
         cv::Size(0, 0), std::sqrt(2.0), 0.0, cv::BORDER_CONSTANT);
     cv::dilate( annfHist, _annfHist,
-        cv::Matx<uchar, 9, 9>::ones() );
+        cv::Matx<uint8_t, 9, 9>::ones() );
 
     std::vector < std::pair<double, int> > amount;
     std::vector <cv::Point2i> shiftM;
diff --git a/modules/xphoto/src/gcgraph.hpp b/modules/xphoto/src/gcgraph.hpp
index 513e9c2d117..f30073a7735 100644
--- a/modules/xphoto/src/gcgraph.hpp
+++ b/modules/xphoto/src/gcgraph.hpp
@@ -187,7 +187,7 @@ TWeight GCGraph<TWeight>::maxFlow()
         Vtx* v, *u;
         int e0 = -1, ei = 0, ej = 0;
         TWeight minWeight, weight;
-        uchar vt;
+        uint8_t vt;
 
         // grow S & T search trees, find an edge connecting them
         while( first != nilNode )
diff --git a/modules/xphoto/src/inpainting.cpp b/modules/xphoto/src/inpainting.cpp
index d6a406ca623..8a9694c9539 100644
--- a/modules/xphoto/src/inpainting.cpp
+++ b/modules/xphoto/src/inpainting.cpp
@@ -99,7 +99,7 @@ namespace xphoto
 
         for (int i = 0; i < ddmask.rows; ++i)
         {
-            uchar *dmask_data = (uchar *) ddmask.template ptr<uchar>(i);
+            uint8_t *dmask_data = (uint8_t *) ddmask.template ptr<uint8_t>(i);
             int *backref_data = (int *) backref.template ptr< int >(i);
 
             for (int j = 0; j < ddmask.cols; ++j)
@@ -123,7 +123,7 @@ namespace xphoto
 
         for (size_t i = 0; i < pPath.size(); ++i)
         {
-            uchar xmask = dmask.template at<uchar>(pPath[i]);
+            uint8_t xmask = dmask.template at<uint8_t>(pPath[i]);
 
             for (int j = 0; j < nTransform + 1; ++j)
             {
@@ -136,7 +136,7 @@ namespace xphoto
                 &&   u.x < src.cols && u.x >= 0 )
                 {
                     if ( xmask == 0 || j == nTransform )
-                        vmask = mask.template at<uchar>(u);
+                        vmask = mask.template at<uint8_t>(u);
                     vimg = img.template at<cv::Vec<float, cn> >(u);
                 }
 
@@ -221,14 +221,14 @@ namespace xphoto
                                                };
 
                             std::vector <cv::Vec <float, cn> > pointVec;
-                                            std::vector <uchar> maskVec;
+                                            std::vector <uint8_t> maskVec;
 
                             for (uint q = 0; q < sizeof(dv)/sizeof(cv::Point2i); ++q)
                                 if (u.x + dv[q].x >= 0 && u.x + dv[q].x < img.cols
                                 &&  u.y + dv[q].y >= 0 && u.y + dv[q].y < img.rows)
                                 {
                                     pointVec.push_back(img.template at<cv::Vec <float, cn> >(u + dv[q]));
-                                    maskVec.push_back(_mask.template at<uchar>(u + dv[q]));
+                                    maskVec.push_back(_mask.template at<uint8_t>(u + dv[q]));
                                 }
                                 else
                                 {
@@ -325,16 +325,16 @@ namespace xphoto
                 inpaint <char,   4>( src, mask, dst, algorithmType );
                 break;
             case CV_8UC1:
-                inpaint <uchar,  1>( src, mask, dst, algorithmType );
+                inpaint <uint8_t,  1>( src, mask, dst, algorithmType );
                 break;
             case CV_8UC2:
-                inpaint <uchar,  2>( src, mask, dst, algorithmType );
+                inpaint <uint8_t,  2>( src, mask, dst, algorithmType );
                 break;
             case CV_8UC3:
-                inpaint <uchar,  3>( src, mask, dst, algorithmType );
+                inpaint <uint8_t,  3>( src, mask, dst, algorithmType );
                 break;
             case CV_8UC4:
-                inpaint <uchar,  4>( src, mask, dst, algorithmType );
+                inpaint <uint8_t,  4>( src, mask, dst, algorithmType );
                 break;
             case CV_16SC1:
                 inpaint <short,  1>( src, mask, dst, algorithmType );
diff --git a/modules/xphoto/src/norm2.hpp b/modules/xphoto/src/norm2.hpp
index 027e522da3a..1f26ef9c4be 100644
--- a/modules/xphoto/src/norm2.hpp
+++ b/modules/xphoto/src/norm2.hpp
@@ -59,10 +59,10 @@ template <class T> struct same_as<T, T> : ttype {};   // is_same
 
 
 template <typename _Tp> struct is_norm2_type :
-    int_const<bool, !same_as<_Tp,   char>::value
-                 && !same_as<_Tp,  uchar>::value
-                 && !same_as<_Tp, ushort>::value
-                 && !same_as<_Tp,   uint>::value>{};
+    int_const<bool, !same_as<_Tp, char>::value && !same_as<_Tp, int8_t>::value // plain 'char' and 'int8_t' are distinct types: keep excluding both
+                 && !same_as<_Tp,  uint8_t>::value
+                 && !same_as<_Tp, uint16_t>::value
+                 && !same_as<_Tp, uint32_t>::value>{};
 
 template <typename _Tp, int cn> static inline typename iftype< is_norm2_type<_Tp>::value, _Tp >::
     type norm2(cv::Vec<_Tp, cn> a, cv::Vec<_Tp, cn> b) { return (a - b).dot(a - b); }
@@ -70,4 +70,4 @@ template <typename _Tp, int cn> static inline typename iftype< is_norm2_type<_Tp
 template <typename _Tp> static inline typename iftype< is_norm2_type<_Tp>::value, _Tp >::
     type norm2(const _Tp &a, const _Tp &b) { return (a - b)*(a - b); }
 
-#endif /* __NORM2_HPP__ */
\ No newline at end of file
+#endif /* __NORM2_HPP__ */
diff --git a/modules/xphoto/src/oilpainting.cpp b/modules/xphoto/src/oilpainting.cpp
index daeffd386a7..61e42177e81 100644
--- a/modules/xphoto/src/oilpainting.cpp
+++ b/modules/xphoto/src/oilpainting.cpp
@@ -18,9 +18,9 @@ public :
 };
 
 template<>
-uchar Vec3fTo<uchar>::extract()
+uint8_t Vec3fTo<uint8_t>::extract()
 {
-    return static_cast<uchar>(a[0]);
+    return static_cast<uint8_t>(a[0]);
 }
 
 template<>
@@ -30,7 +30,7 @@ cv::Vec3b Vec3fTo<cv::Vec3b>::extract()
 }
 
 template<>
-cv::Vec3f Vec3fTo<uchar>::make(int x)
+cv::Vec3f Vec3fTo<uint8_t>::make(int x)
 {
     return cv::Vec3f((a*x)/x);
 }
@@ -84,7 +84,7 @@ class ParallelOilPainting : public ParallelLoopBody
                         if (y + yy >= 0 && y + yy < imgSrc.rows)
                         {
                             Type *vPtr = imgSrc.ptr<Type>(y + yy) + x - 0;
-                            uchar *uc = imgLuminance.ptr(y + yy) + x - 0;
+                            uint8_t *uc = imgLuminance.ptr(y + yy) + x - 0;
                             for (int xx = 0; xx <= halfsize; xx++, vPtr++, uc++)
                             {
                                 if (x + xx >= 0 && x + xx < imgSrc.cols)
@@ -104,7 +104,7 @@ class ParallelOilPainting : public ParallelLoopBody
                         if (y + yy >= 0 && y + yy < imgSrc.rows)
                         {
                             Type *vPtr = imgSrc.ptr<Type>(y + yy) + x - halfsize - 1;
-                            uchar *uc = imgLuminance.ptr(y + yy) + x - halfsize - 1;
+                            uint8_t *uc = imgLuminance.ptr(y + yy) + x - halfsize - 1;
                             int xx = -halfsize - 1;
                             if (x + xx >= 0 && x + xx < imgSrc.cols)
                             {
@@ -154,10 +154,10 @@ void oilPainting(InputArray _src, OutputArray _dst, int size, int dynValue,int c
     else
         lum = src.clone();
     double dratio = 1 / double(dynValue);
-    lum.forEach<uchar>([=](uchar &pixel, const int * /*position*/) { pixel = saturate_cast<uchar>(cvRound(pixel * dratio)); });
+    lum.forEach<uint8_t>([=](uint8_t &pixel, const int * /*position*/) { pixel = saturate_cast<uint8_t>(cvRound(pixel * dratio)); });
     if (_src.type() == CV_8UC1)
     {
-        ParallelOilPainting<uchar> oilAlgo(src, dst, lum, size, dynValue);
+        ParallelOilPainting<uint8_t> oilAlgo(src, dst, lum, size, dynValue);
         parallel_for_(Range(0, src.rows), oilAlgo);
     }
     else
diff --git a/modules/xphoto/src/photomontage.hpp b/modules/xphoto/src/photomontage.hpp
index bdafb360dc7..940ea8d2064 100644
--- a/modules/xphoto/src/photomontage.hpp
+++ b/modules/xphoto/src/photomontage.hpp
@@ -79,7 +79,7 @@ template <typename Tp> class Photomontage
 {
 private:
     const std::vector <std::vector <Tp> > &pointSeq;   // points for stitching
-    const std::vector <std::vector <uchar> > &maskSeq; // corresponding masks
+    const std::vector <std::vector <uint8_t> > &maskSeq; // corresponding masks
 
     const std::vector <std::vector <int> > &linkIdx;   // vector of neighbors for pointSeq
 
@@ -116,7 +116,7 @@ template <typename Tp> class Photomontage
     void gradientDescent(); // gradient descent in alpha-expansion topology
 
     Photomontage(const std::vector <std::vector <Tp> > &pointSeq,
-                 const std::vector <std::vector <uchar> > &maskSeq,
+                 const std::vector <std::vector <uint8_t> > &maskSeq,
                  const std::vector <std::vector <int> > &linkIdx,
                        std::vector <labelTp> &labelSeq);
     virtual ~Photomontage(){};
@@ -219,7 +219,7 @@ gradientDescent()
 
 template <typename Tp> Photomontage <Tp>::
 Photomontage( const std::vector <std::vector <Tp> > &_pointSeq,
-            const std::vector <std::vector <uchar> > &_maskSeq,
+            const std::vector <std::vector <uint8_t> > &_maskSeq,
               const std::vector <std::vector <int> > &_linkIdx,
                               std::vector <labelTp> &_labelSeq )
   :
@@ -235,7 +235,7 @@ Photomontage( const std::vector <std::vector <Tp> > &_pointSeq,
 
 template <typename Tp> static inline
 void photomontage( const std::vector <std::vector <Tp> > &pointSeq,
-                 const std::vector <std::vector <uchar> > &maskSeq,
+                 const std::vector <std::vector <uint8_t> > &maskSeq,
                    const std::vector <std::vector <int> > &linkIdx,
                    std::vector <gcoptimization::labelTp> &labelSeq )
 {