@@ -12636,7 +12636,10 @@ MultiplyAByTransposeAUpperTriangle([[maybe_unused]] int nNumThreads,
1263612636 constexpr size_t BLOCK_SIZE_COLS = 256 ;
1263712637 constexpr T ZERO{0 };
1263812638
12639- std::fill (res, res + static_cast <size_t >(rows) * rows, ZERO);
12639+ if constexpr (!std::is_same_v<T, double >)
12640+ {
12641+ std::fill (res, res + static_cast <size_t >(rows) * rows, ZERO);
12642+ }
1264012643
1264112644#ifdef HAVE_OPENMP
1264212645#pragma omp parallel for schedule(static) num_threads(nNumThreads)
@@ -12727,7 +12730,8 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1272712730
1272812731 // Matrix of nBandCount * nBandCount storing co-moments, in optimized
1272912732 // case when the block has no nodata value
12730- std::vector<T> adfCurBlockComomentMatrix;
12733+ // Only used if T != double
12734+ [[maybe_unused]] std::vector<T> aCurBlockComomentMatrix;
1273112735
1273212736 // Count number of valid values in padfComomentMatrix for each (i,j) tuple
1273312737 // Updated while iterating over blocks
@@ -12799,7 +12803,10 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1279912803 adfMean.resize (nMatrixSize);
1280012804 anCountMean.resize (nMatrixSize);
1280112805
12802- adfCurBlockComomentMatrix.resize (nMatrixSize);
12806+ if constexpr (!std::is_same_v<T, double >)
12807+ {
12808+ aCurBlockComomentMatrix.resize (nMatrixSize);
12809+ }
1280312810
1280412811 anMapLinearIdxToIJ.resize (kMax );
1280512812
@@ -12819,6 +12826,11 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1281912826 return CE_Failure;
1282012827 }
1282112828
12829+ constexpr T ZERO{0 };
12830+ std::fill (padfComomentMatrix,
12831+ padfComomentMatrix + static_cast <size_t >(nBandCount) * nBandCount,
12832+ 0 );
12833+
1282212834 {
1282312835 MySignedSize_t nLinearIdx = 0 ;
1282412836 for (int i = 0 ; i < nBandCount; ++i)
@@ -12941,7 +12953,6 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1294112953 return eErr;
1294212954
1294312955 // Compute the mean of all bands for this block
12944- constexpr auto ZERO = static_cast <T>(0 );
1294512956 bool bNoBandHasNodata = false ;
1294612957 for (int i = 0 ; i < nBandCount; ++i)
1294712958 {
@@ -13004,8 +13015,6 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1300413015 // It is not obvious (to me) what should be the correct behavior.
1300513016 // The current approach has the benefit to avoid recomputing
1300613017 // the mean for each (i,j) tuple, but only for all i.
13007- if (anCount[idxInMatrixI] == 0 )
13008- padfComomentMatrix[idxInMatrixI] = 0 ;
1300913018 if (nCount > 0 )
1301013019 {
1301113020 padfComomentMatrix[idxInMatrixI] +=
@@ -13053,19 +13062,36 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1305313062 // Optimized code path where there are no invalid value in the
1305413063 // current block
1305513064
13056- MultiplyAByTransposeAUpperTriangle (
13057- nNumThreads, adfCurBlockPixelsAllBands.data (),
13058- adfCurBlockComomentMatrix.data (), nBandCount,
13059- nThisBlockPixelCount);
13065+ if constexpr (!std::is_same_v<T, double >)
13066+ {
13067+ MultiplyAByTransposeAUpperTriangle (
13068+ nNumThreads, adfCurBlockPixelsAllBands.data (),
13069+ aCurBlockComomentMatrix.data (), nBandCount,
13070+ nThisBlockPixelCount);
13071+ }
13072+ else
13073+ {
13074+ MultiplyAByTransposeAUpperTriangle (
13075+ nNumThreads, adfCurBlockPixelsAllBands.data (),
13076+ padfComomentMatrix, nBandCount, nThisBlockPixelCount);
13077+ }
1306013078
1306113079 for (int i = 0 ; i < nBandCount; ++i)
1306213080 {
1306313081 for (int j = i; j < nBandCount; ++j)
1306413082 {
13065- const auto idxInMatrixI =
13066- static_cast <size_t >(i) * nBandCount + j;
13067- UpdateGlobalValues (i, j, nThisBlockPixelCount,
13068- adfCurBlockComomentMatrix[idxInMatrixI]);
13083+ if constexpr (!std::is_same_v<T, double >)
13084+ {
13085+ const auto idxInMatrixI =
13086+ static_cast <size_t >(i) * nBandCount + j;
13087+ UpdateGlobalValues (
13088+ i, j, nThisBlockPixelCount,
13089+ aCurBlockComomentMatrix[idxInMatrixI]);
13090+ }
13091+ else
13092+ {
13093+ UpdateGlobalValues (i, j, nThisBlockPixelCount, ZERO);
13094+ }
1306913095 }
1307013096 }
1307113097 }
0 commit comments