Skip to content

Commit 25c4fb2

Browse files
committed
GDALDataset::ComputeInterBandCovarianceMatrix(): very slight optimization
1 parent b7dfb32 commit 25c4fb2

File tree

1 file changed

+40
-14
lines changed

1 file changed

+40
-14
lines changed

gcore/gdaldataset.cpp

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12636,7 +12636,10 @@ MultiplyAByTransposeAUpperTriangle([[maybe_unused]] int nNumThreads,
1263612636
constexpr size_t BLOCK_SIZE_COLS = 256;
1263712637
constexpr T ZERO{0};
1263812638

12639-
std::fill(res, res + static_cast<size_t>(rows) * rows, ZERO);
12639+
if constexpr (!std::is_same_v<T, double>)
12640+
{
12641+
std::fill(res, res + static_cast<size_t>(rows) * rows, ZERO);
12642+
}
1264012643

1264112644
#ifdef HAVE_OPENMP
1264212645
#pragma omp parallel for schedule(static) num_threads(nNumThreads)
@@ -12727,7 +12730,8 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1272712730

1272812731
// Matrix of nBandCount * nBandCount storing co-moments, in optimized
1272912732
// case when the block has no nodata value
12730-
std::vector<T> adfCurBlockComomentMatrix;
12733+
// Only used if T != double
12734+
[[maybe_unused]] std::vector<T> aCurBlockComomentMatrix;
1273112735

1273212736
// Count number of valid values in padfComomentMatrix for each (i,j) tuple
1273312737
// Updated while iterating over blocks
@@ -12799,7 +12803,10 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1279912803
adfMean.resize(nMatrixSize);
1280012804
anCountMean.resize(nMatrixSize);
1280112805

12802-
adfCurBlockComomentMatrix.resize(nMatrixSize);
12806+
if constexpr (!std::is_same_v<T, double>)
12807+
{
12808+
aCurBlockComomentMatrix.resize(nMatrixSize);
12809+
}
1280312810

1280412811
anMapLinearIdxToIJ.resize(kMax);
1280512812

@@ -12819,6 +12826,11 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1281912826
return CE_Failure;
1282012827
}
1282112828

12829+
constexpr T ZERO{0};
12830+
std::fill(padfComomentMatrix,
12831+
padfComomentMatrix + static_cast<size_t>(nBandCount) * nBandCount,
12832+
0);
12833+
1282212834
{
1282312835
MySignedSize_t nLinearIdx = 0;
1282412836
for (int i = 0; i < nBandCount; ++i)
@@ -12941,7 +12953,6 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1294112953
return eErr;
1294212954

1294312955
// Compute the mean of all bands for this block
12944-
constexpr auto ZERO = static_cast<T>(0);
1294512956
bool bNoBandHasNodata = false;
1294612957
for (int i = 0; i < nBandCount; ++i)
1294712958
{
@@ -13004,8 +13015,6 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1300413015
// It is not obvious (to me) what should be the correct behavior.
1300513016
// The current approach has the benefit to avoid recomputing
1300613017
// the mean for each (i,j) tuple, but only for all i.
13007-
if (anCount[idxInMatrixI] == 0)
13008-
padfComomentMatrix[idxInMatrixI] = 0;
1300913018
if (nCount > 0)
1301013019
{
1301113020
padfComomentMatrix[idxInMatrixI] +=
@@ -13053,19 +13062,36 @@ ComputeInterBandCovarianceMatrixInternal(GDALDataset *poDS,
1305313062
// Optimized code path where there are no invalid value in the
1305413063
// current block
1305513064

13056-
MultiplyAByTransposeAUpperTriangle(
13057-
nNumThreads, adfCurBlockPixelsAllBands.data(),
13058-
adfCurBlockComomentMatrix.data(), nBandCount,
13059-
nThisBlockPixelCount);
13065+
if constexpr (!std::is_same_v<T, double>)
13066+
{
13067+
MultiplyAByTransposeAUpperTriangle(
13068+
nNumThreads, adfCurBlockPixelsAllBands.data(),
13069+
aCurBlockComomentMatrix.data(), nBandCount,
13070+
nThisBlockPixelCount);
13071+
}
13072+
else
13073+
{
13074+
MultiplyAByTransposeAUpperTriangle(
13075+
nNumThreads, adfCurBlockPixelsAllBands.data(),
13076+
padfComomentMatrix, nBandCount, nThisBlockPixelCount);
13077+
}
1306013078

1306113079
for (int i = 0; i < nBandCount; ++i)
1306213080
{
1306313081
for (int j = i; j < nBandCount; ++j)
1306413082
{
13065-
const auto idxInMatrixI =
13066-
static_cast<size_t>(i) * nBandCount + j;
13067-
UpdateGlobalValues(i, j, nThisBlockPixelCount,
13068-
adfCurBlockComomentMatrix[idxInMatrixI]);
13083+
if constexpr (!std::is_same_v<T, double>)
13084+
{
13085+
const auto idxInMatrixI =
13086+
static_cast<size_t>(i) * nBandCount + j;
13087+
UpdateGlobalValues(
13088+
i, j, nThisBlockPixelCount,
13089+
aCurBlockComomentMatrix[idxInMatrixI]);
13090+
}
13091+
else
13092+
{
13093+
UpdateGlobalValues(i, j, nThisBlockPixelCount, ZERO);
13094+
}
1306913095
}
1307013096
}
1307113097
}

0 commit comments

Comments
 (0)