Skip to content

Commit 72dd71c

Browse files
mohanchenabacus_fixer
andauthored
Fix potential Data race in OpenMP (#7345)
* fix data race * fix potential data race * fix data race in pw * fix potential data race * update format * fix data race in fft * fix * Fix data race in evolve_ofdft.cpp for TDOFDFT calculation. Cache shared member variables (nspin, nrxx, npw, gg, tpiba, tpiba2, gcar) to local const variables before OpenMP parallel regions to eliminate ThreadSanitizer false positive warnings. * Fix OpenMP barrier placement in pw_gatherscatter.h. The #pragma omp barrier directive must be inside an explicit #pragma omp parallel region, not after #pragma omp for which ends the implicit parallel region. This fixes undefined behavior when compiled with OpenMP enabled. * Remove obsolete OMP barrier comments from pw_gatherscatter.h * Rename local variables with underscore suffix in pw_gatherscatter.h to distinguish from member variables (nst -> nst_, nz -> nz_, etc.) * Rename local variables with underscore suffix in pw_transform.cpp to distinguish from member variables (nrxx -> nrxx_, npw -> npw_, nxyz -> nxyz_, nst -> nst_, nz -> nz_, nx -> nx_, ny -> ny_, nplane -> nplane_, ig2isz -> ig2isz_) * Rename local variables with underscore suffix in fft_cpu.cpp to distinguish from member variables (npy -> npy_, nx -> nx_, lixy -> lixy_, rixy -> rixy_, nplane -> nplane_, and FFTW plan objects) * Remove redundant #pragma omp barrier directives in pw_gatherscatter.h. The #pragma omp for directive already performs implicit synchronization at the end of the loop, making explicit barrier redundant. * add WARNING_QUIT --------- Co-authored-by: abacus_fixer <mohanchen@pku.eud.cn>
1 parent 6095b84 commit 72dd71c

8 files changed

Lines changed: 472 additions & 321 deletions

File tree

source/source_base/math_integral.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ void Integral::Simpson_Integral
3737
3838
// simpson's rule integrator for function stored on the
3939
// radial logarithmic mesh
40-
// routine assumes that mesh is an odd number so run check
40+
// routine assumes that mesh is an odd number so run check
4141
if (mesh % 2 == 0)
4242
{
4343
std::cout << "\n error in subroutine simpson ";
@@ -83,21 +83,21 @@ void Integral::Simpson_Integral
8383
*/
8484
// simpson's rule integrator for function stored on the
8585
// radial logarithmic mesh
86-
// routine assumes that mesh is an odd number so run check
86+
// routine assumes that mesh is an odd number so run check
8787
assert(mesh&1);
8888

8989
asum = 0.00;
90-
const size_t end = mesh-2;
90+
const size_t end = mesh-2;
9191
for( size_t i=1; i!=end; i+=2 )
9292
{
93-
const double f1 = func[i]*rab[i];
94-
asum += f1 + f1 + func[i+1]*rab[i+1];
93+
const double f1 = func[i]*rab[i];
94+
asum += f1 + f1 + func[i+1]*rab[i+1];
9595
}
96-
const double f1 = func[mesh-2]*rab[mesh-2];
97-
asum += f1+f1;
98-
asum += asum;
99-
asum += func[0]*rab[0] + func[mesh-1]*rab[mesh-1];
100-
asum /= 3.0;
96+
const double f1 = func[mesh-2]*rab[mesh-2];
97+
asum += f1+f1;
98+
asum += asum;
99+
asum += func[0]*rab[0] + func[mesh-1]*rab[mesh-1];
100+
asum /= 3.0;
101101
return;
102102
}// end subroutine simpson
103103

@@ -124,21 +124,21 @@ void Integral::Simpson_Integral
124124
*/
125125
// simpson's rule integrator for function stored on the
126126
// radial logarithmic mesh
127-
// routine assumes that mesh is an odd number so run check
127+
// routine assumes that mesh is an odd number so run check
128128
assert(mesh&1);
129129

130130
asum = 0.00;
131-
const size_t end = mesh-2;
131+
const size_t end = mesh-2;
132132
for(size_t i=1; i!=end; i+=2 )
133133
{
134-
const double f1 = func[i];
135-
asum += f1 + f1 + func[i+1];
134+
const double f1 = func[i];
135+
asum += f1 + f1 + func[i+1];
136136
}
137-
const double f1 = func[mesh-2];
138-
asum += f1+f1;
139-
asum += asum;
140-
asum += func[0] + func[mesh-1];
141-
asum *= dr/3.0;
137+
const double f1 = func[mesh-2];
138+
asum += f1+f1;
139+
asum += asum;
140+
asum += func[0] + func[mesh-1];
141+
asum *= dr/3.0;
142142
return;
143143
}// end subroutine simpson
144144

@@ -220,10 +220,10 @@ void Integral::Simpson_Integral_alltoinf
220220

221221
const double asum_all = asum[mesh-1];
222222
for (int i = 0;i < mesh; ++i)
223-
{
223+
{
224224
asum[i] = asum_all - asum[i];
225-
}
226-
return;
225+
}
226+
return;
227227
}
228228

229229
double Integral::simpson(const int n, const double* const f, const double dx)

source/source_base/module_fft/fft_cpu.cpp

Lines changed: 80 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -344,60 +344,86 @@ void FFT_CPU<double>::clear()
344344
template <>
345345
void FFT_CPU<double>::fftxyfor(std::complex<double>* in, std::complex<double>* out) const
346346
{
347-
int npy = this->nplane * this->ny;
347+
const int npy_ = this->nplane * this->ny;
348+
const int nx_ = this->nx;
349+
const int lixy_ = this->lixy;
350+
const int rixy_ = this->rixy;
351+
const int nplane_ = this->nplane;
352+
const fftw_plan planyfor_ = this->planyfor;
353+
const fftw_plan planxfor1_ = this->planxfor1;
354+
const fftw_plan planxfor2_ = this->planxfor2;
348355
if (this->xprime)
349356
{
350357

351-
fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out);
358+
fftw_execute_dft(planxfor1_, (fftw_complex*)in, (fftw_complex*)out);
359+
#ifdef _OPENMP
352360
#pragma omp parallel for
353-
for (int i = 0; i < this->lixy + 1; ++i)
361+
#endif
362+
for (int i = 0; i < lixy_ + 1; ++i)
354363
{
355-
fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
364+
fftw_execute_dft(planyfor_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&out[i * npy_]);
356365
}
366+
#ifdef _OPENMP
357367
#pragma omp parallel for
358-
for (int i = rixy; i < this->nx; ++i)
368+
#endif
369+
for (int i = rixy_; i < nx_; ++i)
359370
{
360-
fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
371+
fftw_execute_dft(planyfor_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&out[i * npy_]);
361372
}
362373
}
363374
else
364375
{
376+
#ifdef _OPENMP
365377
#pragma omp parallel for
366-
for (int i = 0; i < this->nx; ++i)
378+
#endif
379+
for (int i = 0; i < nx_; ++i)
367380
{
368-
fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
381+
fftw_execute_dft(planyfor_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&out[i * npy_]);
369382
}
370-
fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out);
371-
fftw_execute_dft(this->planxfor2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]);
383+
fftw_execute_dft(planxfor1_, (fftw_complex*)in, (fftw_complex*)out);
384+
fftw_execute_dft(planxfor2_, (fftw_complex*)&in[rixy_ * nplane_], (fftw_complex*)&out[rixy_ * nplane_]);
372385
}
373386
}
374387

375388
template <>
376389
void FFT_CPU<double>::fftxybac(std::complex<double>* in,std::complex<double>* out) const
377390
{
378-
int npy = this->nplane * this->ny;
391+
const int npy_ = this->nplane * this->ny;
392+
const int nx_ = this->nx;
393+
const int lixy_ = this->lixy;
394+
const int rixy_ = this->rixy;
395+
const int nplane_ = this->nplane;
396+
const fftw_plan planybac_ = this->planybac;
397+
const fftw_plan planxbac1_ = this->planxbac1;
398+
const fftw_plan planxbac2_ = this->planxbac2;
379399
if (this->xprime)
380400
{
401+
#ifdef _OPENMP
381402
#pragma omp parallel for
382-
for (int i = 0; i < this->lixy + 1; ++i)
403+
#endif
404+
for (int i = 0; i < lixy_ + 1; ++i)
383405
{
384-
fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
406+
fftw_execute_dft(planybac_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&out[i * npy_]);
385407
}
408+
#ifdef _OPENMP
386409
#pragma omp parallel for
387-
for (int i = rixy; i < this->nx; ++i)
410+
#endif
411+
for (int i = rixy_; i < nx_; ++i)
388412
{
389-
fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
413+
fftw_execute_dft(planybac_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&out[i * npy_]);
390414
}
391-
fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out);
415+
fftw_execute_dft(planxbac1_, (fftw_complex*)in, (fftw_complex*)out);
392416
}
393417
else
394418
{
395-
fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out);
396-
fftw_execute_dft(this->planxbac2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]);
419+
fftw_execute_dft(planxbac1_, (fftw_complex*)in, (fftw_complex*)out);
420+
fftw_execute_dft(planxbac2_, (fftw_complex*)&in[rixy_ * nplane_], (fftw_complex*)&out[rixy_ * nplane_]);
421+
#ifdef _OPENMP
397422
#pragma omp parallel for
398-
for (int i = 0; i < this->nx; ++i)
423+
#endif
424+
for (int i = 0; i < nx_; ++i)
399425
{
400-
fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
426+
fftw_execute_dft(planybac_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&out[i * npy_]);
401427
}
402428
}
403429
}
@@ -417,47 +443,67 @@ void FFT_CPU<double>::fftzbac(std::complex<double>* in, std::complex<double>* ou
417443
template <>
418444
void FFT_CPU<double>::fftxyr2c(double* in, std::complex<double>* out) const
419445
{
420-
int npy = this->nplane * this->ny;
446+
const int npy_ = this->nplane * this->ny;
447+
const int nx_ = this->nx;
448+
const int lixy_ = this->lixy;
449+
const fftw_plan planxr2c_ = this->planxr2c;
450+
const fftw_plan planyfor_ = this->planyfor;
451+
const fftw_plan planyr2c_ = this->planyr2c;
452+
const fftw_plan planxfor1_ = this->planxfor1;
421453
if (this->xprime)
422454
{
423-
fftw_execute_dft_r2c(this->planxr2c, in, (fftw_complex*)out);
455+
fftw_execute_dft_r2c(planxr2c_, in, (fftw_complex*)out);
456+
#ifdef _OPENMP
424457
#pragma omp parallel for
425-
for (int i = 0; i < this->lixy + 1; ++i)
458+
#endif
459+
for (int i = 0; i < lixy_ + 1; ++i)
426460
{
427-
fftw_execute_dft(this->planyfor, (fftw_complex*)&out[i * npy], (fftw_complex*)&out[i * npy]);
461+
fftw_execute_dft(planyfor_, (fftw_complex*)&out[i * npy_], (fftw_complex*)&out[i * npy_]);
428462
}
429463
}
430464
else
431465
{
466+
#ifdef _OPENMP
432467
#pragma omp parallel for
433-
for (int i = 0; i < this->nx; ++i)
468+
#endif
469+
for (int i = 0; i < nx_; ++i)
434470
{
435-
fftw_execute_dft_r2c(this->planyr2c, &in[i * npy], (fftw_complex*)&out[i * npy]);
471+
fftw_execute_dft_r2c(planyr2c_, &in[i * npy_], (fftw_complex*)&out[i * npy_]);
436472
}
437-
fftw_execute_dft(this->planxfor1, (fftw_complex*)out, (fftw_complex*)out);
473+
fftw_execute_dft(planxfor1_, (fftw_complex*)out, (fftw_complex*)out);
438474
}
439475
}
440476

441477
template <>
442478
void FFT_CPU<double>::fftxyc2r(std::complex<double> *in,double *out) const
443479
{
444-
int npy = this->nplane * this->ny;
480+
const int npy_ = this->nplane * this->ny;
481+
const int nx_ = this->nx;
482+
const int lixy_ = this->lixy;
483+
const fftw_plan planybac_ = this->planybac;
484+
const fftw_plan planxc2r_ = this->planxc2r;
485+
const fftw_plan planxbac1_ = this->planxbac1;
486+
const fftw_plan planyc2r_ = this->planyc2r;
445487
if (this->xprime)
446488
{
489+
#ifdef _OPENMP
447490
#pragma omp parallel for
448-
for (int i = 0; i < this->lixy + 1; ++i)
491+
#endif
492+
for (int i = 0; i < lixy_ + 1; ++i)
449493
{
450-
fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&in[i * npy]);
494+
fftw_execute_dft(planybac_, (fftw_complex*)&in[i * npy_], (fftw_complex*)&in[i * npy_]);
451495
}
452-
fftw_execute_dft_c2r(this->planxc2r, (fftw_complex*)in, out);
496+
fftw_execute_dft_c2r(planxc2r_, (fftw_complex*)in, out);
453497
}
454498
else
455499
{
456-
fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)in);
500+
fftw_execute_dft(planxbac1_, (fftw_complex*)in, (fftw_complex*)in);
501+
#ifdef _OPENMP
457502
#pragma omp parallel for
458-
for (int i = 0; i < this->nx; ++i)
503+
#endif
504+
for (int i = 0; i < nx_; ++i)
459505
{
460-
fftw_execute_dft_c2r(this->planyc2r, (fftw_complex*)&in[i * npy], &out[i * npy]);
506+
fftw_execute_dft_c2r(planyc2r_, (fftw_complex*)&in[i * npy_], &out[i * npy_]);
461507
}
462508
}
463509
}

0 commit comments

Comments
 (0)