diff --git a/src/hamiltonian/relativistic_projector.hpp b/src/hamiltonian/relativistic_projector.hpp index 1d77c263..900b7871 100644 --- a/src/hamiltonian/relativistic_projector.hpp +++ b/src/hamiltonian/relativistic_projector.hpp @@ -134,8 +134,10 @@ class relativistic_projector { return beta_; } + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template - gpu::array project(states::orbital_set const & phi, KpointType const & kpoint) const { + gpu::array project(states::orbital_set const & phi, KpointType const & kpoint) const { gpu::array sphere_phi({sphere_.size(), phi.local_spinor_set_size(), 2}); @@ -146,25 +148,26 @@ class relativistic_projector { sgr[ipoint][ist][1] = gr[point[0]][point[1]][point[2]][1][ist]; }); - gpu::array projections({nproj_, phi.local_spinor_set_size(), 2}); + gpu::array projections({nproj_, phi.local_spinor_set_size()}); gpu::run(phi.local_spinor_set_size(), nproj_, [proj = begin(projections), sgr = begin(sphere_phi), bet = begin(beta_), np = sphere_.size(), vol = phi.basis().volume_element()] GPU_LAMBDA (auto ist, auto iproj) { - proj[iproj][ist][0] = 0.0; - proj[iproj][ist][1] = 0.0; + proj[iproj][ist] = 0.0; for(int ip = 0; ip < np; ip++) { - proj[iproj][ist][0] += bet[iproj][ip][0]*sgr[ip][ist][0]; - proj[iproj][ist][1] += bet[iproj][ip][1]*sgr[ip][ist][1]; + proj[iproj][ist] += bet[iproj][ip][0]*sgr[ip][ist][0] + bet[iproj][ip][1]*sgr[ip][ist][1]; } - proj[iproj][ist][0] *= vol; - proj[iproj][ist][1] *= vol; + proj[iproj][ist] *= vol; }); - //missing parallel reduction of projections + if(phi.basis().comm().size() > 1) { + phi.basis().comm().all_reduce_in_place_n(raw_pointer_cast(projections.data_elements()), projections.num_elements(), std::plus<>{}); + } return projections; } + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template void apply(states::orbital_set const & phi, states::orbital_set & vnlphi, KpointType const & kpoint) const { @@ -172,31 +175,36 @@ class relativistic_projector { gpu::run(phi.local_spinor_set_size(), nproj_, [proj = begin(projections), coe = begin(kb_coeff_)] GPU_LAMBDA (auto ist, auto iproj) { - - // std::cout << "PROJ " << iproj << '\t' << proj[iproj][ist][0] << '\t' << proj[iproj][ist][1] << '\t' << coe[iproj] << std::endl; - - proj[iproj][ist][0] *= coe[iproj]; - proj[iproj][ist][1] *= coe[iproj]; - + proj[iproj][ist] *= coe[iproj]; }); gpu::run(phi.local_spinor_set_size(), sphere_.size(), [gr = begin(vnlphi.spinor_hypercubic()), sph = sphere_.ref(), nproj = nproj_, bet = begin(beta_), proj = begin(projections)] GPU_LAMBDA (auto ist, auto ip){ auto point = sph.grid_point(ip); + + auto red0 = complex(0.0, 0.0); + auto red1 = complex(0.0, 0.0); for(int iproj = 0; iproj < nproj; iproj++) { - gr[point[0]][point[1]][point[2]][0][ist] += conj(bet[iproj][ip][0])*(proj[iproj][ist][0] + proj[iproj][ist][1]); - gr[point[0]][point[1]][point[2]][1][ist] += conj(bet[iproj][ip][1])*(proj[iproj][ist][0] + proj[iproj][ist][1]); + auto pp = proj[iproj][ist]; + red0 += conj(bet[iproj][ip][0])*pp; + red1 += conj(bet[iproj][ip][1])*pp; } + + gpu::atomic::add(&gr[point[0]][point[1]][point[2]][0][ist], red0); + gpu::atomic::add(&gr[point[0]][point[1]][point[2]][1][ist], red1); + }); } + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template double energy(states::orbital_set const & phi, Occupations const & occupations, KpointType const & kpoint) const { auto projections = project(phi, kpoint); return gpu::run(gpu::reduce(phi.local_spinor_set_size()), gpu::reduce(nproj_), 0.0, [proj = begin(projections), coe = begin(kb_coeff_), occ = begin(occupations)] GPU_LAMBDA (auto ist, auto iproj) { - auto pp = proj[iproj][ist][0] + proj[iproj][ist][1]; + auto pp = proj[iproj][ist]; return real(conj(pp)*pp)*coe[iproj]*occ[ist]; }); } diff --git a/tests/helium_fr.cpp b/tests/helium_fr.cpp index a69fbec5..81bc0a15 100644 --- a/tests/helium_fr.cpp +++ b/tests/helium_fr.cpp @@ -23,18 +23,16 @@ int main(int argc, char ** argv){ auto result = inq::ground_state::calculate(ions, electrons, inq::options::theory{}.pbe(), inq::options::ground_state{}.energy_tolerance(1e-8_Ha)); - /* - energy_match.check("total energy", result.energy.total(), -0.445160072256); - energy_match.check("kinetic energy", result.energy.kinetic(), 0.414315464604); - energy_match.check("eigenvalues", result.energy.eigenvalues(), -0.234029035766); - energy_match.check("Hartree energy", result.energy.hartree(), 0.281309132025); - energy_match.check("external energy", result.energy.external(), -0.909266382097); - energy_match.check("non-local energy", result.energy.non_local(), 0.000000000000); - energy_match.check("XC energy", result.energy.xc(), -0.231518286787); - energy_match.check("XC density integral", result.energy.nvxc(), -0.301696382322); - energy_match.check("HF exchange energy", result.energy.exact_exchange(), 0.000000000000); - energy_match.check("ion-ion energy", result.energy.ion(), 0.000000000000); - */ + energy_match.check("total energy", result.energy.total(), -2.860762587218); + energy_match.check("kinetic energy", result.energy.kinetic(), 2.504793796156); + energy_match.check("eigenvalues", result.energy.eigenvalues(), -1.161792830932); + energy_match.check("Hartree energy", result.energy.hartree(), 1.986218327002); + energy_match.check("external energy", result.energy.external(), -4.892155878650); + energy_match.check("non-local energy", result.energy.non_local(), -1.440150012679); + energy_match.check("XC energy", result.energy.xc(), -1.019468819047); + energy_match.check("XC density integral", result.energy.nvxc(), -1.306717389763); + energy_match.check("HF exchange energy", result.energy.exact_exchange(), 0.000000000000); + energy_match.check("ion-ion energy", result.energy.ion(), 0.000000000000); return energy_match.fail();