Skip to content

Commit fd0a9ef

Browse files
antoinewdgLuthaf
authored andcommitted
Uniformize memory access
1 parent 6da0482 commit fd0a9ef

File tree

1 file changed

+58
-30
lines changed

1 file changed

+58
-30
lines changed

src/core/src/energy/global/ewald.rs

Lines changed: 58 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -378,12 +378,9 @@ impl Ewald {
378378
return energy;
379379
}
380380

381-
/// Loop over each k_space force.
382-
///
383-
/// Abstracts over the very similar computations in `kspace_forces`
384-
/// and `kspace_virial`.
385-
fn for_each_kspace_force<F>(&mut self, system: &System, mut callback: F)
386-
where F : FnMut(usize, usize, Vector3D){
381+
/// k-space contribution to the forces
382+
fn kspace_forces(&mut self, system: &System, forces: &mut [Vector3D]) {
383+
assert_eq!(forces.len(), system.size());
387384
self.density_fft(system);
388385

389386
let factor = 4.0 * PI / (system.cell().volume() * ELCC);
@@ -394,62 +391,93 @@ impl Ewald {
394391
for ikz in 0..self.kmax {
395392
// The k = 0 and the cutoff in k-space are already handled
396393
// in `expfactors`.
397-
let expfactor = self.expfactors[(ikx, iky, ikz)].abs() ;
398-
if expfactor < f64::EPSILON {continue}
394+
let expfactor = self.expfactors[(ikx, iky, ikz)].abs();
395+
if expfactor < f64::EPSILON { continue }
399396

400397
let f = expfactor * factor;
401398
let k = (ikx as f64) * rec_kx + (iky as f64) * rec_ky + (ikz as f64) * rec_kz;
402399

403400
for i in 0..system.size() {
404401
let qi = system[i].charge;
405-
let fourier_i = self.fourier_phases[(ikx, i, 0)].imag_mul(
406-
self.fourier_phases[(iky, i, 1)] * self.fourier_phases[(ikz, i, 2)]
407-
);
402+
403+
let fourier_i = self.fourier_phases[(ikx, i, 0)] *
404+
self.fourier_phases[(iky, i, 1)] *
405+
self.fourier_phases[(ikz, i, 2)];
406+
let fourier_i = fourier_i.imag();
407+
408+
let mut force_i = Vector3D::zero();
409+
408410
for j in (i + 1)..system.size() {
409411
let qj = system[j].charge;
410412
let force = f * self.kspace_force_factor(j, ikx, iky, ikz, qi, qj, fourier_i) * k;
411-
callback(i, j, force);
413+
force_i -= force;
414+
forces[j] += force;
412415
}
416+
417+
forces[i] += force_i;
413418
}
414419
}
415420
}
416421
}
417422
}
418423

419-
/// k-space contribution to the forces
420-
fn kspace_forces(&mut self, system: &System, forces: &mut [Vector3D]) {
421-
assert_eq!(forces.len(), system.size());
422-
423-
self.for_each_kspace_force(system, |i, j, force|{
424-
forces[i] -= force;
425-
forces[j] += force;
426-
});
427-
}
428-
429424
/// Get the force factor for particles `i` and `j` with charges `qi` and
430425
/// `qj`, at k point `(ikx, iky, ikz)`
431426
#[inline]
432427
#[allow(too_many_arguments)]
433428
fn kspace_force_factor(&self, j: usize, ikx: usize, iky: usize, ikz: usize,
434429
qi: f64, qj: f64, fourier_i: f64) -> f64 {
435430

436-
let fourier_j = self.fourier_phases[(ikx, j, 0)].imag_mul(
437-
self.fourier_phases[(iky, j, 1)] * self.fourier_phases[(ikz, j, 2)]
438-
);
431+
// Here the compiler is smart enough to optimize away
432+
// the useless computation of the last real part.
433+
let fourier_j = self.fourier_phases[(ikx, j, 0)] *
434+
self.fourier_phases[(iky, j, 1)] *
435+
self.fourier_phases[(ikz, j, 2)];
436+
let fourier_j = fourier_j.imag();
439437

440438
return qi * qj * (fourier_i - fourier_j);
441439
}
442440

443441
/// k-space contribution to the virial
444442
fn kspace_virial(&mut self, system: &System) -> Matrix3 {
443+
self.density_fft(system);
445444
let mut virial = Matrix3::zero();
446445

447-
self.for_each_kspace_force(system, |i, j, force|{
448-
let rij = system.nearest_image(i, j);
449-
virial += force.tensorial(&rij);
450-
});
446+
let factor = 4.0 * PI / (system.cell().volume() * ELCC);
447+
let (rec_kx, rec_ky, rec_kz) = system.cell().reciprocal_vectors();
451448

452-
return virial;
449+
for ikx in 0..self.kmax {
450+
for iky in 0..self.kmax {
451+
for ikz in 0..self.kmax {
452+
// The k = 0 and the cutoff in k-space are already handled
453+
// in `expfactors`.
454+
let expfactor = self.expfactors[(ikx, iky, ikz)].abs();
455+
if expfactor < f64::EPSILON { continue }
456+
457+
let f = expfactor * factor;
458+
let k = (ikx as f64) * rec_kx + (iky as f64) * rec_ky + (ikz as f64) * rec_kz;
459+
460+
for i in 0..system.size() {
461+
let qi = system[i].charge;
462+
463+
let fourier_i = self.fourier_phases[(ikx, i, 0)] *
464+
self.fourier_phases[(iky, i, 1)] *
465+
self.fourier_phases[(ikz, i, 2)];
466+
let fourier_i = fourier_i.imag();
467+
468+
for j in (i + 1)..system.size() {
469+
let qj = system[j].charge;
470+
let force = f * self.kspace_force_factor(j, ikx, iky, ikz, qi, qj, fourier_i) * k;
471+
let rij = system.nearest_image(i, j);
472+
virial += force.tensorial(&rij);
473+
}
474+
475+
}
476+
}
477+
}
478+
}
479+
480+
virial
453481
}
454482

455483
fn compute_delta_rho_move_particles(&mut self, system: &System, idxes: &[usize], newpos: &[Vector3D]) {

0 commit comments

Comments
 (0)