Skip to content

Commit eaad950

Browse files
committed
Fix(PW): correct all nonlocal force/stress formulas for nspin=4/SOC
Fixed formula errors in multiple kernels that broke SOC tests such as 035_PW_15_SO.

The incorrect formula was:
    ps1*dbb2 + ps2*dbb1 (wrong)
The correct formula, verified against the develop branch, is:
    ps1*dbb1 + ps2*dbb2 (correct)

Affected kernels:
- force_op.cpp: nonlocal force (deeq_nc) and DeltaSpin
- stress_op.cpp: nonlocal stress (deeq_nc)
- CUDA/ROCm versions of the above

All formulas now match the develop branch implementations.
1 parent d644491 commit eaad950

6 files changed

Lines changed: 6 additions & 6 deletions

File tree

source/source_pw/module_pwdft/kernels/cuda/force_op.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ __global__ void cal_force_nl(
270270
const thrust::complex<FPTYPE> dbb1 = conj(dbecp[index0]) * becp[index1 + nkb];
271271
const thrust::complex<FPTYPE> dbb2 = conj(dbecp[index0 + nkb]) * becp[index1];
272272
const thrust::complex<FPTYPE> dbb3 = conj(dbecp[index0 + nkb]) * becp[index1 + nkb];
273-
const FPTYPE tmp = - fac * (ps0 * dbb0 + ps1 * dbb2 + ps2 * dbb1 + ps3 * dbb3).real();
273+
const FPTYPE tmp = - fac * (ps0 * dbb0 + ps1 * dbb1 + ps2 * dbb2 + ps3 * dbb3).real();
274274
atomicAdd(force + iat * forcenl_nc + ipol, tmp);
275275
}
276276
}

source/source_pw/module_pwdft/kernels/cuda/stress_op.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -361,7 +361,7 @@ __global__ void cal_stress_nl(
361361
const thrust::complex<FPTYPE> dbb1 = conj(dbecp[ib2 * nkb + inkb1]) * becp[(ib2+1) * nkb + inkb2];
362362
const thrust::complex<FPTYPE> dbb2 = conj(dbecp[(ib2+1) * nkb + inkb1]) * becp[ib2 * nkb + inkb2];
363363
const thrust::complex<FPTYPE> dbb3 = conj(dbecp[(ib2+1) * nkb + inkb1]) * becp[(ib2+1) * nkb + inkb2];
364-
stress_var -= fac * (ps0 * dbb0 + ps1 * dbb2 + ps2 * dbb1 + ps3 * dbb3).real();
364+
stress_var -= fac * (ps0 * dbb0 + ps1 * dbb1 + ps2 * dbb2 + ps3 * dbb3).real();
365365
}
366366
++iat;
367367
sum+=nproj;

source/source_pw/module_pwdft/kernels/force_op.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ struct cal_force_nl_op<FPTYPE, base_device::DEVICE_CPU>
254254
const std::complex<FPTYPE> dbb2 = conj(dbecp[index0 + nkb]) * becp[index1];
255255
const std::complex<FPTYPE> dbb3 = conj(dbecp[index0 + nkb]) * becp[index1 + nkb];
256256

257-
local_force[ipol] -= fac * (ps0 * dbb0 + ps1 * dbb2 + ps2 * dbb1 + ps3 * dbb3).real();
257+
local_force[ipol] -= fac * (ps0 * dbb0 + ps1 * dbb1 + ps2 * dbb2 + ps3 * dbb3).real();
258258
}
259259
}
260260
}

source/source_pw/module_pwdft/kernels/rocm/force_op.hip.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -256,7 +256,7 @@ __global__ void cal_force_nl(
256256
const thrust::complex<FPTYPE> dbb1 = conj(dbecp[index0]) * becp[index1 + nkb];
257257
const thrust::complex<FPTYPE> dbb2 = conj(dbecp[index0 + nkb]) * becp[index1];
258258
const thrust::complex<FPTYPE> dbb3 = conj(dbecp[index0 + nkb]) * becp[index1 + nkb];
259-
const FPTYPE tmp = - fac * (ps0 * dbb0 + ps1 * dbb2 + ps2 * dbb1 + ps3 * dbb3).real();
259+
const FPTYPE tmp = - fac * (ps0 * dbb0 + ps1 * dbb1 + ps2 * dbb2 + ps3 * dbb3).real();
260260
atomicAdd(force + iat * forcenl_nc + ipol, tmp);
261261
}
262262
}

source/source_pw/module_pwdft/kernels/rocm/stress_op.hip.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -322,7 +322,7 @@ __global__ void cal_stress_nl(
322322
const thrust::complex<FPTYPE> dbb1 = conj(dbecp[ib2 * nkb + inkb1]) * becp[(ib2+1) * nkb + inkb2];
323323
const thrust::complex<FPTYPE> dbb2 = conj(dbecp[(ib2+1) * nkb + inkb1]) * becp[ib2 * nkb + inkb2];
324324
const thrust::complex<FPTYPE> dbb3 = conj(dbecp[(ib2+1) * nkb + inkb1]) * becp[(ib2+1) * nkb + inkb2];
325-
stress_var -= fac * (ps0 * dbb0 + ps1 * dbb2 + ps2 * dbb1 + ps3 * dbb3).real();
325+
stress_var -= fac * (ps0 * dbb0 + ps1 * dbb1 + ps2 * dbb2 + ps3 * dbb3).real();
326326
}
327327
++iat;
328328
sum+=nproj;

source/source_pw/module_pwdft/kernels/stress_op.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ struct cal_stress_nl_op<FPTYPE, base_device::DEVICE_CPU>
232232
const std::complex<FPTYPE> dbb1 = conj(dbecp[ib2 * nkb + inkb1]) * becp[(ib2+1) * nkb + inkb2];
233233
const std::complex<FPTYPE> dbb2 = conj(dbecp[(ib2+1) * nkb + inkb1]) * becp[ib2 * nkb + inkb2];
234234
const std::complex<FPTYPE> dbb3 = conj(dbecp[(ib2+1) * nkb + inkb1]) * becp[(ib2+1) * nkb + inkb2];
235-
local_stress -= fac * (ps0 * dbb0 + ps1 * dbb2 + ps2 * dbb1 + ps3 * dbb3).real();
235+
local_stress -= fac * (ps0 * dbb0 + ps1 * dbb1 + ps2 * dbb2 + ps3 * dbb3).real();
236236
}
237237
} // end ip
238238
} // ia

0 commit comments

Comments
 (0)