Skip to content

Commit e614606

Browse files
authored
Fix: improve the experience of using molden.py postprocessing tool by both developer and user (#4942)
* Tools: add a basic example of molden output * deprecate basinhopping and update initial guess method * Polish the stdout * add warning information
1 parent 3a3d535 commit e614606

4 files changed

Lines changed: 181 additions & 47 deletions

File tree

tools/molden.py renamed to tools/molden/molden.py

Lines changed: 137 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@ def build(self, rgrid, normalize = True):
117117
cgto = np.zeros_like(rgrid)
118118
# print(self.NumericalRadials[it][l][i])
119119
for a, c in self.NumericalRadials[it][l][i]: # for each primitive GTO
120-
cgto += GTORadials._build_gto(a, c, l, rgrid, normalize)
120+
cgto += GTORadials._build_gto(a, c, l, rgrid)
121+
if normalize:
122+
norm = np.sqrt(np.sum(cgto**2 * rgrid**2))
123+
cgto /= norm
121124
out[it][l].append(cgto)
122125
return out
123126

@@ -202,13 +205,11 @@ def _cgto_parse(data):
202205

203206
return elems, out
204207

205-
def _build_gto(a, c, l, r, normalize):
208+
def _build_gto(a, c, l, r):
206209
"""build one GTO defined by coefficients c, exponents a, angular momentum l and map on
207210
radial grid r"""
208211
import numpy as np
209212
g = c * np.exp(-a * r**2) * r**l
210-
if normalize:
211-
g /= np.sqrt(np.trapz(g**2, r))
212213
return g
213214

214215
def __str__(self) -> str:
@@ -264,7 +265,7 @@ def molden(self, it, iat) -> str:
264265
out += "\n"
265266
return out
266267

267-
def fit_radial_with_gto(nao, ngto, l, r):
268+
def fit_radial_with_gto(nao, ngto, l, r, rel_r=2):
268269
"""fit one radial function mapped on grid with GTOs
269270
270271
Args:
@@ -273,8 +274,8 @@ def fit_radial_with_gto(nao, ngto, l, r):
273274
l: int, the angular momentum.
274275
r: numpy array, the grid points.
275276
"""
276-
from scipy.optimize import basinhopping
277-
from scipy.integrate import simps
277+
from scipy.optimize import minimize
278+
from scipy.integrate import simpson
278279
import numpy as np
279280
def f(a_and_c, nao=nao, ngto=ngto, l=l, r=r):
280281
"""calculate the distance between the nao and superposition of GTOs of given
@@ -283,7 +284,7 @@ def f(a_and_c, nao=nao, ngto=ngto, l=l, r=r):
283284
assert len(c) == len(a), f"Invalid basis: {c}, {a}"
284285
gto = np.zeros_like(r)
285286
for i in range(len(c)):
286-
gto += GTORadials._build_gto(a[i], c[i], l, r, False)
287+
gto += GTORadials._build_gto(a[i], c[i], l, r)
287288
dorb = gto - nao
288289
if l == 0:
289290
return np.sum(dorb**2)
@@ -294,29 +295,55 @@ def f(a_and_c, nao=nao, ngto=ngto, l=l, r=r):
294295
dorb = dorb[1:]
295296
return np.sum((dorb/r**l)**2)
296297

297-
init = np.random.rand(ngto + ngto)
298-
# find optimal c and a values
298+
def gto_guess(nao, ngto, l, r, rel_r=2):
299+
"""generate the initial guess for the coefficients and exponents of GTOs.
300+
The GTO has form like c * exp(-a * r^2) * r^l, where c is the coefficient,
301+
the l will push the maxima from r = 0 to positive value. On the other hand
302+
the standard Gaussian function is 1/sqrt(2*simga^2) * exp(-r^2/(2*sigma^2)),
303+
, where the mu as taken to be zero.
304+
Therefore a = 1/(2*sigma^2), sigma = 1/sqrt(2*a). We set 3sigma = rmax, then
305+
the smallest a is guessed to be 9/(2*rmax^2), then the second smallest to be
306+
a*rel_r, which means the sigma will be shrink by factor sqrt(rel_r), and so
307+
on. c is set as the generalized cosine value between function to fit and the
308+
GTO with c = 1 and a setted.
309+
"""
310+
amin = 3**2 / (2 * r[-1]**2)
311+
a_init = np.zeros(ngto)
312+
for i in range(ngto):
313+
a_init[i] = amin * rel_r**(i + 1)
314+
c_init = np.zeros(ngto)
315+
for i in range(ngto):
316+
model = GTORadials._build_gto(a_init[i], 1, l, r)
317+
c_init[i] = simpson(nao * model * r**2, x=r)
318+
c_init[i] /= np.sqrt(simpson(model**2 * r**2, x=r))
319+
return np.concatenate((a_init, c_init))
320+
321+
init = gto_guess(nao, ngto, l, r, rel_r)
299322

300323
# bounds for c and a
301-
bounds = [(0, 5) for i in range(ngto)] + [(-np.inf, np.inf) for i in range(ngto)]
302-
res = basinhopping(f, init, niter=100, minimizer_kwargs={"method": "L-BFGS-B", "bounds": bounds}, disp=True)
303-
#res = minimize(f, init, bounds=bounds, method="L-BFGS-B", options={"maxiter": 1000, "disp": True, "ftol": 1e-10})
324+
bounds = [(0, np.inf) for i in range(ngto)] + [(-np.inf, np.inf) for i in range(ngto)]
325+
#res = basinhopping(f, init, niter=100, minimizer_kwargs={"method": "L-BFGS-B", "bounds": bounds}, disp=True)
326+
res = minimize(f, init, bounds=bounds, method="L-BFGS-B",
327+
options={"maxiter": 5000, "disp": False, "ftol": 1e-10, "gtol": 1e-10})
304328
a, c = res.x[:ngto], res.x[ngto:]
305329
err = res.fun
306-
# renormailize the coefficients
307-
gto_obj = GTORadials()
308-
gto_obj.register_cgto(a, c, l, 'w')
309-
out = gto_obj.build(r)[0][l][0]
310-
norm_gto = np.sqrt(simps(out**2*r**2, r))
311-
norm_nao = np.sqrt(simps(nao**2*r**2, r))
312-
print(f"norm loss ratio: {norm_gto/norm_nao:>.4f}, renormalize.")
313-
c /= norm_gto / norm_nao
330+
331+
cgto = GTORadials()
332+
cgto.register_cgto(a, c, l, 'w')
333+
out = cgto.build(r, False)
334+
norm_nao = simpson(nao**2 * r**2, x=r)
335+
norm_gto = simpson(out[0][l][0]**2 * r**2, x=r)
336+
factor = np.sqrt(norm_nao / norm_gto)
337+
print(f"NAO2GTO: Renormalize the CGTO from NAO2GTO method with factor {factor:.4f}")
338+
c *= factor # renormalize the coefficients to make the norm of GTO equals to that of NAO
314339

315340
print(f"""NAO2GTO: Angular momentum {l}, with {ngto} superposition to fit numerical atomic orbitals on given grid,
316-
this method refers to H. Shang et al. Summary:\nNonlinear fitting error: {err}\nCoefficients and exponents of primitive
317-
Gaussian Type Orbitals (GTOs):\n{"a":>10} {"c":>10}\n---------------------""")
341+
Nonlinear fitting error: {err:.4e}
342+
Exponential and contraction coefficients of primitive GTOs in a.u.:
343+
{"a":>10} {"c":>10}\n---------------------""")
318344
for i in range(ngto):
319345
print(f"{a[i]:10.6f} {c[i]:10.6f}")
346+
print(f"\nNAO2GTO: The fitted GTOs are saved in the CGTO instance.")
320347
return a, c
321348

322349
def read_nao(fpath):
@@ -372,13 +399,14 @@ def read_nao(fpath):
372399

373400
return {'elem': elem, 'ecut': ecut, 'rcut': rcut, 'nr': nr, 'dr': dr, 'chi': chi}
374401

375-
def convert_nao_to_gto(fnao, fgto = None, ngto: int = 7):
402+
def convert_nao_to_gto(fnao, fgto = None, ngto: int = 7, rel_r: float = 2):
376403
"""convert the numerical atomic orbitals to GTOs. Each chi (or say the zeta function)
377404
corresponds to a CGTO (contracted GTO), and the GTOs are fitted to the radial functions.
378405
Which also means during the SCF, the coefficient inside each CGTO is unchanged, while the
379406
coefficients of CGTO will be optimized."""
380407
import matplotlib.pyplot as plt
381408
import numpy as np
409+
import os
382410

383411
gto = GTORadials()
384412
# read the numerical atomic orbitals
@@ -390,7 +418,7 @@ def convert_nao_to_gto(fnao, fgto = None, ngto: int = 7):
390418
for l in range(lmax+1):
391419
nchi = len(nao["chi"][l])
392420
for i in range(nchi):
393-
a, c = fit_radial_with_gto(nao["chi"][l][i], ngto, l, rgrid)
421+
a, c = fit_radial_with_gto(nao["chi"][l][i], ngto, l, rgrid, rel_r)
394422
gto.register_cgto(a, c, l, symbol, 'a')
395423

396424
# draw the fitted GTOs
@@ -400,10 +428,12 @@ def convert_nao_to_gto(fnao, fgto = None, ngto: int = 7):
400428
for ic in range(len(out[it][l])):
401429
plt.plot(rgrid, out[it][l][ic], label=f"element {symbol}, l={l}, ic={ic}")
402430
plt.legend()
403-
plt.savefig(fnao.replace(".orb", ".gto.png"))
431+
432+
fgto = os.path.basename(fnao).replace(".orb", "") + ".gto" if fgto is None else fgto
433+
fgto = fnao.replace(os.path.basename(fnao), fgto) # make sure that only the file name is changed
434+
plt.savefig(fgto + ".png")
404435
plt.close()
405436

406-
fgto = fnao.replace(".orb", ".gto") if fgto is None else fgto
407437
with open(fgto, "w") as f:
408438
f.write(str(gto))
409439

@@ -452,10 +482,7 @@ def read_abacus_lowf(flowf, pat=r'^WFC_NAO_(K\d+|GAMMA(1|2)).txt$'):
452482
# discard the first two lines
453483
lines = lines[2:]
454484
# initialize lists
455-
occ = []
456-
band = []
457-
ener = []
458-
data = []
485+
occ, band, ener, data = [], [], [], []
459486

460487
# read nbands and nlocal
461488
i = 0
@@ -483,8 +510,8 @@ def read_abacus_lowf(flowf, pat=r'^WFC_NAO_(K\d+|GAMMA(1|2)).txt$'):
483510
data = [d.real for d in data]
484511
data = np.array(data).reshape(nbands, nlocal)
485512
if data.shape != (nbands, nlocal):
486-
print(f"nbands = {nbands}, nlocal = {nlocal}")
487-
print(f"data.shape = {data.shape}")
513+
print(f"ERROR: nbands = {nbands}, nlocal = {nlocal}")
514+
print(f"ERROR: data.shape = {data.shape}")
488515
raise ValueError("Data read from file is not consistent with expected size.")
489516

490517
return nbands, nlocal, occ, band, ener, data
@@ -681,10 +708,22 @@ def read_stru(fpath):
681708
return stru
682709

683710
def write_molden_cell(const, vec):
684-
"""The Molden requires the cell information in Angstrom, while ABACUS uses Bohr."""
711+
"""The Molden requires the cell information in Angstrom, while ABACUS uses Bohr.
712+
713+
Args:
714+
const (float): the `LATTICE_CONSTANT` set in ABACUS STRU file, always used for
715+
scaling the cell vectors and atomic positions if not set `*_Angstrom` explicitly.
716+
This quantity actually have unit as Bohr.
717+
vec (list): the cell vectors, dimensionless, 3 x 3 matrix
718+
719+
Returns:
720+
str: the string formatted in Molden format
721+
"""
685722
out = "[Cell]\n"
686723
assert len(vec) == 3
687724
assert all(len(v) == 3 for v in vec)
725+
# convert the const unit from Bohr to Angstrom, because Multiwfn requires the unit as Angstrom
726+
const *= 0.529177210903
688727
for i in range(3):
689728
out += f"{vec[i][0]*const:>15.10f}{vec[i][1]*const:>15.10f}{vec[i][2]*const:>15.10f}\n"
690729
return out
@@ -777,6 +816,33 @@ def read_abacus_input(finput):
777816
kv[m.group(1)] = m.group(2)
778817
return kv
779818

819+
def read_abacus_kpt(fkpt):
820+
"""the way to organize information of KPT file of ABACUS still has some degree of freedom.
821+
However, the only one wanted should have content like the following:
822+
K_POINTS
823+
0
824+
Gamma
825+
1 1 1 0 0 0
826+
827+
in which the "Gamma" can be replaced by "MP" but the number of kpoints should be 1.
828+
In the future the multiple kpoints is planned to be supported in a relatively naive way that
829+
simply combining all MOs at different kpoints together but not for now. The occupation of MO
830+
at different kpoints is already multiplied by the weight of kpoints, is it expected?
831+
832+
This function is not really read kpoints and return something, instead, it is for assert
833+
the number of kpoints is 1.
834+
"""
835+
with open(fkpt, 'r') as file:
836+
lines = file.readlines()
837+
lines = [line.strip() for line in lines]
838+
if lines[0] == "K_POINTS":
839+
if lines[1] == "0":
840+
if lines[2] in ["Gamma", "MP"]:
841+
if lines[3] == "1 1 1 0 0 0":
842+
return
843+
raise ValueError(f"Invalid KPT file {fkpt}. Presently only 1 kpoint calculation \
844+
(implicit or explicit) Gamma-only calculation is supported.")
845+
780846
def CondonShortleyPhase(index):
781847
"""Imposing the Condon-Shortley phase on the MO index.
782848
Molden requires the magnetic quantum number to be arranged like 0, +1, -1, +2, -2, ...
@@ -823,8 +889,18 @@ def indexing_mo(total_gto: GTORadials, labels: list):
823889
i += 2*l+1
824890
return out
825891

826-
def moldengen(folder: str, ndigits=3, ngto=7, fmolden="ABACUS.molden"):
827-
"""generate molden file by reading the outdir of ABACUS, for only LCAO calculation!"""
892+
def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden"):
893+
"""Entrance function: generate molden file by reading the outdir of ABACUS, for only LCAO
894+
calculation.
895+
896+
Args:
897+
folder (str): the folder containing the ABACUS input and output files
898+
ndigits (int): the number of digits to be printed for the coefficients
899+
ngto (int): the number of GTOs to be fitted to the numerical atomic orbitals
900+
fmolden (str): the file name of the molden file
901+
902+
Returns:
903+
str: the content of the molden file"""
828904
import os
829905
import numpy as np
830906

@@ -841,8 +917,8 @@ def moldengen(folder: str, ndigits=3, ngto=7, fmolden="ABACUS.molden"):
841917
# write the cell #
842918
####################
843919
kv = read_abacus_input("INPUT")
844-
_temp = kv.get("stru_file", "STRU")
845-
stru = read_abacus_stru(_temp)
920+
_ = read_abacus_kpt(kv.get("kpoint_file", "KPT"))
921+
stru = read_abacus_stru(kv.get("stru_file", "STRU"))
846922
out += write_molden_cell(stru['lat']['const'], stru['lat']['vec'])
847923

848924
####################
@@ -877,7 +953,7 @@ def moldengen(folder: str, ndigits=3, ngto=7, fmolden="ABACUS.molden"):
877953

878954
total_gto = GTORadials()
879955
for forb in forbs:
880-
gto = convert_nao_to_gto(forb, None, ngto)
956+
gto = convert_nao_to_gto(forb, None, ngto, rel_r)
881957
total_gto.NumericalRadials.append(gto.NumericalRadials[0])
882958
total_gto.symbols.append(gto.symbols[0])
883959
out += write_molden_gto(total_gto, labels_kinds_map)
@@ -1153,7 +1229,6 @@ def test_cgto_molden(self):
11531229
print(out)
11541230

11551231
def est_fit_radial_with_gto(self):
1156-
from SIAB.spillage.orbio import read_nao
11571232
import numpy as np
11581233

11591234
# read the numerical atomic orbitals
@@ -1167,7 +1242,7 @@ def est_fit_radial_with_gto(self):
11671242
# the fitted GTOs
11681243
gto = np.zeros_like(rgrid)
11691244
for a_, c_ in zip(a, c):
1170-
gto += GTORadials._build_gto(a_, c_, l, rgrid, False)
1245+
gto += GTORadials._build_gto(a_, c_, l, rgrid)
11711246

11721247
import matplotlib.pyplot as plt
11731248
plt.plot(rgrid, chi, label="NAO")
@@ -1240,24 +1315,39 @@ def _argparse():
12401315
-f, --folder: the folder of the ABACUS calculation, in which the STRU, INPUT, KPT, and OUT* folders are located.
12411316
-n, --ndigits: the number of digits for the MO coefficients. For MO coefficients smaller than 10^-n, they will be set to 0.
12421317
-g, --ngto: the number of GTOs to fit ABACUS NAOs. The default is 7.
1318+
-r, --rel_r: the relative cutoff radius for the GTOs. The default is 2.
12431319
-o, --output: the output Molden file name. The default is ABACUS.molden.
12441320
"""
12451321
import argparse
1246-
parser = argparse.ArgumentParser(description="Generate Molden file from ABACUS LCAO calculation")
1247-
welcome = """Once meet any problem, please submit an issue at:\n
1248-
https://github.com/deepmodeling/abacus-develop/issues\n
1322+
parser = argparse.ArgumentParser(description="Generate Molden file from ABACUS LCAO calculation via NAO2GTO method")
1323+
welcome = """WARNING: use at your own risk because the NAO2GTO will not always conserve the shape of radial function, therefore
1324+
the total number of electrons may not be conserved. Always use after a re-normalization operation.
1325+
Once meet any problem, please submit an issue at: https://github.com/deepmodeling/abacus-develop/issues
12491326
"""
12501327
parser.epilog = welcome
12511328
parser.add_argument("-f", "--folder", type=str, help="the folder of the ABACUS calculation")
12521329
parser.add_argument("-n", "--ndigits", type=int, default=3, help="the number of digits for the MO coefficients")
12531330
parser.add_argument("-g", "--ngto", type=int, default=7, help="the number of GTOs to fit ABACUS NAOs")
1331+
parser.add_argument("-r", "--rel_r", type=int, default=2, help="the relative cutoff radius for the GTOs")
12541332
parser.add_argument("-o", "--output", type=str, default="ABACUS.molden", help="the output Molden file name")
12551333
args = parser.parse_args()
12561334
return args
12571335

12581336
if __name__ == "__main__":
12591337
#unittest.main(exit=False)
12601338
args = _argparse()
1261-
moldengen(args.folder, args.ndigits, args.ngto, args.output)
1262-
print(f"Generated Molden file {args.output} from ABACUS calculation in folder {args.folder}")
1263-
1339+
moldengen(args.folder, args.ndigits, args.ngto, args.rel_r, args.output)
1340+
print(" ".join("*"*10).center(80, " "))
1341+
print(f"""MOLDEN: Generated Molden file {args.output} from ABACUS calculation in folder {args.folder}.
1342+
WARNING: use at your own risk because the NAO2GTO will not always conserve the shape of radial function, therefore
1343+
the total number of electrons may not be conserved. Always use after a re-normalization operation.""")
1344+
citation = """If you use this script in your research, please cite the following paper:\n
1345+
ABACUS:
1346+
Li P, Liu X, Chen M, et al. Large-scale ab initio simulations based on systematically improvable atomic basis[J].
1347+
Computational Materials Science, 2016, 112: 503-517.
1348+
1349+
NAO2GTO method:
1350+
Qin X, Shang H, Xiang H, et al. HONPAS: A linear scaling open-source solution for large system simulations[J].
1351+
International Journal of Quantum Chemistry, 2015, 115(10): 647-655.
1352+
"""
1353+
print(citation, flush=True)

tools/molden/water/INPUT

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
INPUT_PARAMETERS
2+
basis_type lcao
3+
calculation scf
4+
pseudo_dir ../../../tests/PP_ORB
5+
orbital_dir ../../../tests/PP_ORB
6+
nspin 1
7+
scf_thr 1e-7
8+
out_wfc_lcao 1
9+
gamma_only 1
10+
ks_solver genelpa
11+
ecutwfc 100

tools/molden/water/KPT

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
K_POINTS
2+
0
3+
Gamma
4+
1 1 1 0 0 0

tools/molden/water/STRU

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
ATOMIC_SPECIES
2+
H 1.0080 H_ONCV_PBE-1.0.upf upf201
3+
O 15.9994 O_ONCV_PBE-1.0.upf upf201
4+
5+
NUMERICAL_ORBITAL
6+
H_gga_8au_60Ry_2s1p.orb
7+
O_gga_7au_60Ry_2s2p1d.orb
8+
9+
LATTICE_CONSTANT
10+
1.0000000000
11+
12+
LATTICE_VECTORS
13+
28.0000000000 0.0000000000 0.0000000000
14+
0.0000000000 28.0000000000 0.0000000000
15+
0.0000000000 0.0000000000 28.0000000000
16+
17+
ATOMIC_POSITIONS
18+
Direct
19+
20+
H #label
21+
0.0000 #magnetism
22+
2 #number of atoms
23+
0.4274183435 0.6688147170 0.2972451590 m 1 1 1
24+
0.5087250804 0.7377638392 0.2679886612 m 1 1 1
25+
26+
O #label
27+
0.0000 #magnetism
28+
1 #number of atoms
29+
0.4839438965 0.7028954209 0.3263932637 m 1 1 1

0 commit comments

Comments
 (0)