Skip to content

Commit 41e4c69

Browse files
authored
Explicitly use int64 in Numpy/cython code to avoid OS inconsistency (materialsproject#3992)
* use explicit int64 to avoid OS inconsistency * pin np <1 to run tests * add missing replacement in cython code * more missing spglib api replacement * globally replace dtype=int with int64 * pre-commit auto-fixes * use int64 in coord_cython * fix assert dtype * NEED confirmation: skip chgnet tests * docstrring tweaks of linear assignment * enough docstring tweaks, go back to real business * last batch of docstring tweak and remove isort tag as it doesn't seem to be used * revert to NP2 in dependency * try to add a np1 test for windows * try another way to install np1 * fix opt dep name * add comment in NP1 install
1 parent 6f280a9 commit 41e4c69

25 files changed

+114
-116
lines changed

.github/workflows/test.yml

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ jobs:
3434
python: "3.10"
3535
resolution: highest
3636
extras: ci,optional
37+
- os: windows-latest
38+
python: "3.10"
39+
resolution: highest
40+
extras: ci,optional,numpy1 # Test NP1 on Windows (quite buggy ATM)
3741
- os: ubuntu-latest
3842
python: ">3.10"
3943
resolution: lowest-direct

pyproject.toml

+4-5
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ dependencies = [
5959
"matplotlib>=3.8",
6060
"monty>=2024.7.29",
6161
"networkx>=2.2",
62-
# NumPy documentation suggests pinning the current major version as the C API is used
63-
# https://numpy.org/devdocs/dev/depending_on_numpy.html#runtime-dependency-version-ranges
6462
"palettable>=3.3.3",
6563
"pandas>=2",
6664
"plotly>=4.5.0",
@@ -73,7 +71,9 @@ dependencies = [
7371
"tabulate>=0.9",
7472
"tqdm>=4.60",
7573
"uncertainties>=3.1.4",
76-
'numpy>=1.25.0,<3.0',
74+
# NumPy documentation suggests pinning the current major version as the C API is used
75+
# https://numpy.org/devdocs/dev/depending_on_numpy.html#runtime-dependency-version-ranges
76+
"numpy>=1.25.0,<3",
7777
]
7878
version = "2024.8.9"
7979

@@ -115,6 +115,7 @@ optional = [
115115
# "openbabel>=3.1.1; platform_system=='Linux'",
116116
]
117117
numba = ["numba>=0.55"]
118+
numpy1 = ["numpy>=1.25.0,<2"] # Test NP1 on Windows (quite buggy ATM)
118119

119120
[project.scripts]
120121
pmg = "pymatgen.cli.pmg:main"
@@ -266,9 +267,7 @@ exclude_also = [
266267
"@np.deprecate",
267268
"def __repr__",
268269
"except ImportError:",
269-
"if 0:",
270270
"if TYPE_CHECKING:",
271-
"if __name__ == .__main__.:",
272271
"if self.debug:",
273272
"if settings.DEBUG",
274273
"if typing.TYPE_CHECKING:",

src/pymatgen/analysis/chemenv/coordination_environments/coordination_geometry_finder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1728,7 +1728,7 @@ def coordination_geometry_symmetry_measures_separation_plane_optim(
17281728
continue
17291729
if sep not in nb_set.separations:
17301730
nb_set.separations[sep] = {}
1731-
_sep = [np.array(ss, dtype=int) for ss in separation]
1731+
_sep = [np.array(ss, dtype=np.int64) for ss in separation]
17321732
nb_set.separations[sep][separation] = (plane, _sep)
17331733
if sep == separation_plane_algo.separation:
17341734
new_seps.append(_sep)

src/pymatgen/analysis/chemenv/utils/graph_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ def get_delta(node1, node2, edge_data):
2525
edge_data:
2626
"""
2727
if node1.isite == edge_data["start"] and node2.isite == edge_data["end"]:
28-
return np.array(edge_data["delta"], dtype=int)
28+
return np.array(edge_data["delta"], dtype=np.int64)
2929
if node2.isite == edge_data["start"] and node1.isite == edge_data["end"]:
30-
return -np.array(edge_data["delta"], dtype=int)
30+
return -np.array(edge_data["delta"], dtype=np.int64)
3131
raise ValueError("Trying to find a delta between two nodes with an edge that seems not to link these nodes.")
3232

3333

src/pymatgen/analysis/local_env.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ def get_all_voronoi_polyhedra(self, structure: Structure):
822822
indices.extend([(x[2],) + x[3] for x in neighs])
823823

824824
# Get the non-duplicates (using the site indices for numerical stability)
825-
indices = np.array(indices, dtype=int)
825+
indices = np.array(indices, dtype=np.int64)
826826
indices, uniq_inds = np.unique(indices, return_index=True, axis=0) # type: ignore[assignment]
827827
sites = [sites[idx] for idx in uniq_inds]
828828

src/pymatgen/analysis/molecule_matcher.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1042,7 +1042,7 @@ def permutations(p_atoms, p_centroid, p_weights, q_atoms, q_centroid, q_weights)
10421042
p_centroid_test = np.dot(p_centroid, U)
10431043

10441044
# generate full view from q shape to fill in atom view on the fly
1045-
perm_inds = np.zeros(len(p_atoms), dtype=int)
1045+
perm_inds = np.zeros(len(p_atoms), dtype=np.int64)
10461046

10471047
# Find unique atoms
10481048
species = np.unique(p_atoms)
@@ -1067,7 +1067,7 @@ def permutations(p_atoms, p_centroid, p_weights, q_atoms, q_centroid, q_weights)
10671067
p_centroid_test = np.dot(p_centroid, U)
10681068

10691069
# generate full view from q shape to fill in atom view on the fly
1070-
perm_inds = np.zeros(len(p_atoms), dtype=int)
1070+
perm_inds = np.zeros(len(p_atoms), dtype=np.int64)
10711071

10721072
# Find unique atoms
10731073
species = np.unique(p_atoms)

src/pymatgen/analysis/structure_matcher.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ def _get_mask(self, struct1, struct2, fu, s1_supercell):
559559
if s1_supercell:
560560
# remove the symmetrically equivalent s1 indices
561561
inds = inds[::fu]
562-
return np.array(mask, dtype=int), inds, idx
562+
return np.array(mask, dtype=np.int64), inds, idx
563563

564564
def fit(
565565
self, struct1: Structure, struct2: Structure, symmetric: bool = False, skip_structure_reduction: bool = False

src/pymatgen/analysis/topological/spillage.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def orth(A):
4242
n_rows, n_cols = A.shape
4343
eps = np.finfo(float).eps
4444
tol = max(n_rows, n_cols) * np.amax(s) * eps
45-
num = np.sum(s > tol, dtype=int)
45+
num = np.sum(s > tol, dtype=np.int64)
4646
orthonormal_basis = u[:, :num]
4747
return orthonormal_basis, num
4848

src/pymatgen/core/interface.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1269,7 +1269,7 @@ def get_trans_mat(
12691269
r_matrix = np.dot(np.dot(np.linalg.inv(trans_cry.T), r_matrix), trans_cry.T)
12701270
# Set one vector of the basis to the rotation axis direction, and
12711271
# obtain the corresponding transform matrix
1272-
eye = np.eye(3, dtype=int)
1272+
eye = np.eye(3, dtype=np.int64)
12731273
hh = kk = ll = None
12741274
for hh in range(3):
12751275
if abs(r_axis[hh]) != 0:
@@ -2107,7 +2107,7 @@ def slab_from_csl(
21072107
# Quickly generate a supercell, normal is not working in this way
21082108
if quick_gen:
21092109
scale_factor = []
2110-
eye = np.eye(3, dtype=int)
2110+
eye = np.eye(3, dtype=np.int64)
21112111
for i, j in enumerate(miller):
21122112
if j == 0:
21132113
scale_factor.append(eye[i])

src/pymatgen/core/lattice.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,7 @@ def get_angles(v1, v2, l1, l2):
961961
for idx, all_j in enumerate(gamma_b):
962962
inds = np.logical_and(all_j[:, None], np.logical_and(alpha_b, beta_b[idx][None, :]))
963963
for j, k in np.argwhere(inds):
964-
scale_m = np.array((f_a[idx], f_b[j], f_c[k]), dtype=int) # type: ignore[index]
964+
scale_m = np.array((f_a[idx], f_b[j], f_c[k]), dtype=np.int64) # type: ignore[index]
965965
if abs(np.linalg.det(scale_m)) < 1e-8:
966966
continue
967967

@@ -1381,7 +1381,7 @@ def get_points_in_sphere(
13811381
frac_points = np.ascontiguousarray(frac_points, dtype=float)
13821382
latt_matrix = np.ascontiguousarray(self.matrix, dtype=float)
13831383
cart_coords = np.ascontiguousarray(self.get_cartesian_coords(frac_points), dtype=float)
1384-
pbc = np.ascontiguousarray(self.pbc, dtype=int)
1384+
pbc = np.ascontiguousarray(self.pbc, dtype=np.int64)
13851385
center_coords = np.ascontiguousarray([center], dtype=float)
13861386

13871387
_, indices, images, distances = find_points_in_spheres(
@@ -1510,12 +1510,12 @@ def get_points_in_sphere_old(
15101510
# Generate all possible images that could be within `r` of `center`
15111511
mins = np.floor(pcoords - nmax)
15121512
maxes = np.ceil(pcoords + nmax)
1513-
arange = np.arange(start=mins[0], stop=maxes[0], dtype=int)
1514-
brange = np.arange(start=mins[1], stop=maxes[1], dtype=int)
1515-
crange = np.arange(start=mins[2], stop=maxes[2], dtype=int)
1516-
arange = arange[:, None] * np.array([1, 0, 0], dtype=int)[None, :]
1517-
brange = brange[:, None] * np.array([0, 1, 0], dtype=int)[None, :]
1518-
crange = crange[:, None] * np.array([0, 0, 1], dtype=int)[None, :]
1513+
arange = np.arange(start=mins[0], stop=maxes[0], dtype=np.int64)
1514+
brange = np.arange(start=mins[1], stop=maxes[1], dtype=np.int64)
1515+
crange = np.arange(start=mins[2], stop=maxes[2], dtype=np.int64)
1516+
arange = arange[:, None] * np.array([1, 0, 0], dtype=np.int64)[None, :]
1517+
brange = brange[:, None] * np.array([0, 1, 0], dtype=np.int64)[None, :]
1518+
crange = crange[:, None] * np.array([0, 0, 1], dtype=np.int64)[None, :]
15191519
images = arange[:, None, None] + brange[None, :, None] + crange[None, None, :]
15201520

15211521
# Generate the coordinates of all atoms within these images
@@ -1629,7 +1629,7 @@ def get_distance_and_image(
16291629
if jimage is None:
16301630
v, d2 = pbc_shortest_vectors(self, frac_coords1, frac_coords2, return_d2=True)
16311631
fc = self.get_fractional_coords(v[0][0]) + frac_coords1 - frac_coords2
1632-
fc = np.array(np.round(fc), dtype=int)
1632+
fc = np.array(np.round(fc), dtype=np.int64)
16331633
return np.sqrt(d2[0, 0]), fc
16341634

16351635
jimage = np.array(jimage)
@@ -1866,7 +1866,7 @@ def get_points_in_spheres(
18661866
neighbors: list[list[tuple[np.ndarray, float, int, np.ndarray]]] = []
18671867

18681868
for ii, jj in zip(center_coords, site_neighbors, strict=True):
1869-
l1 = np.array(_three_to_one(jj, ny, nz), dtype=int).ravel()
1869+
l1 = np.array(_three_to_one(jj, ny, nz), dtype=np.int64).ravel()
18701870
# Use the cube index map to find the all the neighboring
18711871
# coords, images, and indices
18721872
ks = [k for k in l1 if k in cube_to_coords]
@@ -1905,7 +1905,7 @@ def _compute_cube_index(
19051905
Returns:
19061906
np.ndarray: nx3 array int indices
19071907
"""
1908-
return np.array(np.floor((coords - global_min) / radius), dtype=int)
1908+
return np.array(np.floor((coords - global_min) / radius), dtype=np.int64)
19091909

19101910

19111911
def _one_to_three(label1d: np.ndarray, ny: int, nz: int) -> np.ndarray:
@@ -1943,7 +1943,7 @@ def find_neighbors(label: np.ndarray, nx: int, ny: int, nz: int) -> list[np.ndar
19431943
Neighbor cell indices.
19441944
"""
19451945
array = [[-1, 0, 1]] * 3
1946-
neighbor_vectors = np.array(list(itertools.product(*array)), dtype=int)
1946+
neighbor_vectors = np.array(list(itertools.product(*array)), dtype=np.int64)
19471947
label3d = _one_to_three(label, ny, nz) if np.shape(label)[1] == 1 else label
19481948
all_labels = label3d[:, None, :] - neighbor_vectors[None, :, :]
19491949
filtered_labels = []

src/pymatgen/core/structure.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1778,7 +1778,7 @@ def get_neighbor_list(
17781778
site_coords = np.ascontiguousarray([site.coords for site in sites], dtype=float)
17791779
cart_coords = np.ascontiguousarray(self.cart_coords, dtype=float)
17801780
lattice_matrix = np.ascontiguousarray(self.lattice.matrix, dtype=float)
1781-
pbc = np.ascontiguousarray(self.pbc, dtype=int)
1781+
pbc = np.ascontiguousarray(self.pbc, dtype=np.int64)
17821782
center_indices, points_indices, images, distances = find_points_in_spheres(
17831783
cart_coords,
17841784
site_coords,

src/pymatgen/core/surface.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -949,9 +949,9 @@ def add_site_types() -> None:
949949
or "bulk_equivalent" not in initial_structure.site_properties
950950
):
951951
spg_analyzer = SpacegroupAnalyzer(initial_structure)
952-
initial_structure.add_site_property("bulk_wyckoff", spg_analyzer.get_symmetry_dataset()["wyckoffs"])
952+
initial_structure.add_site_property("bulk_wyckoff", spg_analyzer.get_symmetry_dataset().wyckoffs)
953953
initial_structure.add_site_property(
954-
"bulk_equivalent", spg_analyzer.get_symmetry_dataset()["equivalent_atoms"].tolist()
954+
"bulk_equivalent", spg_analyzer.get_symmetry_dataset().equivalent_atoms.tolist()
955955
)
956956

957957
def calculate_surface_normal() -> np.ndarray:
@@ -971,7 +971,7 @@ def calculate_scaling_factor() -> np.ndarray:
971971
"""
972972
slab_scale_factor = []
973973
non_orth_ind = []
974-
eye = np.eye(3, dtype=int)
974+
eye = np.eye(3, dtype=np.int64)
975975
for idx, miller_idx in enumerate(miller_index):
976976
if miller_idx == 0:
977977
# If lattice vector is perpendicular to surface normal, i.e.,

src/pymatgen/electronic_structure/boltztrap.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ def write_struct(self, output_file) -> None:
325325
+ "\n"
326326
)
327327

328-
ops = [np.eye(3)] if self._symprec is None else sym.get_symmetry_dataset()["rotations"] # type: ignore[reportPossiblyUnboundVariable]
328+
ops = [np.eye(3)] if self._symprec is None else sym.get_symmetry_dataset().rotations # type: ignore[reportPossiblyUnboundVariable]
329329

330330
file.write(f"{len(ops)}\n")
331331

src/pymatgen/io/abinit/abiobjects.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def structure_from_abivars(cls=None, *args, **kwargs) -> Structure:
150150
raise ValueError(f"{len(typat)=} must equal {len(coords)=}")
151151

152152
# Note conversion to int and Fortran --> C indexing
153-
typat = np.array(typat, dtype=int)
153+
typat = np.array(typat, dtype=np.int64)
154154
species = [znucl_type[typ - 1] for typ in typat]
155155

156156
return cls(

src/pymatgen/io/lmto.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def as_dict(self):
108108
# The following is to find the classes (atoms that are not symmetry equivalent,
109109
# and create labels. Note that LMTO only attaches numbers with the second atom
110110
# of the same species, e.g. "Bi", "Bi1", "Bi2", etc.
111-
eq_atoms = sga.get_symmetry_dataset()["equivalent_atoms"]
111+
eq_atoms = sga.get_symmetry_dataset().equivalent_atoms
112112
ineq_sites_index = list(set(eq_atoms))
113113
sites = []
114114
classes = []

src/pymatgen/optimization/linear_assignment.pyx

+20-27
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
# cython: language_level=3
22
"""
3-
This module contains an algorithm to solve the Linear Assignment Problem
3+
This module contains the LAPJV algorithm to solve the Linear Assignment Problem.
44
"""
55

6-
# isort: dont-add-imports
7-
86
import numpy as np
97

108
cimport cython
@@ -38,26 +36,21 @@ class LinearAssignment:
3836
rectangular
3937
epsilon: Tolerance for determining if solution vector is < 0
4038
41-
.. attribute: min_cost:
42-
43-
The minimum cost of the matching
44-
45-
.. attribute: solution:
46-
47-
The matching of the rows to columns. i.e solution = [1, 2, 0]
48-
would match row 0 to column 1, row 1 to column 2 and row 2
49-
to column 0. Total cost would be c[0, 1] + c[1, 2] + c[2, 0]
39+
Attributes:
40+
min_cost: The minimum cost of the matching.
41+
solution: The matching of the rows to columns. i.e solution = [1, 2, 0]
42+
would match row 0 to column 1, row 1 to column 2 and row 2
43+
to column 0. Total cost would be c[0, 1] + c[1, 2] + c[2, 0].
5044
"""
5145

52-
def __init__(self, costs, epsilon=1e-13):
53-
# https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword
46+
def __init__(self, costs: np.ndarray, epsilon: float=1e-13) -> None:
5447
self.orig_c = np.asarray(costs, dtype=np.float64, order="C")
5548
self.nx, self.ny = self.orig_c.shape
5649
self.n = self.ny
5750

5851
self.epsilon = fabs(epsilon)
5952

60-
# check that cost matrix is square
53+
# Check that cost matrix is square
6154
if self.nx > self.ny:
6255
raise ValueError("cost matrix must have at least as many columns as rows")
6356

@@ -67,9 +60,9 @@ class LinearAssignment:
6760
self.c = np.zeros((self.n, self.n), dtype=np.float64)
6861
self.c[:self.nx] = self.orig_c
6962

70-
# initialize solution vectors
71-
self._x = np.empty(self.n, dtype=int)
72-
self._y = np.empty(self.n, dtype=int)
63+
# Initialize solution vectors
64+
self._x = np.empty(self.n, dtype=np.int64)
65+
self._y = np.empty(self.n, dtype=np.int64)
7366

7467
self.min_cost = compute(self.n, self.c, self._x, self._y, self.epsilon)
7568
self.solution = self._x[:self.nx]
@@ -79,7 +72,7 @@ class LinearAssignment:
7972
@cython.wraparound(False)
8073
cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_t[:] y, np.float_t eps) nogil:
8174

82-
# augment
75+
# Augment
8376
cdef int i, j, k, i1, j1, f, f0, cnt, low, up, z, last, nrr
8477
cdef int n = size
8578
cdef bint b
@@ -93,7 +86,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
9386
for i in range(n):
9487
x[i] = -1
9588

96-
# column reduction
89+
# Column reduction
9790
for j from n > j >= 0:
9891
col[j] = j
9992
h = c[0, j]
@@ -107,12 +100,12 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
107100
x[i1] = j
108101
y[j] = i1
109102
else:
110-
# in the paper its x[i], but likely a typo
103+
# NOTE: in the paper it's x[i], but likely a typo
111104
if x[i1] > -1:
112105
x[i1] = -2 - x[i1]
113106
y[j] = -1
114107

115-
# reduction transfer
108+
# Reduction transfer
116109
f = -1
117110
for i in range(n):
118111
if x[i] == -1:
@@ -129,12 +122,12 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
129122
m = c[i, j] - v[j]
130123
v[j1] = v[j1] - m
131124

132-
# augmenting row reduction
125+
# Augmenting row reduction
133126
for cnt in range(2):
134127
k = 0
135128
f0 = f
136129
f = -1
137-
# this step isn't strictly necessary, and
130+
# This step isn't strictly necessary, and
138131
# time is proportional to 1/eps in the worst case,
139132
# so break early by keeping track of nrr
140133
nrr = 0
@@ -172,7 +165,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
172165
x[i] = j1
173166
y[j1] = i
174167

175-
# augmentation
168+
# Augmentation
176169
f0 = f
177170
for f in range(f0 + 1):
178171
i1 = fre[f]
@@ -182,7 +175,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
182175
d[j] = c[i1, j] - v[j]
183176
pred[j] = i1
184177
while True:
185-
# the pascal code ends when a single augmentation is found
178+
# The pascal code ends when a single augmentation is found
186179
# really we need to get back to the for f in range(f0+1) loop
187180
b = False
188181
if up == low:
@@ -231,7 +224,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
231224
pred[j] = i
232225
if fabs(h - m) < eps:
233226
if y[j] == -1:
234-
# augment
227+
# Augment
235228
for k in range(last+1):
236229
j1 = col[k]
237230
v[j1] = v[j1] + d[j1] - m

0 commit comments

Comments
 (0)