Skip to content

Commit 8a0817f

Browse files
committed
More NumPy 2.0 readiness: replace the `numpy.float_` alias (removed in NumPy 2.0) with `numpy.float64` in type annotations
1 parent: 139291b · commit: 8a0817f

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

dedupe/clustering.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def union_find(scored_pairs: Scores) -> numpy.typing.NDArray[numpy.int_]:
177177

178178
def condensedDistance(
179179
dupes: Scores,
180-
) -> tuple[dict[int, RecordID], numpy.typing.NDArray[numpy.float_], int]:
180+
) -> tuple[dict[int, RecordID], numpy.typing.NDArray[numpy.float64], int]:
181181
"""
182182
Convert the pairwise list of distances in dupes to "condensed
183183
distance matrix" required by the hierarchical clustering
@@ -262,16 +262,16 @@ def cluster(
262262

263263
def confidences(
264264
cluster: Sequence[int],
265-
squared_distances: numpy.typing.NDArray[numpy.float_],
265+
squared_distances: numpy.typing.NDArray[numpy.float64],
266266
d: int,
267-
) -> numpy.typing.NDArray[numpy.float_]:
267+
) -> numpy.typing.NDArray[numpy.float64]:
268268
"""
269269
We calculate a per record score that is similar to a standard
270270
deviation. The main reason is that these record scores can be
271271
used to calculate the standard deviation of an entire cluster,
272272
which is a reasonable metric for clusters.
273273
"""
274-
scores: numpy.typing.NDArray[numpy.float_]
274+
scores: numpy.typing.NDArray[numpy.float64]
275275
scores_d = dict.fromkeys(cluster, 0.0)
276276
C = 2 * d - 3
277277
for i, j in itertools.combinations(cluster, 2):

dedupe/datamodel.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def predicates(self) -> set[Predicate]:
7979

8080
def distances(
8181
self, record_pairs: Sequence[RecordDictPair]
82-
) -> numpy.typing.NDArray[numpy.float_]:
82+
) -> numpy.typing.NDArray[numpy.float64]:
8383
num_records = len(record_pairs)
8484

8585
distances = numpy.empty((num_records, len(self)), "f4")
@@ -98,8 +98,8 @@ def distances(
9898
return distances
9999

100100
def _add_derived_distances(
101-
self, distances: numpy.typing.NDArray[numpy.float_]
102-
) -> numpy.typing.NDArray[numpy.float_]:
101+
self, distances: numpy.typing.NDArray[numpy.float64]
102+
) -> numpy.typing.NDArray[numpy.float64]:
103103
current_column = self._derived_start
104104

105105
for indices in self._interaction_indices:

dedupe/labeler.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def fit(self, pairs: TrainingExamples, y: LabelsLike) -> None:
5656
"""Train on the given data."""
5757

5858
@abstractmethod
59-
def candidate_scores(self) -> numpy.typing.NDArray[numpy.float_]:
59+
def candidate_scores(self) -> numpy.typing.NDArray[numpy.float64]:
6060
"""For each of self.candidates, return our current guess [0,1] of if a match."""
6161

6262
@abstractmethod
@@ -92,7 +92,7 @@ def remove(self, index: int) -> None:
9292
self._candidates.pop(index)
9393
self._features = numpy.delete(self._features, index, axis=0)
9494

95-
def candidate_scores(self) -> numpy.typing.NDArray[numpy.float_]:
95+
def candidate_scores(self) -> numpy.typing.NDArray[numpy.float64]:
9696
if not self._fitted:
9797
raise ValueError("Must call fit() before candidate_scores()")
9898
return self._classifier.predict_proba(self._features)[:, 1].reshape(-1, 1)
@@ -103,7 +103,7 @@ class BlockLearner(Learner):
103103

104104
def __init__(self):
105105
self.current_predicates: tuple[Predicate, ...] = ()
106-
self._cached_scores: numpy.typing.NDArray[numpy.float_] | None = None
106+
self._cached_scores: numpy.typing.NDArray[numpy.float64] | None = None
107107
self._old_dupes: TrainingExamples = []
108108

109109
def fit(self, pairs: TrainingExamples, y: LabelsLike) -> None:
@@ -121,7 +121,7 @@ def fit(self, pairs: TrainingExamples, y: LabelsLike) -> None:
121121
self._old_dupes = dupes
122122
self._fitted = True
123123

124-
def candidate_scores(self) -> numpy.typing.NDArray[numpy.float_]:
124+
def candidate_scores(self) -> numpy.typing.NDArray[numpy.float64]:
125125
if not self._fitted:
126126
raise ValueError("Must call fit() before candidate_scores()")
127127
if self._cached_scores is None:

0 commit comments

Comments
 (0)