File tree Expand file tree Collapse file tree 2 files changed +3
-21
lines changed Expand file tree Collapse file tree 2 files changed +3
-21
lines changed Original file line number Diff line number Diff line change @@ -2,4 +2,4 @@ amazon-textract-caller>=0.2.4,<1
2
2
Pillow
3
3
tabulate >= 0.9 ,< 0.10
4
4
XlsxWriter >= 3.0 ,< 4
5
- editdistance >= 0.6.2 , < 0.9
5
+ rapidfuzz >= 3.9.6
Original file line number Diff line number Diff line change 7
7
# The latter has numpy as dependency.
8
8
pass
9
9
10
- import math
11
- import editdistance
10
+ from rapidfuzz .distance import Levenshtein
12
11
from textractor .data .constants import SimilarityMetric
13
12
from textractor .exceptions import MissingDependencyException
14
13
@@ -59,7 +58,7 @@ def get_word_similarity(
59
58
cls .util = util
60
59
61
60
if similarity_metric == SimilarityMetric .LEVENSHTEIN :
62
- return normalized_edit_distance (word_1 .lower (), word_2 .lower ())
61
+ return Levenshtein . normalized_similarity (word_1 .lower (), word_2 .lower ())
63
62
elif similarity_metric == SimilarityMetric .EUCLIDEAN :
64
63
ref_word_emb = cls .model .encode ([word_1 ])
65
64
word_emb = cls .model .encode ([word_2 ])
@@ -110,20 +109,3 @@ def get_metadata_attr_name(cell_atr):
110
109
return cell_map [cell_atr ]
111
110
except :
112
111
return ""
113
-
114
-
115
- def normalized_edit_distance (s1 : str , s2 : str ):
116
- """
117
- Returns the normalized edit distance
118
-
119
- :param s1: First string
120
- :type s1: str
121
- :param s2: Second string
122
- :type s2: str
123
- """
124
-
125
- dist = editdistance .eval (s1 , s2 )
126
- max_length = max (len (s1 ), len (s2 ))
127
- if max_length - dist == 0 :
128
- return 0.0
129
- return (max_length - dist ) / max_length
You can’t perform that action at this time.
0 commit comments