55import org .apache .commons .lang3 .StringUtils ;
66import org .apache .commons .text .similarity .JaccardSimilarity ;
77import org .apache .commons .text .similarity .JaroWinklerSimilarity ;
8+ import org .apache .commons .text .similarity .LevenshteinDistance ;
89import org .apache .commons .text .similarity .SimilarityScore ;
910import org .apache .logging .log4j .LogManager ;
1011import org .apache .logging .log4j .Logger ;
@@ -29,6 +30,8 @@ public final class LinkerProbabilistic {
2930 static final JaroSimilarity JARO_SIMILARITY = new JaroSimilarity ();
3031 static final ExactSimilarity EXACT_SIMILARITY = new ExactSimilarity ();
3132 static final SoundexSimilarity SOUNDEX_SIMILARITY = new SoundexSimilarity ();
33+ static final LevenshteinSimilarity LEVENSHTEIN_SIMILARITY = new LevenshteinSimilarity ();
34+ static final LevenshteinSimilarityPercentage LEVENSHTEIN_SIMILARITY_PERCENTAGE = new LevenshteinSimilarityPercentage ();
3235 private static final int METRIC_MIN = 0 ;
3336 private static final int METRIC_MAX = 1 ;
3437 private static final int METRIC_SCORE = 2 ;
@@ -79,7 +82,9 @@ public enum SimilarityFunctionName {
7982 JARO_SIMILARITY ,
8083 JACCARD_SIMILARITY ,
8184 SOUNDEX_SIMILARITY ,
82- EXACT_SIMILARITY
85+ EXACT_SIMILARITY ,
86+ LEVENSHTEIN_SIMILARITY ,
87+ LEVENSHTEIN_SIMILARITY_PERCENTAGE
8388 }
8489
8590 static SimilarityScore <Double > getSimilarityFunction (final SimilarityFunctionName func ) {
@@ -92,6 +97,10 @@ static SimilarityScore<Double> getSimilarityFunction(final SimilarityFunctionNam
9297 return JACCARD_SIMILARITY ;
9398 case SOUNDEX_SIMILARITY :
9499 return SOUNDEX_SIMILARITY ;
100+ case LEVENSHTEIN_SIMILARITY :
101+ return LEVENSHTEIN_SIMILARITY ;
102+ case LEVENSHTEIN_SIMILARITY_PERCENTAGE :
103+ return LEVENSHTEIN_SIMILARITY_PERCENTAGE ;
95104 default :
96105 return EXACT_SIMILARITY ;
97106 }
@@ -312,6 +321,47 @@ public Double apply(
312321
313322 }
314323
324+ static class LevenshteinSimilarityPercentage implements SimilarityScore <Double > {
325+
326+ private final LevenshteinDistance levenshteinDistance = new LevenshteinDistance ();
327+
328+ @ Override
329+ public Double apply (
330+ final CharSequence left ,
331+ final CharSequence right ) {
332+ if (StringUtils .isEmpty (left ) || StringUtils .isEmpty (right )) {
333+ return 0.5 ;
334+ }
335+
336+ int maxLength = Math .max (left .length (), right .length ());
337+ double levenshteinDistanceValue = levenshteinDistance .apply (left , right );
338+
339+ // Invert the percentage value
340+ double percentage = (levenshteinDistanceValue / maxLength ) * 100 ;
341+ double invertedPercentage = 100 - percentage ;
342+
343+ return invertedPercentage ;
344+ }
345+
346+ }
347+
348+ static class LevenshteinSimilarity implements SimilarityScore <Double > {
349+
350+ private final LevenshteinDistance levenshteinDistance = new LevenshteinDistance ();
351+
352+ @ Override
353+ public Double apply (
354+ final CharSequence left ,
355+ final CharSequence right ) {
356+ if (StringUtils .isEmpty (left ) || StringUtils .isEmpty (right )) {
357+ return 0.5 ;
358+ }
359+
360+ return Double .valueOf (levenshteinDistance .apply (left , right ));
361+ }
362+
363+ }
364+
315365 static class JaroSimilarity implements SimilarityScore <Double > {
316366
317367 @ Override
0 commit comments