Skip to content

Commit 5ff14e4

Browse files
committed
fuzz token sort ratio
1 parent 0201df3 commit 5ff14e4

File tree

5 files changed

+77
-1
lines changed

5 files changed

+77
-1
lines changed

Fuzzywuzzy_swift.xcodeproj/project.pbxproj

+6
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
3078FD661D7495D20028F477 /* String_Fuzzywuzzy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3078FD651D7495D20028F477 /* String_Fuzzywuzzy.swift */; };
1515
3078FD681D74AF5D0028F477 /* StringMatcher.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3078FD671D74AF5D0028F477 /* StringMatcher.swift */; };
1616
3078FD6A1D74B3FE0028F477 /* CommonSubstrings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3078FD691D74B3FE0028F477 /* CommonSubstrings.swift */; };
17+
308B74501D75EFE800FC5C2D /* StringProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 308B744F1D75EFE800FC5C2D /* StringProcessor.swift */; };
1718
/* End PBXBuildFile section */
1819

1920
/* Begin PBXContainerItemProxy section */
@@ -37,6 +38,7 @@
3738
3078FD651D7495D20028F477 /* String_Fuzzywuzzy.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = String_Fuzzywuzzy.swift; sourceTree = "<group>"; };
3839
3078FD671D74AF5D0028F477 /* StringMatcher.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StringMatcher.swift; sourceTree = "<group>"; };
3940
3078FD691D74B3FE0028F477 /* CommonSubstrings.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CommonSubstrings.swift; sourceTree = "<group>"; };
41+
308B744F1D75EFE800FC5C2D /* StringProcessor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StringProcessor.swift; sourceTree = "<group>"; };
4042
/* End PBXFileReference section */
4143

4244
/* Begin PBXFrameworksBuildPhase section */
@@ -85,6 +87,7 @@
8587
3078FD651D7495D20028F477 /* String_Fuzzywuzzy.swift */,
8688
3078FD671D74AF5D0028F477 /* StringMatcher.swift */,
8789
3078FD691D74B3FE0028F477 /* CommonSubstrings.swift */,
90+
308B744F1D75EFE800FC5C2D /* StringProcessor.swift */,
8891
);
8992
path = Fuzzywuzzy_swift;
9093
sourceTree = "<group>";
@@ -207,6 +210,7 @@
207210
buildActionMask = 2147483647;
208211
files = (
209212
3078FD641D7495B80028F477 /* LevenshteinDistance.swift in Sources */,
213+
308B74501D75EFE800FC5C2D /* StringProcessor.swift in Sources */,
210214
3078FD661D7495D20028F477 /* String_Fuzzywuzzy.swift in Sources */,
211215
3078FD6A1D74B3FE0028F477 /* CommonSubstrings.swift in Sources */,
212216
3078FD681D74AF5D0028F477 /* StringMatcher.swift in Sources */,
@@ -397,6 +401,7 @@
397401
3078FD5F1D74959A0028F477 /* Release */,
398402
);
399403
defaultConfigurationIsVisible = 0;
404+
defaultConfigurationName = Release;
400405
};
401406
3078FD601D74959A0028F477 /* Build configuration list for PBXNativeTarget "Fuzzywuzzy_swiftTests" */ = {
402407
isa = XCConfigurationList;
@@ -405,6 +410,7 @@
405410
3078FD621D74959A0028F477 /* Release */,
406411
);
407412
defaultConfigurationIsVisible = 0;
413+
defaultConfigurationName = Release;
408414
};
409415
/* End XCConfigurationList section */
410416
};
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
//
2+
// StringProcessor.swift
3+
// Fuzzywuzzy_swift
4+
//
5+
// Created by XianLi on 31/8/2016.
6+
// Copyright © 2016 LiXian. All rights reserved.
7+
//
8+
9+
import UIKit
10+
11+
class StringProcessor: NSObject {
    /// Normalizes a string for fuzzy comparison.
    ///
    /// The input is lower-cased, every run of characters matching the regex
    /// `\W+` (non-word characters) is replaced by a single space, and leading
    /// and trailing spaces are then removed.
    ///
    /// - parameter str: The string to normalize.
    /// - returns: The normalized string.
    class func process(str: String) -> String {
        /// lower case
        let lowered = str.lowercaseString

        /// replace other characters with white space
        // The pattern is a constant literal, so a failure to compile it is a
        // programmer error; `try!` is intentional here.
        let regex = try! NSRegularExpression(pattern: "\\W+",
                                             options: NSRegularExpressionOptions.CaseInsensitive)
        // NSRange is expressed in UTF-16 code units. Using the character
        // (grapheme) count would produce a range that is too short whenever the
        // string contains characters outside the BMP (e.g. emoji), leaving the
        // tail of the string unprocessed.
        let fullRange = NSMakeRange(0, lowered.utf16.count)
        let spaced = regex.stringByReplacingMatchesInString(lowered,
                                                            options: [],
                                                            range: fullRange,
                                                            withTemplate: " ")

        return spaced.stringByTrimmingCharactersInSet(NSCharacterSet(charactersInString: " "))
    }
}

Fuzzywuzzy_swift/String_Fuzzywuzzy.swift

+28
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,32 @@ public extension String {
5353
}
5454
return Int((scores.maxElement() ?? 0) * 100)
5555
}
56+
57+
/// Splits a string into space-separated tokens and returns them sorted and
/// re-joined with single spaces.
///
/// - parameter str: The string to tokenize.
/// - parameter fullProcess: When `true`, `str` is first passed through
///   `StringProcessor.process` before being tokenized.
/// - returns: The sorted tokens joined by a single space, with no leading or
///   trailing space.
static private func _fuzzProcessAndSort(str: String, fullProcess: Bool = true) -> String {
    let source = fullProcess ? StringProcessor.process(str) : str
    // Splitting on " " yields empty tokens for leading, trailing or repeated
    // spaces. Empty tokens sort first and would otherwise leave leading spaces
    // in the joined result, so they are discarded here.
    let tokens = source.componentsSeparatedByString(" ").filter { !$0.isEmpty }
    return tokens.sort().joinWithSeparator(" ")
}
65+
66+
/// Token-sorts both strings and scores the sorted results.
///
/// - parameter str1: First string to compare.
/// - parameter str2: Second string to compare.
/// - parameter partial: When `true` the sorted strings are scored with
///   `fuzzPartialRatio`; otherwise with `fuzzRatio`.
/// - parameter fullProcess: Forwarded to `_fuzzProcessAndSort` for both strings.
/// - returns: The score returned by the selected ratio function.
static private func _fuzzTokenSort(str1 str1: String, str2: String, partial: Bool = true, fullProcess: Bool = true) -> Int {
    let lhs = _fuzzProcessAndSort(str1, fullProcess: fullProcess)
    let rhs = _fuzzProcessAndSort(str2, fullProcess: fullProcess)

    return partial
        ? fuzzPartialRatio(str1: lhs, str2: rhs)
        : fuzzRatio(str1: lhs, str2: rhs)
}
76+
77+
/// Scores two strings after sorting their tokens, using the non-partial
/// comparison.
///
/// Forwards to `_fuzzTokenSort` with `partial` set to `false`.
///
/// - parameter str1: First string to compare.
/// - parameter str2: Second string to compare.
/// - parameter fullProcess: Forwarded unchanged to `_fuzzTokenSort`.
/// - returns: The score returned by `_fuzzTokenSort`.
static public func fuzzTokenSortRatio(str1 str1: String, str2: String, fullProcess: Bool = true) -> Int {
    let score = _fuzzTokenSort(str1: str1, str2: str2, partial: false, fullProcess: fullProcess)
    return score
}
80+
81+
/// Scores two strings after sorting their tokens, using the partial
/// comparison.
///
/// Forwards to `_fuzzTokenSort` with `partial` set to `true`.
///
/// - parameter str1: First string to compare.
/// - parameter str2: Second string to compare.
/// - parameter fullProcess: Forwarded unchanged to `_fuzzTokenSort`.
/// - returns: The score returned by `_fuzzTokenSort`.
static public func fuzzPartialTokenSortRatio(str1 str1: String, str2: String, fullProcess: Bool = true) -> Int {
    let score = _fuzzTokenSort(str1: str1, str2: str2, partial: true, fullProcess: fullProcess)
    return score
}
5684
}

Fuzzywuzzy_swiftTests/Fuzzywuzzy_swiftTests.swift

+11-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,17 @@ class Fuzzywuzzy_swiftTests: XCTestCase {
2121
super.tearDown()
2222
}
2323

24-
func testpartialRatio() {
24+
/// Prints the token-sort ratio for a handful of sample pairs, including
/// empty strings, reordered tokens and punctuation-separated tokens.
/// The scores are only logged; nothing is asserted.
func testTokenSortRatio() {
    let samples = [
        ("some", ""),
        ("", "some"),
        ("", ""),
        ("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"),
        ("fuzzy$*#&)$#(wuzzy*@()#*()!<><>was a bear", "wuzzy fuzzy was a bear")
    ]
    for pair in samples {
        print("STR1: \(pair.0)")
        print("STR2: \(pair.1)")
        let ratio = String.fuzzTokenSortRatio(str1: pair.0, str2: pair.1)
        print("TOKEN RATIO: \(ratio)")
        print("-----------------")
    }
}
33+
34+
func testPartialRatio() {
2535
let strPairs = [("some", ""), ("", "some"), ("", ""), ("abcd", "XXXbcdeEEE"), ("what a wonderful 世界", "wonderful 世"), ("this is a test", "this is a test!")]
2636
for (str1, str2) in strPairs {
2737
print("STR1: \(str1)")

0 commit comments

Comments
 (0)