diff --git a/CountMinSketch/CountMinSketch.playground/Contents.swift b/CountMinSketch/CountMinSketch.playground/Contents.swift new file mode 100644 index 000000000..07cdd1146 --- /dev/null +++ b/CountMinSketch/CountMinSketch.playground/Contents.swift @@ -0,0 +1,111 @@ +//: # CountMin Sketch +import Foundation + +/// Private wrapper around Hashing, allowing hash different Hashables and keep their value +private final class Hashing where T: Hashable { + private var map: [T: Int] = [:] + + func hash(_ value: T) -> Int { + if let hash = map[value] { + return hash + } + var hasher = Hasher() + hasher.combine(value) + let newValue = abs(hasher.finalize()) + map[value] = newValue + return newValue + } +} + +/* + A class for counting hashable items using the Count-min Sketch strategy. + It fulfills a similar purpose than `itertools.Counter`. + The Count-min Sketch is a randomized data structure that uses a constant + amount of memory and has constant insertion and lookup times at the cost + of an arbitrarily small overestimation of the counts. +*/ +public final class CountMinSketch where T: Hashable { + private var hashers: [Hashing] = [] + private var matrix: [[UInt64]] = [] + private let rows: Int + private let cols: Int + + /// The total amount of elements adedd to the model + private(set) var count: UInt64 = 0 + /// init - will determine the matrix size + /// - Parameters: + /// - rows: the size of the hash tables, larger implies smaller overestimation + /// - cols: the amount of hash tables, larger implies lower probability of + init(rows: Int, cols: Int) { + self.rows = rows + self.cols = cols + for _ in 0.. UInt64 { + var values = [UInt64]() + for row in 0..(rows: 10, cols: 10) + +for element in stream { + sketch.add(element: element) +} + +assert(sketch.count == stream.count) + +print("We have \(sketch.count) elements in the stream") + + +print("The frequency of 1 is \(sketch.query(element: 1))") diff --git a/CountMinSketch/CountMinSketch.playground/contents.xcplayground b/CountMinSketch/CountMinSketch.playground/contents.xcplayground new file mode 100644 index 000000000..fd676d5b4 --- /dev/null +++ b/CountMinSketch/CountMinSketch.playground/contents.xcplayground @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/CountMinSketch/CountMinSketch.playground/playground.xcworkspace/contents.xcworkspacedata b/CountMinSketch/CountMinSketch.playground/playground.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..919434a62 --- /dev/null +++ b/CountMinSketch/CountMinSketch.playground/playground.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/CountMinSketch/CountMinSketch.swift b/CountMinSketch/CountMinSketch.swift new file mode 100644 index 000000000..b8e4ece55 --- /dev/null +++ b/CountMinSketch/CountMinSketch.swift @@ -0,0 +1,89 @@ +import Foundation + +/// Private wrapper around Hashing, allowing hash different Hashables and keep their value +private final class Hashing where T: Hashable { + private var map: [T: Int] = [:] + + func hash(_ value: T) -> Int { + if let hash = map[value] { + return hash + } + var hasher = Hasher() + hasher.combine(value) + let newValue = abs(hasher.finalize()) + map[value] = newValue + return newValue + } +} + +/* + A class for counting hashable items using the Count-min Sketch strategy. + It fulfills a similar purpose than `itertools.Counter`. + The Count-min Sketch is a randomized data structure that uses a constant + amount of memory and has constant insertion and lookup times at the cost + of an arbitrarily small overestimation of the counts. +*/ +public final class CountMinSketch where T: Hashable { + private var hashers: [Hashing] = [] + private var matrix: [[UInt64]] = [] + private let rows: Int + private let cols: Int + + /// The total amount of elements adedd to the model + private(set) var count: UInt64 = 0 + /// init - will determine the matrix size + /// - Parameters: + /// - rows: the size of the hash tables, larger implies smaller overestimation + /// - cols: the amount of hash tables, larger implies lower probability of + init(rows: Int, cols: Int) { + self.rows = rows + self.cols = cols + for _ in 0.. UInt64 { + var values = [UInt64]() + for row in 0.. + + + + diff --git a/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 000000000..18d981003 --- /dev/null +++ b/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/CountMinSketch/Tests/Tests.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme b/CountMinSketch/Tests/Tests.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme new file mode 100644 index 000000000..a3659d5ab --- /dev/null +++ b/CountMinSketch/Tests/Tests.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/CountMinSketch/Tests/Tests/CountMinSketchTests.swift b/CountMinSketch/Tests/Tests/CountMinSketchTests.swift new file mode 100644 index 000000000..c44b14e36 --- /dev/null +++ b/CountMinSketch/Tests/Tests/CountMinSketchTests.swift @@ -0,0 +1,34 @@ +import XCTest + + +class CountMinSketchTests: XCTestCase { + + func testZeroInit() { + let sketch = CountMinSketch(delta: 0.01, epsilon: 0.01) + let elements = ["", "1", "b"] + for element in elements { + XCTAssertEqual(sketch.query(element: element), 0) + } + } + + func testSimpleUsage() { + let sketch = CountMinSketch(delta: 0.01, epsilon: 0.01) + let expectedCount: UInt64 = 1000 + for _ in 0..(delta: 0.01, epsilon: 0.01) + sketch.add(element: "a", value: 10) + XCTAssertEqual(sketch.query(element: "a"), 10) + + sketch.add(element: "a", value: 20) + XCTAssertEqual(sketch.query(element: "a"), 30) + } +} diff --git a/CountMinSketch/Tests/Tests/Info.plist b/CountMinSketch/Tests/Tests/Info.plist new file mode 100644 index 000000000..6c6c23c43 --- /dev/null +++ b/CountMinSketch/Tests/Tests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + BNDL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + +