|
5 | 5 |
|
6 | 6 | import Foundation
|
7 | 7 |
|
8 |
| -// Need a container to easily hold N Dimensional Vectors |
9 |
| -class VectorND: CustomStringConvertible { |
10 |
| - private var length = 0 |
11 |
| - private var data = [Double]() |
12 |
| - |
13 |
| - init(d:[Double]) { |
14 |
| - data = d |
15 |
| - length = d.count |
/// Iterative k-means clustering over `Vector` points.
///
/// Depends on declarations that live outside this class: the `Vector` type
/// (its `init(d:)`, `length`, `distTo`, and the `+=` / `/=` operators) and the
/// free function `reservoirSample`.
class KMeans {
    /// Number of cluster centers to look for.
    var numCenters:Int
    /// Iteration stops once the summed movement of all centers during one
    /// pass is no longer greater than this value.
    var convergeDist:Double

    init(numCenters:Int, convergeDist:Double) {
        self.numCenters = numCenters
        self.convergeDist = convergeDist
    }

    /// Returns the index into `Centers` of the entry with the smallest
    /// `distTo` value from `x`. Ties keep the earliest index; 0 is returned
    /// when no entry reports a distance below `DBL_MAX` (including when
    /// `Centers` is empty).
    // The first argument stays unlabeled at the call site: nearestCenter(p, Centers: c).
    private func nearestCenter(_ x: Vector, Centers: [Vector]) -> Int {
        var nearestDist = DBL_MAX
        var minIndex = 0

        for idx in 0..<Centers.count {
            let dist = x.distTo(Centers[idx])
            if dist < nearestDist {
                minIndex = idx
                nearestDist = dist
            }
        }
        return minIndex
    }

    /// Clusters `points` and returns the final centers.
    ///
    /// - Parameter points: the vectors to cluster. Presumably all of the same
    ///   length; only the first element's `length` is consulted.
    /// - Returns: the centers after convergence. Empty when `points` is empty
    ///   or `numCenters` is not positive. At most `points.count` centers are
    ///   requested from `reservoirSample`, because there cannot be more
    ///   distinct seeds than points.
    // The argument stays unlabeled at the call site: findCenters(points).
    func findCenters(_ points: [Vector]) -> [Vector] {
        // Nothing to cluster, or nothing to cluster into.
        guard numCenters > 0 else { return [] }
        guard let first = points.first else { return [] }

        let zeros = (0..<first.length).map { _ in 0.0 }

        // Seed the centers with randomly chosen input points.
        var kCenters = reservoirSample(points, k: min(numCenters, points.count))
        // Size every per-center array from what was actually returned.
        let k = kCenters.count
        var centerMoveDist = 0.0

        repeat {
            var counts = (0..<k).map { _ in 0 }
            // One fresh accumulator per center, so nothing is shared even if
            // Vector turns out to be a reference type.
            var newCenters: [Vector] = (0..<k).map { _ in Vector(d: zeros) }

            // Assignment step: add each point to the sum of its nearest center.
            for p in points {
                let c = nearestCenter(p, Centers: kCenters)
                counts[c] += 1
                newCenters[c] += p
            }

            // Update step: turn sums into means and measure total movement.
            centerMoveDist = 0.0
            for idx in 0..<k {
                if counts[idx] > 0 {
                    newCenters[idx] /= Double(counts[idx])
                } else {
                    // A center that attracted no points keeps its previous
                    // position; dividing by a zero count would poison the
                    // result with NaN and end the loop on garbage.
                    newCenters[idx] = kCenters[idx]
                }
                centerMoveDist += kCenters[idx].distTo(newCenters[idx])
            }

            kCenters = newCenters
        } while(centerMoveDist > convergeDist)
        return kCenters
    }
}
|
44 | 61 |
|
45 |
| -// MARK: Assist Functions |
46 |
| -// Pick a k random elements from samples |
47 |
| -func reservoirSample(samples:[VectorND], k:Int) -> [VectorND] { |
48 |
| - var result = [VectorND]() |
/// Picks `k` random elements from `samples` using reservoir sampling.
///
/// The first `k` elements seed the result; every later element at index `i`
/// then replaces a random slot with probability `k / (i + 1)`.
///
/// - Parameters:
///   - samples: the pool to draw from.
///   - k: how many elements to pick. Values outside `0...samples.count` are
///     clamped, so asking for more than is available returns every element
///     and a negative request returns an empty array.
/// - Returns: the chosen elements, not in any guaranteed order.
///
/// Randomness comes from `random()`, which this function does not seed.
// The first argument stays unlabeled at the call site: reservoirSample(points, k: n).
func reservoirSample<T>(_ samples:[T], k:Int) -> [T] {
    // Clamp rather than trap on an out-of-range request.
    let size = max(0, min(k, samples.count))

    // Fill the result array with the first `size` elements
    var result = Array(samples[0..<size])
    guard size > 0 else { return result }

    // Randomly replace elements from the remaining pool. The scan must start
    // at index `size` itself: starting one later would make that element
    // unselectable and would form an invalid range when size == samples.count.
    for i in size..<samples.count {
        let j = random() % (i + 1)
        if j < size {
            result[j] = samples[i]
        }
    }
    return result
}
|
63 | 79 |
|
64 |
| -// Calculates the Euclidean distance between two VectorNDs |
65 |
| -func euclidean(v1:VectorND, v2:VectorND) -> Double { |
66 |
| - var result = 0.0 |
67 |
| - for idx in 0..<v1.getLength() { |
68 |
| - result += pow(v1.getData()[idx] - v2.getData()[idx], 2.0) |
69 |
| - } |
70 |
| - return sqrt(result) |
71 |
| -} |
72 |
| - |
73 |
| -// Get the INDEX of nearest Center to X |
74 |
| -func nearestCenter(x: VectorND, Centers: [VectorND]) -> Int { |
75 |
| - var nearestDist = DBL_MAX |
76 |
| - var minIndex = 0; |
77 |
| - |
78 |
| - for (idx, c) in Centers.enumerate() { |
79 |
| - let dist = euclidean(x, v2: c) |
80 |
| - if dist < nearestDist { |
81 |
| - minIndex = idx |
82 |
| - nearestDist = dist |
83 |
| - } |
84 |
| - } |
85 |
| - return minIndex |
86 |
| -} |
87 |
| - |
88 |
| -// MARK: Main Function |
89 |
| -func kMeans(numCenters: Int, convergeDist: Double, points: [VectorND]) -> [VectorND] { |
90 |
| - var centerMoveDist = 0.0 |
91 |
| - let zeros = [Double](count: points[0].getLength(), repeatedValue: 0.0) |
92 |
| - |
93 |
| - // 1. Choose k Random VectorNDs as the initial centers |
94 |
| - var kCenters = reservoirSample(points, k: numCenters) |
95 |
| - |
96 |
| - // do following steps until convergence |
97 |
| - repeat { |
98 |
| - var cnts = [Double](count: numCenters, repeatedValue: 0.0) |
99 |
| - var newCenters = [VectorND](count:numCenters, repeatedValue: VectorND(d:zeros)) |
100 |
| - // 2. Assign VectorNDs to centers |
101 |
| - // a. Determine which center each VectorND is closest to |
102 |
| - // b. Record how many VectorNDs are assigned to each center |
103 |
| - for p in points { |
104 |
| - let c = nearestCenter(p, Centers: kCenters) |
105 |
| - cnts[c]++ |
106 |
| - newCenters[c] += p |
107 |
| - } |
108 |
| - // 3. Calculate a new centers |
109 |
| - for idx in 0..<numCenters { |
110 |
| - newCenters[idx] /= cnts[idx] |
111 |
| - } |
112 |
| - // 4. Determine how far centers moved |
113 |
| - centerMoveDist = 0.0 |
114 |
| - for idx in 0..<numCenters { |
115 |
| - centerMoveDist += euclidean(kCenters[idx], v2: newCenters[idx]) |
116 |
| - } |
117 |
| - // 5. Update centers to the newly calculated ones |
118 |
| - kCenters = newCenters |
119 |
| - print("Complete iteration coverge(\(centerMoveDist) <? \(convergeDist))") |
120 |
| - } while(centerMoveDist > convergeDist) |
121 |
| - return kCenters |
122 |
| -} |
123 |
| - |
124 |
| -// MARK: Sample Data |
125 |
| -var points = [VectorND]() |
126 |
| -let numPoints = 10 |
127 |
| -let numDimmensions = 5 |
128 |
| -for _ in 0..<numPoints { |
129 |
| - var data = [Double]() |
130 |
| - for x in 0..<numDimmensions { |
131 |
| - data.append(Double(arc4random_uniform(UInt32(numPoints*numDimmensions)))) |
132 |
| - } |
133 |
| - points.append(VectorND(d: data)) |
134 |
| -} |
135 |
| - |
136 |
| -print("\nCenters") |
137 |
| -for c in kMeans(3, convergeDist: 0.01, points: points) { |
138 |
| - print(c) |
139 |
| -} |
0 commit comments