Skip to content

Commit 20d3c0e

Browse files
committed
Move kMeans from playground to project
1 parent ebcf1f4 commit 20d3c0e

File tree

9 files changed

+482
-301
lines changed

9 files changed

+482
-301
lines changed

K-Means/K-Means.playground/Contents.swift

Lines changed: 0 additions & 140 deletions
This file was deleted.

K-Means/K-Means.playground/contents.xcplayground

Lines changed: 0 additions & 4 deletions
This file was deleted.

K-Means/K-Means.playground/timeline.xctimeline

Lines changed: 0 additions & 41 deletions
This file was deleted.

K-Means/KMeans.swift

Lines changed: 54 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -5,135 +5,75 @@
55

66
import Foundation
77

8-
// Need a container to easily hold N Dimensional Vectors
9-
class VectorND: CustomStringConvertible {
10-
private var length = 0
11-
private var data = [Double]()
12-
13-
init(d:[Double]) {
14-
data = d
15-
length = d.count
8+
class KMeans {
9+
var numCenters:Int
10+
var convergeDist:Double
11+
12+
init(numCenters:Int, convergeDist:Double) {
13+
self.numCenters = numCenters
14+
self.convergeDist = convergeDist
1615
}
17-
18-
var description: String { return "VectorND (\(data)" }
19-
func getData() -> [Double] { return data }
20-
func getLength() -> Int { return length }
21-
}
22-
23-
// MARK: VectorND Operators
24-
func +(left: VectorND, right: VectorND) -> VectorND {
25-
var results = [Double](count: left.getLength(), repeatedValue: 0.0)
26-
for idx in 0..<left.getLength() {
27-
results[idx] = left.getData()[idx] + right.getData()[idx]
16+
17+
private func nearestCenter(x: Vector, Centers: [Vector]) -> Int {
18+
var nearestDist = DBL_MAX
19+
var minIndex = 0;
20+
21+
for (idx, c) in Centers.enumerate() {
22+
let dist = x.distTo(c)
23+
if dist < nearestDist {
24+
minIndex = idx
25+
nearestDist = dist
26+
}
27+
}
28+
return minIndex
2829
}
29-
return VectorND(d: results)
30-
}
31-
func +=(inout left: VectorND, right: VectorND) {
32-
left = left + right
33-
}
34-
func /(left:VectorND, right: Double) -> VectorND {
35-
var results = [Double](count: left.getLength(), repeatedValue: 0.0)
36-
for (idx, value) in left.getData().enumerate() {
37-
results[idx] = value / right
30+
31+
func findCenters(points: [Vector]) -> [Vector] {
32+
var centerMoveDist = 0.0
33+
let zeros = [Double](count: points[0].length, repeatedValue: 0.0)
34+
35+
var kCenters = reservoirSample(points, k: numCenters)
36+
37+
repeat {
38+
var cnts = [Double](count: numCenters, repeatedValue: 0.0)
39+
var newCenters = [Vector](count:numCenters, repeatedValue: Vector(d:zeros))
40+
41+
for p in points {
42+
let c = nearestCenter(p, Centers: kCenters)
43+
cnts[c]++
44+
newCenters[c] += p
45+
}
46+
47+
for idx in 0..<numCenters {
48+
newCenters[idx] /= cnts[idx]
49+
}
50+
51+
centerMoveDist = 0.0
52+
for idx in 0..<numCenters {
53+
centerMoveDist += kCenters[idx].distTo(newCenters[idx])
54+
}
55+
56+
kCenters = newCenters
57+
} while(centerMoveDist > convergeDist)
58+
return kCenters
3859
}
39-
return VectorND(d: results)
40-
}
41-
func /=(inout left: VectorND, right: Double) {
42-
left = left / right
4360
}
4461

45-
// MARK: Assist Functions
46-
// Pick a k random elements from samples
47-
func reservoirSample(samples:[VectorND], k:Int) -> [VectorND] {
48-
var result = [VectorND]()
62+
// Pick k random elements from samples
63+
func reservoirSample<T>(samples:[T], k:Int) -> [T] {
64+
var result = [T]()
4965

5066
// Fill the result array with first k elements
5167
for i in 0..<k {
5268
result.append(samples[i])
5369
}
54-
// randomly replace elements from remaining ones
70+
// randomly replace elements from remaining pool
5571
for i in (k+1)..<samples.count {
56-
let j = Int(arc4random_uniform(UInt32(i+1)))
72+
let j = random()%(i+1)
5773
if j < k {
5874
result[j] = samples[i]
5975
}
6076
}
6177
return result
6278
}
6379

64-
// Calculates the Euclidean distance between two VectorNDs
65-
func euclidean(v1:VectorND, v2:VectorND) -> Double {
66-
var result = 0.0
67-
for idx in 0..<v1.getLength() {
68-
result += pow(v1.getData()[idx] - v2.getData()[idx], 2.0)
69-
}
70-
return sqrt(result)
71-
}
72-
73-
// Get the INDEX of nearest Center to X
74-
func nearestCenter(x: VectorND, Centers: [VectorND]) -> Int {
75-
var nearestDist = DBL_MAX
76-
var minIndex = 0;
77-
78-
for (idx, c) in Centers.enumerate() {
79-
let dist = euclidean(x, v2: c)
80-
if dist < nearestDist {
81-
minIndex = idx
82-
nearestDist = dist
83-
}
84-
}
85-
return minIndex
86-
}
87-
88-
// MARK: Main Function
89-
func kMeans(numCenters: Int, convergeDist: Double, points: [VectorND]) -> [VectorND] {
90-
var centerMoveDist = 0.0
91-
let zeros = [Double](count: points[0].getLength(), repeatedValue: 0.0)
92-
93-
// 1. Choose k Random VectorNDs as the initial centers
94-
var kCenters = reservoirSample(points, k: numCenters)
95-
96-
// do following steps until convergence
97-
repeat {
98-
var cnts = [Double](count: numCenters, repeatedValue: 0.0)
99-
var newCenters = [VectorND](count:numCenters, repeatedValue: VectorND(d:zeros))
100-
// 2. Assign VectorNDs to centers
101-
// a. Determine which center each VectorND is closest to
102-
// b. Record how many VectorNDs are assigned to each center
103-
for p in points {
104-
let c = nearestCenter(p, Centers: kCenters)
105-
cnts[c]++
106-
newCenters[c] += p
107-
}
108-
// 3. Calculate a new centers
109-
for idx in 0..<numCenters {
110-
newCenters[idx] /= cnts[idx]
111-
}
112-
// 4. Determine how far centers moved
113-
centerMoveDist = 0.0
114-
for idx in 0..<numCenters {
115-
centerMoveDist += euclidean(kCenters[idx], v2: newCenters[idx])
116-
}
117-
// 5. Update centers to the newly calculated ones
118-
kCenters = newCenters
119-
print("Complete iteration coverge(\(centerMoveDist) <? \(convergeDist))")
120-
} while(centerMoveDist > convergeDist)
121-
return kCenters
122-
}
123-
124-
// MARK: Sample Data
125-
var points = [VectorND]()
126-
let numPoints = 10
127-
let numDimmensions = 5
128-
for _ in 0..<numPoints {
129-
var data = [Double]()
130-
for x in 0..<numDimmensions {
131-
data.append(Double(arc4random_uniform(UInt32(numPoints*numDimmensions))))
132-
}
133-
points.append(VectorND(d: data))
134-
}
135-
136-
print("\nCenters")
137-
for c in kMeans(3, convergeDist: 0.01, points: points) {
138-
print(c)
139-
}

K-Means/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@ This brings about a few of the parameters that are required for k-means:
2222

2323
This is what the algorithm would look like in Swift:
2424
```swift
25-
func kMeans(numCenters: Int, convergeDist: Double, points: [VectorND]) -> [VectorND] {
25+
func kMeans(numCenters: Int, convergeDist: Double, points: [Vector]) -> [Vector] {
2626
var centerMoveDist = 0.0
2727
let zeros = [Double](count: points[0].getLength(), repeatedValue: 0.0)
2828

2929
var kCenters = reservoirSample(points, k: numCenters)
3030

3131
repeat {
3232
var cnts = [Double](count: numCenters, repeatedValue: 0.0)
33-
var newCenters = [VectorND](count:numCenters, repeatedValue: VectorND(d:zeros))
33+
var newCenters = [Vector](count:numCenters, repeatedValue: Vector(d:zeros))
3434
for p in points {
3535
let c = nearestCenter(p, Centers: kCenters)
3636
cnts[c]++

0 commit comments

Comments
 (0)