Skip to content

Commit e3cf3ff

Browse files
committed
update kmeans docstring, add counts to algos
1 parent 500f7a6 commit e3cf3ff

File tree

6 files changed

+31
-6
lines changed

6 files changed

+31
-6
lines changed

src/coreset.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ function kmeans!(alg::Coreset, containers, X, k, weights, metric::Euclidean = Eu
7070

7171
totalcost = sum(containers.totalcost)
7272

73-
return KmeansResult(res.centers, containers.labels, T[], Int[], T[], totalcost, res.iterations, res.converged)
73+
counts = collect(values(sort(countmap(containers.labels))))
74+
75+
return KmeansResult(res.centers, containers.labels, T[], counts, T[], totalcost, res.iterations, res.converged)
7476
end
7577

7678

src/elkan.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,12 @@ function kmeans!(alg::Elkan, containers, X, k, weights=nothing, metric=Euclidean
8181
println("Successfully terminated with convergence.")
8282
end
8383

84+
counts = collect(values(sort(countmap(containers.labels))))
85+
8486
# TODO empty placeholder vectors should be calculated
8587
# TODO Float64 type definitions are too restrictive, should be relaxed
8688
# especially during GPU related development
87-
return KmeansResult(centroids, containers.labels, T[], Int[], T[], totalcost, niters, converged)
89+
return KmeansResult(centroids, containers.labels, T[], counts, T[], totalcost, niters, converged)
8890
end
8991

9092

src/hamerly.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,12 @@ function kmeans!(alg::Hamerly, containers, X, k, weights=nothing, metric=Euclide
7070
println("Successfully terminated with convergence.")
7171
end
7272

73+
counts = collect(values(sort(countmap(containers.labels))))
74+
7375
# TODO empty placeholder vectors should be calculated
7476
# TODO Float64 type definitions are too restrictive, should be relaxed
7577
# especially during GPU related development
76-
return KmeansResult(centroids, containers.labels, T[], Int[], T[], totalcost, niters, converged)
78+
return KmeansResult(centroids, containers.labels, T[], counts, T[], totalcost, niters, converged)
7779
end
7880

7981

src/kmeans.jl

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ end
146146
kmeans([alg::AbstractKMeansAlg,] design_matrix, k; n_threads = nthreads(),
147147
k_init="k-means++", max_iters=300, tol=1e-6, verbose=true, rng = Random.GLOBAL_RNG)
148148
149+
### IMPLEMENTATION NOTES
150+
149151
This main function employs the K-means algorithm to cluster all examples
150152
in the training data (design_matrix) into k groups using either the
151153
`k-means++` or random initialisation technique for selecting the initial
@@ -155,7 +157,8 @@ At the end of the number of iterations specified (max_iters), convergence is
155157
achieved if the difference between the current and last cost objective is
156158
less than the tolerance level (tol). An error is thrown if convergence fails.
157159
158-
Arguments:
160+
### ARGUMENTS
161+
159162
- `alg` defines one of the algorithms used to calculate `k-means`. This
160163
argument can be omitted; by default the Lloyd algorithm is used.
161164
- `n_threads` defines the number of threads used for calculations; by default it is equal
@@ -169,6 +172,18 @@ alternatively one can use `rand` to choose random points for init.
169172
- `verbose` is the verbosity level. Details of operations can be either printed or not by setting `verbose` accordingly.
170173
171174
A `KmeansResult` structure representing labels, centroids, and sum_squares is returned.
175+
176+
### EXAMPLE
177+
178+
```julia
179+
X = rand(2, 100) # 100 points in 2d
180+
km = kmeans(X, 5) # 5 clusters with the default (Lloyd) algo
181+
km_yy = kmeans(Yinyang(), X, 5) # 5 clusters with the Yinyang algo
182+
183+
kma = km.assignments # X[:,i] is a member of cluster kma[i]
184+
kmc = km.centers # cluster i has center kmc[:,i]
185+
kmn = km.counts # cluster i has kmn[i] points
186+
```
172187
"""
173188
function kmeans(alg::AbstractKMeansAlg, design_matrix, k;
174189
weights = nothing,

src/lloyd.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,12 @@ function kmeans!(alg::Lloyd, containers, X, k, weights=nothing, metric=Euclidean
5858
println("Successfully terminated with convergence.")
5959
end
6060

61+
counts = collect(values(sort(countmap(containers.labels))))
62+
6163
# TODO empty placeholder vectors should be calculated
6264
# TODO Float64 type definitions are too restrictive, should be relaxed
6365
# especially during GPU related development
64-
return KmeansResult(centroids, containers.labels, T[], Int[], T[], totalcost, niters, converged)
66+
return KmeansResult(centroids, containers.labels, T[], counts, T[], totalcost, niters, converged)
6567
end
6668

6769
kmeans(design_matrix, k;

src/yinyang.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,12 @@ function kmeans!(alg::Yinyang, containers, X, k, weights, metric::Euclidean = Eu
106106
println("Successfully terminated with convergence.")
107107
end
108108

109+
counts = collect(values(sort(countmap(containers.labels))))
110+
109111
# TODO empty placeholder vectors should be calculated
110112
# TODO Float64 type definitions are too restrictive, should be relaxed
111113
# especially during GPU related development
112-
return KmeansResult(centroids, containers.labels, T[], Int[], T[], totalcost, niters, converged)
114+
return KmeansResult(centroids, containers.labels, T[], counts, T[], totalcost, niters, converged)
113115
end
114116

115117

0 commit comments

Comments
 (0)