-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathKMinsClustering.js
152 lines (141 loc) · 4.23 KB
/
KMinsClustering.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
class KMinsClustering {
constructor() {
this.clusters = []; // list of data point indicies per cluster
this.centroids = [];
this.sumsqua = 0; // sum of squares of distances to each centroid after current iteration
this.minsumsqua = 0; // minimizing sum of squares
this.bbox = undefined; // data bounding box
this.data = false; // reference to user data
this.clu = {}; // saved clustering
}
initCentroids(N, data) {
this.data = data;
this.clusters = [];
this.centroids = [];
const hash = {};
for (let k = 0; k < N; k++) {
const i = Math.floor(Math.random() * data.length);
if (isset(hash[data[i]])) {
continue;
}
this.centroids.push(data[i]);
hash[data[i]] = 1;
}
console.log('number of centroids = ' + this.centroids.length);
}
// N - number of clusters
initRandomCentroids(N, data) {
this.data = data;
this.bbox = getRangeUpdate();
for (let k = 0; k < data.length; k++) {
this.bbox.update(data[k]);
}
for (let k = 0; k < N; k++) {
const p = this.bbox.min + Math.random() * this.bbox.width;
// if (k==0) p = 2.8; else if (k==1) p = 7.2;
this.centroids.push(p);
}
}
assign() {
// assign() changes amount of clusters and centroids
this.clusters = [];
for (let i = 0; i < Object.keys(this.centroids).length; ++i) {
this.clusters.push(new Array());
}
for (const k of Object.keys(this.data)) {
let dist = 1e6;
let nearest = 0;
for (const c of Object.keys(this.centroids)) {
const d = Math.abs(this.centroids[c] - this.data[k]);
if (d < dist) {
dist = d;
nearest = this.centroids[c];
}
}
// console.log(`nearest for point #${k}, ${this.data[k]} --> cluster #${nearest}, cent=${this.centroids[nearest]}`);
this.clusters[nearest].push(k);
}
// delete empty clusters
for (let k = this.clusters.length - 1; k >= 0; k--) {
if (this.clusters[k].length) {
continue;
}
this.clusters.splice(k, 1);
this.centroids.splice(k, 1);
}
}
getNewCentroids() {
let anyoneMovedQ = false;
for (const c of Object.keys(this.clusters)) {
let avg = 0;
for (const k of Object.keys(this.clusters[c])) {
avg += this.data[this.clusters[c][k]];
}
avg /= this.clusters[c].length;
if (Math.abs(this.centroids[c] - avg) > 1e-6) {
anyoneMovedQ = true;
}
this.centroids[c] = avg;
}
return anyoneMovedQ;
}
getSumOfSquares() {
this.sumsqua = 0;
for (const c of Object.keys(this.clusters)) {
let sum = 0;
for (const k of Object.keys(this.clusters[c])) {
sum += (this.data[this.clusters[c][k]] - this.centroids[c]) * (this.data[this.clusters[c][k]] - this.centroids[c]);
}
this.sumsqua += sum;
}
return this.sumsqua;
}
runOneIteration() {
while (true) {
this.assign();
if (!this.getNewCentroids()) {
break;
}
}
}
runAllIterations(Nclu, data) {
this.minsumsqua = 1e6;
for (let n = Nclu; n > 0; n--) {
console.log('------- iteration Nclu=' + n + '----------');
this.initCentroids(n, data);
this.runOneIteration();
const sum = this.getSumOfSquares();
console.log(this.getClusteringStr());
const diff = sum - this.minsumsqua;
if (diff < 1e-6) {
// less OR equal pick current
this.minsumsqua = sum;
this.saveCurrentClu();
}
}
this.restoreClu();
}
// --------------------------------- utils ------------------
saveCurrentClu() {
this.clu.clusters = this.clusters.slice();
this.clu.centroids = this.centroids.slice();
this.clu.sumsqua = this.sumsqua;
}
restoreClu() {
this.clusters = this.clu.clusters.slice();
this.centroids = this.clu.centroids.slice();
this.sumsqua = this.clu.sumsqua;
}
getClusteringStr() {
let out = '';
for (const c of Object.keys(this.clusters)) {
out += 'clu #' + c + ' : cent=' + this.centroids[c] + ' [ ';
for (const k of Object.keys(this.clusters[c])) {
out += this.data[this.clusters[c][k]] + ' ';
}
out += ']\n';
}
out += 'sum of squares == ' + this.sumsqua + '\n';
return out;
}
}