-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathPCA_Wine.R
59 lines (36 loc) · 1.4 KB
/
PCA_Wine.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# ---- Load data and run PCA ----
# Reads the wine data set and computes its principal components.
# NOTE(review): the path below is specific to the author's machine; adjust it
# before running elsewhere.
wine <- read.csv("~/Downloads/Data Science/data set/wine.csv")

# PCA on the correlation matrix (cor = TRUE), keeping per-row component scores.
# NOTE(review): every column of `wine` enters the PCA; if the file contains a
# class-label or id column, confirm whether it should be excluded first.
wine_scores <- princomp(wine, cor = TRUE, scores = TRUE, covmat = NULL)
summary(wine_scores)
str(wine_scores)
loadings(wine_scores)

# Default princomp plot and a biplot of the first two components.
plot(wine_scores)
biplot(wine_scores)

# Cumulative percentage of total variance explained by the components.
component_variance <- wine_scores$sdev^2
plot(cumsum(component_variance) * 100 / sum(component_variance), type = "b")

# Scores on the first three components, shown and then appended to the data.
wine_scores$scores[, 1:3]
new_wine_data <- cbind(wine, wine_scores$scores[, 1:3])
# ---- Hierarchical clustering on the first three PC scores ----
# Select the appended score columns by name rather than by fixed position, so
# the selection does not depend on how many columns the raw data has.
pc_columns <- colnames(wine_scores$scores)[1:3]
wine_cluster <- new_wine_data[, pc_columns]

# Scale: centre each score column and rescale it to unit variance.
wine_cluster_scale <- scale(wine_cluster)

# Distance: pairwise distance matrices under three different metrics.
wine_euclidean <- dist(wine_cluster_scale, method = "euclidean")
wine_maximum <- dist(wine_cluster_scale, method = "maximum")
wine_manhattan <- dist(wine_cluster_scale, method = "manhattan")

# Linkage: one hierarchical clustering per distance matrix.
# NOTE(review): the hclust documentation says centroid linkage is meant to be
# used with squared Euclidean distances; pairing it with the maximum distance
# may give a tree that is hard to interpret. Confirm this is intended.
wine_complete_linkage <- hclust(wine_euclidean, method = "complete")
wine_centroid_linkage <- hclust(wine_maximum, method = "centroid")
wine_mcquitty_linkage <- hclust(wine_manhattan, method = "mcquitty")

# Dendrogram of the complete-linkage tree, with a 7-cluster cut outlined.
plot(wine_complete_linkage, labels = FALSE, hang = -1)
rect.hclust(wine_complete_linkage, k = 7, border = "blue")
# ---- k-means clustering on the scaled PC scores ----
# k-means starts from random centres; fix the seed and use several random
# starts so cluster labels and summaries are reproducible between runs.
set.seed(123)

# Exploratory fit with 4 clusters.
wine_k <- kmeans(wine_cluster_scale, centers = 4, nstart = 25)
str(wine_k)

# Total within-cluster sum of squares for k = 2..10 (elbow criterion).
k_candidates <- 2:10
total_within <- vapply(
  k_candidates,
  function(k) {
    kmeans(wine_cluster_scale, centers = k, nstart = 25)$tot.withinss
  },
  numeric(1)
)
plot(
  k_candidates, total_within,
  type = "b",
  xlab = "Number of clusters (k)",
  ylab = "Total within-cluster sum of squares"
)

# Final model with 3 clusters; show the cluster assigned to each row.
wine_cluster_final <- kmeans(wine_cluster_scale, centers = 3, nstart = 25)
wine_cluster_final$cluster

# Attach the cluster label to the data and move it to the first column,
# selecting by name so the reorder does not depend on the column count.
wine["PCA_Cluster"] <- wine_cluster_final$cluster
wine <- wine[, c("PCA_Cluster", setdiff(names(wine), "PCA_Cluster"))]

# Mean of every column within each cluster.
aggregate(wine, by = list(wine$PCA_Cluster), FUN = mean)