Skip to content

Commit ef95ba6

Browse files
Add files via upload
1 parent 62dbe2d commit ef95ba6

17 files changed

+271
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
Scotch Whisky |
2+
3+
Scotch whisky is prized for its complexity and variety of flavors.
4+
The regions of Scotland where it is produced
5+
are believed to have distinct flavor profiles.
6+
In this case study, we will classify scotch whiskies
7+
based on their flavor characteristics.
8+
The dataset we'll be using contains a selection of scotch whiskies
9+
from several distilleries, and we'll attempt to cluster whiskies
10+
into groups that are similar in flavor.
11+
This case study will deepen your understanding of Pandas, NumPy,
12+
and scikit-learn, and perhaps of scotch whisky.
13+
14+
15+
The dataset we'll be using consists of tasting ratings
16+
of one readily available single malt scotch whisky
17+
from almost every active whisky distillery in Scotland.
18+
The resulting dataset has 86 malt whiskies
19+
that are scored between 0 and 4 in 12 different taste categories.
20+
The scores have been aggregated from 10 different tasters.
21+
The taste categories describe whether the whiskies are sweet, smoky,
22+
medicinal, spicy, and so on.
23+
24+
25+
Order of project subparts:
26+
1.whiskies_loading_&_inspecting_data.py
27+
2.whiskies_exploring_correlations.py
28+
3.clustering_whiskies_by_flavour_profile.py
29+
4.whiskies_comparing_correlation_matrices.py
30+
31+
32+
I did this project to accompany what I learned in the HarvardX course PH526x, Using Python for Research.
33+
34+
- Amartya Ranjan Saikia ([email protected]/[email protected])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# The ``sklearn.cluster.bicluster`` module path was deprecated in
# scikit-learn 0.22 and removed in 0.24; the class is exported from
# ``sklearn.cluster`` in old and new releases alike.
from sklearn.cluster import SpectralCoclustering
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Co-cluster the whisky-by-whisky correlation matrix into six groups.
# NOTE: ``corr_whisky`` is not defined in this script; it is expected to
# already exist from an earlier step of the project.
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(corr_whisky)

# Boolean membership matrix: entry (i, r) is True when cluster i contains
# row r. (A bare expression only displays something in an interactive
# session.)
model.rows_
# >>>np.sum(model.rows_, axis=1)  # number of rows in each cluster
# >>>np.sum(model.rows_, axis=0)  # number of clusters each row belongs to

# Cluster label assigned to each row of ``corr_whisky``.
model.row_labels_
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#####Regions#####
2+
Highlands
3+
Speyside
4+
Highlands
5+
Islay
6+
Highlands
7+
Islands
8+
Lowlands
9+
Speyside
10+
Speyside
11+
Highlands
12+
Speyside
13+
Speyside
14+
Highlands
15+
Speyside
16+
Speyside
17+
Speyside
18+
Lowlands
19+
Highlands
20+
Islay
21+
Islay
22+
Islay
23+
Islay
24+
Speyside
25+
Highlands
26+
Speyside
27+
Speyside
28+
Speyside
29+
Highlands
30+
Highlands
31+
Highlands
32+
Speyside
33+
Highlands
34+
Speyside
35+
Speyside
36+
Highlands
37+
Speyside
38+
Speyside
39+
Speyside
40+
Highlands
41+
Campbelltown
42+
Speyside
43+
Speyside
44+
Highlands
45+
Speyside
46+
Speyside
47+
Speyside
48+
Highlands
49+
Lowlands
50+
Speyside
51+
Speyside
52+
Highlands
53+
Speyside
54+
Highlands
55+
Islands
56+
Speyside
57+
Islands
58+
Speyside
59+
Islay
60+
Islay
61+
Speyside
62+
Highlands
63+
Speyside
64+
Speyside
65+
Speyside
66+
Speyside
67+
Speyside
68+
Highlands
69+
Highlands
70+
Highlands
71+
Speyside
72+
Highlands
73+
Islands
74+
Speyside
75+
Speyside
76+
Campbelltown
77+
Speyside
78+
Speyside
79+
Islands
80+
Speyside
81+
Speyside
82+
Highlands
83+
Islands
84+
Highlands
85+
Speyside
86+
Speyside
87+
Highlands
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
RowID,Distillery,Body,Sweetness,Smoky,Medicinal,Tobacco,Honey,Spicy,Winey,Nutty,Malty,Fruity,Floral,Postcode, Latitude, Longitude
2+
01,Aberfeldy,2,2,2,0,0,2,1,2,2,2,2,2, PH15 2EB, 286580,749680
3+
02,Aberlour,3,3,1,0,0,4,3,2,2,3,3,2, AB38 9PJ, 326340,842570
4+
03,AnCnoc,1,3,2,0,0,2,0,0,2,2,3,2, AB5 5LI, 352960,839320
5+
04,Ardbeg,4,1,4,4,0,0,2,0,1,2,1,0, PA42 7EB, 141560,646220
6+
05,Ardmore,2,2,2,0,0,1,1,1,2,3,1,1, AB54 4NH, 355350,829140
7+
06,ArranIsleOf,2,3,1,1,0,1,1,1,0,1,1,2, KA27 8HJ, 194050,649950
8+
07,Auchentoshan,0,2,0,0,0,1,1,0,2,2,3,3, G81 4SJ, 247670,672610
9+
08,Auchroisk,2,3,1,0,0,2,1,2,2,2,2,1, AB55 3XS, 340754,848623
10+
09,Aultmore,2,2,1,0,0,1,0,0,2,2,2,2, AB55 3QY, 340754,848623
11+
10,Balblair,2,3,2,1,0,0,2,0,2,1,2,1, IV19 1LB, 270820,885770
12+
11,Balmenach,4,3,2,0,0,2,1,3,3,0,1,2, PH26 3PF, 307750,827170
13+
12,Belvenie,3,2,1,0,0,3,2,1,0,2,2,2, AB55 4DH, 332680,840840
14+
13,BenNevis,4,2,2,0,0,2,2,0,2,2,2,2, PH33 6TJ, 212600,775710
15+
14,Benriach,2,2,1,0,0,2,2,0,0,2,3,2, IV30 8SJ, 323450,858380
16+
15,Benrinnes,3,2,2,0,0,3,1,1,2,3,2,2, AB38 9NN, 325800,839920
17+
16,Benromach,2,2,2,0,0,2,2,1,2,2,2,2, IV36 3EB, 303330,859350
18+
17,Bladnoch,1,2,1,0,0,0,1,1,0,2,2,3, DG8 9AB, 242260,554260
19+
18,BlairAthol,2,2,2,0,0,1,2,2,2,2,2,2, PH16 5LY, 294860,757580
20+
19,Bowmore,2,2,3,1,0,2,2,1,1,1,1,2, PA43 7GS, 131330,659720
21+
20,Bruichladdich,1,1,2,2,0,2,2,1,2,2,2,2, PA49 7UN, 126680,661400
22+
21,Bunnahabhain,1,2,1,1,0,1,1,1,1,2,2,3, PA46 7RR, 142210,673170
23+
22,Caol Ila,3,1,4,2,1,0,2,0,2,1,1,1, PA46 7RL, 142920,670040
24+
23,Cardhu,1,3,1,0,0,1,1,0,2,2,2,2, AB38 7RY, 318790,843090
25+
24,Clynelish,3,2,3,3,1,0,2,0,1,1,2,0, KW9 6LB, 290250,904230
26+
25,Craigallechie,2,2,2,0,1,2,2,1,2,2,1,4, AB38 9ST, 328920,844920
27+
26,Craigganmore,2,3,2,1,0,0,1,0,2,2,2,2, AB37 9AB, 316600,836370
28+
27,Dailuaine,4,2,2,0,0,1,2,2,2,2,2,1, AB38 7RE, 323520,841010
29+
28,Dalmore,3,2,2,1,0,1,2,2,1,2,3,1, IV17 0UT, 266610,868730
30+
29,Dalwhinnie,2,2,2,0,0,2,1,0,1,2,2,2, PH19 1AB, 263670,785270
31+
30,Deanston,2,2,1,0,0,2,1,1,1,3,2,1, FK16 6AG, 271570,701570
32+
31,Dufftown,2,3,1,1,0,0,0,0,1,2,2,2, AB55 4BR, 332360,839200
33+
32,Edradour,2,3,1,0,0,2,1,1,4,2,2,2, PH16 5JP, 295960,757940
34+
33,GlenDeveronMacduff,2,3,1,1,1,1,1,2,0,2,0,1, AB4 3JT, 372120,860400
35+
34,GlenElgin,2,3,1,0,0,2,1,1,1,1,2,3, IV30 3SL, 322640,861040
36+
35,GlenGarioch,2,1,3,0,0,0,3,1,0,2,2,2, AB51 0ES, 381020,827590
37+
36,GlenGrant,1,2,0,0,0,1,0,1,2,1,2,1, AB38 7BS, 327610,849570
38+
37,GlenKeith,2,3,1,0,0,1,2,1,2,1,2,1, AB55 3BU, 340754,848623
39+
38,GlenMoray,1,2,1,0,0,1,2,1,2,2,2,4, IV30 1YE, 319820,862320
40+
39,GlenOrd,3,2,1,0,0,1,2,1,1,2,2,2, IV6 7UJ, 251810,850860
41+
40,GlenScotia,2,2,2,2,0,1,0,1,2,2,1,1, PA28 6DS, 172090,621010
42+
41,GlenSpey,1,3,1,0,0,0,1,1,1,2,0,2, AB38 7AU, 327760,849140
43+
42,Glenallachie,1,3,1,0,0,1,1,0,1,2,2,2, AB38 9LR, 326490,841240
44+
43,Glendronach,4,2,2,0,0,2,1,4,2,2,2,0, AB54 6DA, 361200,844930
45+
44,Glendullan,3,2,1,0,0,2,1,2,1,2,3,2, AB55 4DJ, 333000,840300
46+
45,Glenfarclas,2,4,1,0,0,1,2,3,2,3,2,2, AB37 9BD, 320950,838160
47+
46,Glenfiddich,1,3,1,0,0,0,0,0,0,2,2,2, AB55 4DH, 332680,840840
48+
47,Glengoyne,1,2,0,0,0,1,1,1,2,2,3,2, G63 9LB, 252810,682750
49+
48,Glenkinchie,1,2,1,0,0,1,2,0,0,2,2,2, EH34 5ET, 344380,666690
50+
49,Glenlivet,2,3,1,0,0,2,2,2,1,2,2,3, AB37 9DB, 319560,828780
51+
50,Glenlossie,1,2,1,0,0,1,2,0,1,2,2,2, IV30 3SS, 322640,861040
52+
51,Glenmorangie,2,2,1,1,0,1,2,0,2,1,2,2, IV19 1PZ, 276750,883450
53+
52,Glenrothes,2,3,1,0,0,1,1,2,1,2,2,0, AB38 7AA, 327650,849170
54+
53,Glenturret,2,3,1,0,0,2,2,2,2,2,1,2, PH7 4HA, 285630,723580
55+
54,Highland Park,2,2,3,1,0,2,1,1,1,2,1,1, KW15 1SU, 345340,1009260
56+
55,Inchgower,1,3,1,1,0,2,2,0,1,2,1,2, AB56 5AB, 342610,863970
57+
56,Isle of Jura,2,1,2,2,0,1,1,0,2,1,1,1, PA60 7XT, 152660,667040
58+
57,Knochando,2,3,1,0,0,2,2,1,2,1,2,2, AB38 7RT, 319470,841570
59+
58,Lagavulin,4,1,4,4,1,0,1,2,1,1,1,0, PA42 7DZ, 140430,645730
60+
59,Laphroig,4,2,4,4,1,0,0,1,1,1,0,0, PA42 7DU, 138680,645160
61+
60,Linkwood,2,3,1,0,0,1,1,2,0,1,3,2, IV30 3RD, 322640,861040
62+
61,Loch Lomond,1,1,1,1,0,1,1,0,1,2,1,2, G83 0TL, 239370,680920
63+
62,Longmorn,3,2,1,0,0,1,1,1,3,3,2,3, IV30 3SJ, 322640,861040
64+
63,Macallan,4,3,1,0,0,2,1,4,2,2,3,1, AB38 9RX, 327710,844480
65+
64,Mannochmore,2,1,1,0,0,1,1,1,2,1,2,2, IV30 3SS, 322640,861040
66+
65,Miltonduff,2,4,1,0,0,1,0,0,2,1,1,2, IV30 3TQ, 322640,861040
67+
66,Mortlach,3,2,2,0,0,2,3,3,2,1,2,2, AB55 4AQ, 332950,839850
68+
67,Oban,2,2,2,2,0,0,2,0,2,2,2,0, PA34 5NH, 185940,730190
69+
68,OldFettercairn,1,2,2,0,1,2,2,1,2,3,1,1, AB30 1YE, 370860,772900
70+
69,OldPulteney,2,1,2,2,1,0,1,1,2,2,2,2, KW1 5BA, 336730,950130
71+
70,RoyalBrackla,2,3,2,1,1,1,2,1,0,2,3,2, IV12 5QY, 286040,851320
72+
71,RoyalLochnagar,3,2,2,0,0,2,2,2,2,2,3,1, AB35 5TB, 326140,794370
73+
72,Scapa,2,2,1,1,0,2,1,1,2,2,2,2, KW15 1SE, 342850,1008930
74+
73,Speyburn,2,4,1,0,0,2,1,0,0,2,1,2, AB38 7AG, 326930,851430
75+
74,Speyside,2,2,1,0,0,1,0,1,2,2,2,2, PH21 1NS, 278740,800600
76+
75,Springbank,2,2,2,2,0,2,2,1,2,1,0,1, PA28 6EJ, 172280,620910
77+
76,Strathisla,2,2,1,0,0,2,2,2,3,3,3,2, AB55 3BS, 340754,848623
78+
77,Strathmill,2,3,1,0,0,0,2,0,2,1,3,2, AB55 5DQ,342650,850500
79+
78,Talisker,4,2,3,3,0,1,3,0,1,2,2,0, IV47 8SR, 137950,831770
80+
79,Tamdhu,1,2,1,0,0,2,0,1,1,2,2,2, AB38 7RP, 319210,841760
81+
80,Tamnavulin,1,3,2,0,0,0,2,0,2,1,2,3, AB37 9JA, 321180,826110
82+
81,Teaninich,2,2,2,1,0,0,2,0,0,0,2,2, IV17 0XB, 265360,869120
83+
82,Tobermory,1,1,1,0,0,1,0,0,1,2,2,2, PA75 6NR, 150450,755070
84+
83,Tomatin,2,3,2,0,0,2,2,1,1,2,0,1, IV13 7YT, 279120,829630
85+
84,Tomintoul,0,3,1,0,0,2,2,1,1,2,1,2, AB37 9AQ, 315100,825560
86+
85,Tormore,2,2,1,0,0,1,0,1,2,1,0,0, PH26 3LR, 315180,834960
87+
86,Tullibardine,2,3,0,0,1,0,2,1,1,2,2,1, PH4 1QG, 289690,708850
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# ``sklearn.cluster.bicluster`` was removed in scikit-learn 0.24; import the
# class from ``sklearn.cluster`` instead.
from sklearn.cluster import SpectralCoclustering
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# NOTE: this script relies on ``whisky``, ``model`` and ``corr_whisky``
# already being defined by the earlier steps of the project.

# Tag every whisky with its co-cluster label, then reorder the rows so that
# members of the same cluster sit next to each other.
whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
# ``DataFrame.ix`` was removed in pandas 1.0. ``np.argsort`` returns integer
# positions, so positional indexing with ``.iloc`` is the correct replacement.
whisky = whisky.iloc[np.argsort(model.row_labels_)]
whisky = whisky.reset_index(drop=True)

# Recompute the whisky-by-whisky correlations on the reordered rows, using
# the same column slice (2..13) that the loading step uses for the flavors.
correlations = whisky.iloc[:, 2:14].transpose().corr()
correlations = np.array(correlations)

# Side-by-side heat maps: original ordering vs. cluster-sorted ordering.
plt.figure(figsize=(14, 7))
plt.subplot(121)
plt.pcolor(corr_whisky)
plt.title("Original")
plt.axis("tight")
plt.subplot(122)
plt.pcolor(correlations)
plt.title("Rearranged")
plt.axis("tight")
# Save before show(): once the window opened by show() has been closed the
# figure is released, and a later savefig() would write an empty page.
plt.savefig("correlations.pdf")
plt.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import matplotlib.pyplot as plt

# NOTE: ``corr_flavors`` and ``flavors`` are not defined in this script; they
# are expected to already exist from the data-loading step.

# Heat map of the flavor-by-flavor correlation matrix.
plt.figure(figsize=(10, 10))
plt.pcolor(corr_flavors)
plt.colorbar()
# >>>plt.savefig("corlate-whisky1.pdf")

# Transposing turns every whisky into a column, so corr() now compares
# whiskies with each other instead of flavors. The method form is used
# because this script never imports pandas, so ``pd`` would be undefined.
corr_whisky = flavors.transpose().corr()
plt.figure(figsize=(10, 10))
plt.pcolor(corr_whisky)
plt.axis("tight")
plt.colorbar()
# >>>plt.savefig("corlate-whisky2.pdf")

plt.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import numpy as np
import pandas as pd

# Read the tasting scores, then attach the contents of regions.txt as an
# extra "Region" column.
whisky = pd.read_csv("whiskies.txt")
whisky["Region"] = pd.read_csv("regions.txt")

# Interactive inspection snippets (iloc indexes a data frame by location):
# >>>whisky.head()
# >>>whisky.iloc[0:10]        # rows 0 - 9
# >>>whisky.iloc[0:10,0:5]    # rows 0 - 9 and columns 0 - 4
# >>>whisky.columns

# Columns 2 through 13 (Body ... Floral in the whiskies.txt header) hold the
# flavor scores.
flavors = whisky.iloc[:, 2:14]

# Pairwise correlation between the flavor columns.
corr_flavors = flavors.corr()
print(corr_flavors)

0 commit comments

Comments
 (0)