mba.py
''' imports: pandas/numpy for data handling, matplotlib for plotting, and the
apriori / association_rules functions from mlxtend '''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import apriori, association_rules

''' make a dataframe of the retail dataset '''
df = pd.read_csv('/content/retail_dataset.csv', sep=',')
df.head()  # shows the first 5 rows
''' find the unique items that appear in the first column of the table '''
items = df['0'].unique()
items
''' the apriori module requires a dataframe whose values are either 0/1 or
True/False, so we have to one-hot encode the raw transactions first '''
itemset = set(items)  # set of all unique items
encodedVals = []      # list to store the binary encoded rows
for index, row in df.iterrows():
    rowset = set(row)
    labels = {}
    uncommons = list(itemset - rowset)            # items absent from this transaction
    commons = list(itemset.intersection(rowset))  # items present in this transaction
    for uncommon in uncommons:
        labels[uncommon] = 0
    for common in commons:
        labels[common] = 1
    encodedVals.append(labels)
encodedVals[0]
encodedDf = pd.DataFrame(encodedVals)  # one-hot encoded dataframe
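''' the same one-hot encoding can also be produced with mlxtend's
TransactionEncoder. A minimal sketch, assuming each row of df is one
transaction and that NaN cells mark empty slots in shorter baskets; the
manual loop above remains the encoding actually used below, and the
altEncodedDf name is only illustrative. '''
from mlxtend.preprocessing import TransactionEncoder
transactions = [[item for item in row if pd.notna(item)]
                for row in df.values.tolist()]
te = TransactionEncoder()
teArray = te.fit(transactions).transform(transactions)  # boolean matrix
altEncodedDf = pd.DataFrame(teArray, columns=te.columns_).astype(int)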
''' min_support is a parameter between 0 and 1 supplied to the Apriori
algorithm; it prunes candidate itemsets by setting a lower bound on the
support of the frequent itemsets that are returned '''
freqItemsDf = apriori(encodedDf, min_support=0.2, use_colnames=True)
freqItemsDf  # shows the frequent itemsets and their support
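''' a quick sanity check of what "support" means: the support of a single
item is the fraction of transactions that contain it, i.e. the mean of its
0/1 column in encodedDf. The item name 'Bread' is only an illustrative
assumption about the dataset's contents. '''
exampleItem = 'Bread'  # hypothetical item name, for illustration only
if exampleItem in encodedDf.columns:
    print(exampleItem, 'support =', encodedDf[exampleItem].mean())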
''' frequent if-then associations are called association rules; each rule
consists of an antecedent (if) and a consequent (then). The metric
parameter can be set to confidence, lift, support, leverage or conviction. '''
rules = association_rules(freqItemsDf, metric="confidence",
                          min_threshold=0.6)
rules.head()  # shows the first 5 rules
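''' the rules dataframe can be narrowed down further with ordinary pandas
filtering. A minimal sketch: keep only rules whose lift is above 1 (the
antecedent and consequent occur together more often than expected by
chance) and look at the most confident ones first. The threshold and the
strongRules name are illustrative, not part of the original analysis. '''
strongRules = rules[rules['lift'] > 1.0].sort_values('confidence',
                                                     ascending=False)
strongRules.head()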
plt.scatter(rules['support'], rules['confidence'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('Support vs Confidence')
plt.show()
plt.scatter(rules['support'], rules['lift'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('lift')
plt.title('Support vs Lift')
plt.show()
fit = np.polyfit(rules['lift'], rules['confidence'], 1)  # least-squares linear fit
fit_fn = np.poly1d(fit)
plt.plot(rules['lift'], rules['confidence'], 'yo', rules['lift'],
         fit_fn(rules['lift']))
plt.xlabel('Lift')
plt.ylabel('Confidence')
plt.title('Lift vs Confidence')
plt.show()
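''' sanity check on the metrics being plotted: for every rule, lift equals
the confidence divided by the support of the consequent, so the two
quantities are closely related by construction. This uses only columns
that association_rules already returns. '''
assert np.allclose(rules['lift'],
                   rules['confidence'] / rules['consequent support'])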