Skip to content

Commit f02e80a

Browse files
committed
decision tree implementation
1 parent accf8e5 commit f02e80a

16 files changed

+41980
-9
lines changed

Diff for: .ipynb_checkpoints/anomaly_detection_algorithms-checkpoint.ipynb

+194
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 4
6+
}

Diff for: .ipynb_checkpoints/iris-checkpoint.csv

+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
5.1,3.5,1.4,0.2,Iris-setosa
2+
4.9,3.0,1.4,0.2,Iris-setosa
3+
4.7,3.2,1.3,0.2,Iris-setosa
4+
4.6,3.1,1.5,0.2,Iris-setosa
5+
5.0,3.6,1.4,0.2,Iris-setosa
6+
5.4,3.9,1.7,0.4,Iris-setosa
7+
4.6,3.4,1.4,0.3,Iris-setosa
8+
5.0,3.4,1.5,0.2,Iris-setosa
9+
4.4,2.9,1.4,0.2,Iris-setosa
10+
4.9,3.1,1.5,0.1,Iris-setosa
11+
5.4,3.7,1.5,0.2,Iris-setosa
12+
4.8,3.4,1.6,0.2,Iris-setosa
13+
4.8,3.0,1.4,0.1,Iris-setosa
14+
4.3,3.0,1.1,0.1,Iris-setosa
15+
5.8,4.0,1.2,0.2,Iris-setosa
16+
5.7,4.4,1.5,0.4,Iris-setosa
17+
5.4,3.9,1.3,0.4,Iris-setosa
18+
5.1,3.5,1.4,0.3,Iris-setosa
19+
5.7,3.8,1.7,0.3,Iris-setosa
20+
5.1,3.8,1.5,0.3,Iris-setosa
21+
5.4,3.4,1.7,0.2,Iris-setosa
22+
5.1,3.7,1.5,0.4,Iris-setosa
23+
4.6,3.6,1.0,0.2,Iris-setosa
24+
5.1,3.3,1.7,0.5,Iris-setosa
25+
4.8,3.4,1.9,0.2,Iris-setosa
26+
5.0,3.0,1.6,0.2,Iris-setosa
27+
5.0,3.4,1.6,0.4,Iris-setosa
28+
5.2,3.5,1.5,0.2,Iris-setosa
29+
5.2,3.4,1.4,0.2,Iris-setosa
30+
4.7,3.2,1.6,0.2,Iris-setosa
31+
4.8,3.1,1.6,0.2,Iris-setosa
32+
5.4,3.4,1.5,0.4,Iris-setosa
33+
5.2,4.1,1.5,0.1,Iris-setosa
34+
5.5,4.2,1.4,0.2,Iris-setosa
35+
4.9,3.1,1.5,0.1,Iris-setosa
36+
5.0,3.2,1.2,0.2,Iris-setosa
37+
5.5,3.5,1.3,0.2,Iris-setosa
38+
4.9,3.1,1.5,0.1,Iris-setosa
39+
4.4,3.0,1.3,0.2,Iris-setosa
40+
5.1,3.4,1.5,0.2,Iris-setosa
41+
5.0,3.5,1.3,0.3,Iris-setosa
42+
4.5,2.3,1.3,0.3,Iris-setosa
43+
4.4,3.2,1.3,0.2,Iris-setosa
44+
5.0,3.5,1.6,0.6,Iris-setosa
45+
5.1,3.8,1.9,0.4,Iris-setosa
46+
4.8,3.0,1.4,0.3,Iris-setosa
47+
5.1,3.8,1.6,0.2,Iris-setosa
48+
4.6,3.2,1.4,0.2,Iris-setosa
49+
5.3,3.7,1.5,0.2,Iris-setosa
50+
5.0,3.3,1.4,0.2,Iris-setosa
51+
7.0,3.2,4.7,1.4,Iris-versicolor
52+
6.4,3.2,4.5,1.5,Iris-versicolor
53+
6.9,3.1,4.9,1.5,Iris-versicolor
54+
5.5,2.3,4.0,1.3,Iris-versicolor
55+
6.5,2.8,4.6,1.5,Iris-versicolor
56+
5.7,2.8,4.5,1.3,Iris-versicolor
57+
6.3,3.3,4.7,1.6,Iris-versicolor
58+
4.9,2.4,3.3,1.0,Iris-versicolor
59+
6.6,2.9,4.6,1.3,Iris-versicolor
60+
5.2,2.7,3.9,1.4,Iris-versicolor
61+
5.0,2.0,3.5,1.0,Iris-versicolor
62+
5.9,3.0,4.2,1.5,Iris-versicolor
63+
6.0,2.2,4.0,1.0,Iris-versicolor
64+
6.1,2.9,4.7,1.4,Iris-versicolor
65+
5.6,2.9,3.6,1.3,Iris-versicolor
66+
6.7,3.1,4.4,1.4,Iris-versicolor
67+
5.6,3.0,4.5,1.5,Iris-versicolor
68+
5.8,2.7,4.1,1.0,Iris-versicolor
69+
6.2,2.2,4.5,1.5,Iris-versicolor
70+
5.6,2.5,3.9,1.1,Iris-versicolor
71+
5.9,3.2,4.8,1.8,Iris-versicolor
72+
6.1,2.8,4.0,1.3,Iris-versicolor
73+
6.3,2.5,4.9,1.5,Iris-versicolor
74+
6.1,2.8,4.7,1.2,Iris-versicolor
75+
6.4,2.9,4.3,1.3,Iris-versicolor
76+
6.6,3.0,4.4,1.4,Iris-versicolor
77+
6.8,2.8,4.8,1.4,Iris-versicolor
78+
6.7,3.0,5.0,1.7,Iris-versicolor
79+
6.0,2.9,4.5,1.5,Iris-versicolor
80+
5.7,2.6,3.5,1.0,Iris-versicolor
81+
5.5,2.4,3.8,1.1,Iris-versicolor
82+
5.5,2.4,3.7,1.0,Iris-versicolor
83+
5.8,2.7,3.9,1.2,Iris-versicolor
84+
6.0,2.7,5.1,1.6,Iris-versicolor
85+
5.4,3.0,4.5,1.5,Iris-versicolor
86+
6.0,3.4,4.5,1.6,Iris-versicolor
87+
6.7,3.1,4.7,1.5,Iris-versicolor
88+
6.3,2.3,4.4,1.3,Iris-versicolor
89+
5.6,3.0,4.1,1.3,Iris-versicolor
90+
5.5,2.5,4.0,1.3,Iris-versicolor
91+
5.5,2.6,4.4,1.2,Iris-versicolor
92+
6.1,3.0,4.6,1.4,Iris-versicolor
93+
5.8,2.6,4.0,1.2,Iris-versicolor
94+
5.0,2.3,3.3,1.0,Iris-versicolor
95+
5.6,2.7,4.2,1.3,Iris-versicolor
96+
5.7,3.0,4.2,1.2,Iris-versicolor
97+
5.7,2.9,4.2,1.3,Iris-versicolor
98+
6.2,2.9,4.3,1.3,Iris-versicolor
99+
5.1,2.5,3.0,1.1,Iris-versicolor
100+
5.7,2.8,4.1,1.3,Iris-versicolor
101+
6.3,3.3,6.0,2.5,Iris-virginica
102+
5.8,2.7,5.1,1.9,Iris-virginica
103+
7.1,3.0,5.9,2.1,Iris-virginica
104+
6.3,2.9,5.6,1.8,Iris-virginica
105+
6.5,3.0,5.8,2.2,Iris-virginica
106+
7.6,3.0,6.6,2.1,Iris-virginica
107+
4.9,2.5,4.5,1.7,Iris-virginica
108+
7.3,2.9,6.3,1.8,Iris-virginica
109+
6.7,2.5,5.8,1.8,Iris-virginica
110+
7.2,3.6,6.1,2.5,Iris-virginica
111+
6.5,3.2,5.1,2.0,Iris-virginica
112+
6.4,2.7,5.3,1.9,Iris-virginica
113+
6.8,3.0,5.5,2.1,Iris-virginica
114+
5.7,2.5,5.0,2.0,Iris-virginica
115+
5.8,2.8,5.1,2.4,Iris-virginica
116+
6.4,3.2,5.3,2.3,Iris-virginica
117+
6.5,3.0,5.5,1.8,Iris-virginica
118+
7.7,3.8,6.7,2.2,Iris-virginica
119+
7.7,2.6,6.9,2.3,Iris-virginica
120+
6.0,2.2,5.0,1.5,Iris-virginica
121+
6.9,3.2,5.7,2.3,Iris-virginica
122+
5.6,2.8,4.9,2.0,Iris-virginica
123+
7.7,2.8,6.7,2.0,Iris-virginica
124+
6.3,2.7,4.9,1.8,Iris-virginica
125+
6.7,3.3,5.7,2.1,Iris-virginica
126+
7.2,3.2,6.0,1.8,Iris-virginica
127+
6.2,2.8,4.8,1.8,Iris-virginica
128+
6.1,3.0,4.9,1.8,Iris-virginica
129+
6.4,2.8,5.6,2.1,Iris-virginica
130+
7.2,3.0,5.8,1.6,Iris-virginica
131+
7.4,2.8,6.1,1.9,Iris-virginica
132+
7.9,3.8,6.4,2.0,Iris-virginica
133+
6.4,2.8,5.6,2.2,Iris-virginica
134+
6.3,2.8,5.1,1.5,Iris-virginica
135+
6.1,2.6,5.6,1.4,Iris-virginica
136+
7.7,3.0,6.1,2.3,Iris-virginica
137+
6.3,3.4,5.6,2.4,Iris-virginica
138+
6.4,3.1,5.5,1.8,Iris-virginica
139+
6.0,3.0,4.8,1.8,Iris-virginica
140+
6.9,3.1,5.4,2.1,Iris-virginica
141+
6.7,3.1,5.6,2.4,Iris-virginica
142+
6.9,3.1,5.1,2.3,Iris-virginica
143+
5.8,2.7,5.1,1.9,Iris-virginica
144+
6.8,3.2,5.9,2.3,Iris-virginica
145+
6.7,3.3,5.7,2.5,Iris-virginica
146+
6.7,3.0,5.2,2.3,Iris-virginica
147+
6.3,2.5,5.0,1.9,Iris-virginica
148+
6.5,3.0,5.2,2.0,Iris-virginica
149+
6.2,3.4,5.4,2.3,Iris-virginica
150+
5.9,3.0,5.1,1.8,Iris-virginica
151+

Diff for: .ipynb_checkpoints/iris-checkpoint.names

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
1. Title: Iris Plants Database
2+
Updated Sept 21 by C.Blake - Added discrepancy information
3+
4+
2. Sources:
5+
(a) Creator: R.A. Fisher
6+
(b) Donor: Michael Marshall (MARSHALL%[email protected])
7+
(c) Date: July, 1988
8+
9+
3. Past Usage:
10+
- Publications: too many to mention!!! Here are a few.
11+
1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
12+
Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
13+
to Mathematical Statistics" (John Wiley, NY, 1950).
14+
2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
15+
(Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
16+
3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
17+
Structure and Classification Rule for Recognition in Partially Exposed
18+
Environments". IEEE Transactions on Pattern Analysis and Machine
19+
Intelligence, Vol. PAMI-2, No. 1, 67-71.
20+
-- Results:
21+
-- very low misclassification rates (0% for the setosa class)
22+
4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
23+
Transactions on Information Theory, May 1972, 431-433.
24+
-- Results:
25+
-- very low misclassification rates again
26+
5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
27+
conceptual clustering system finds 3 classes in the data.
28+
29+
4. Relevant Information:
30+
--- This is perhaps the best known database to be found in the pattern
31+
recognition literature. Fisher's paper is a classic in the field
32+
and is referenced frequently to this day. (See Duda & Hart, for
33+
example.) The data set contains 3 classes of 50 instances each,
34+
where each class refers to a type of iris plant. One class is
35+
linearly separable from the other 2; the latter are NOT linearly
36+
separable from each other.
37+
--- Predicted attribute: class of iris plant.
38+
--- This is an exceedingly simple domain.
39+
--- This data differs from the data presented in Fisher's article
40+
(identified by Steve Chadwick, [email protected] )
41+
The 35th sample should be: 4.9,3.1,1.5,0.2,"Iris-setosa"
42+
where the error is in the fourth feature.
43+
The 38th sample: 4.9,3.6,1.4,0.1,"Iris-setosa"
44+
where the errors are in the second and third features.
45+
46+
5. Number of Instances: 150 (50 in each of three classes)
47+
48+
6. Number of Attributes: 4 numeric, predictive attributes and the class
49+
50+
7. Attribute Information:
51+
1. sepal length in cm
52+
2. sepal width in cm
53+
3. petal length in cm
54+
4. petal width in cm
55+
5. class:
56+
-- Iris Setosa
57+
-- Iris Versicolor
58+
-- Iris Virginica
59+
60+
8. Missing Attribute Values: None
61+
62+
Summary Statistics:
63+
Min Max Mean SD Class Correlation
64+
sepal length: 4.3 7.9 5.84 0.83 0.7826
65+
sepal width: 2.0 4.4 3.05 0.43 -0.4194
66+
petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
67+
petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
68+
69+
9. Class Distribution: 33.3% for each of 3 classes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 4
6+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 4
6+
}
+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 2,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"Collecting imblearn\n",
13+
" Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)\n",
14+
"Collecting imbalanced-learn\n",
15+
" Downloading imbalanced_learn-0.7.0-py3-none-any.whl (167 kB)\n",
16+
"\u001b[K |████████████████████████████████| 167 kB 1.1 MB/s eta 0:00:01\n",
17+
"\u001b[?25hRequirement already satisfied: scikit-learn>=0.23 in /Users/deepankarsingh/opt/anaconda3/lib/python3.8/site-packages (from imbalanced-learn->imblearn) (0.23.1)\n",
18+
"Requirement already satisfied: numpy>=1.13.3 in /Users/deepankarsingh/opt/anaconda3/lib/python3.8/site-packages (from imbalanced-learn->imblearn) (1.18.5)\n",
19+
"Requirement already satisfied: joblib>=0.11 in /Users/deepankarsingh/opt/anaconda3/lib/python3.8/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n",
20+
"Requirement already satisfied: scipy>=0.19.1 in /Users/deepankarsingh/opt/anaconda3/lib/python3.8/site-packages (from imbalanced-learn->imblearn) (1.5.0)\n",
21+
"Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/deepankarsingh/opt/anaconda3/lib/python3.8/site-packages (from scikit-learn>=0.23->imbalanced-learn->imblearn) (2.1.0)\n",
22+
"Installing collected packages: imbalanced-learn, imblearn\n",
23+
"Successfully installed imbalanced-learn-0.7.0 imblearn-0.0\n"
24+
]
25+
}
26+
],
27+
"source": [
28+
"!pip install imblearn"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": 3,
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"from imblearn.under_sampling import RandomUnderSampler"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": 5,
43+
"metadata": {},
44+
"outputs": [],
45+
"source": [
46+
"rus = RandomUnderSampler(random_state=42)\n",
47+
"X_res, y_res = rus.fit_resample(X, y)"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": []
56+
}
57+
],
58+
"metadata": {
59+
"kernelspec": {
60+
"display_name": "Python 3",
61+
"language": "python",
62+
"name": "python3"
63+
},
64+
"language_info": {
65+
"name": ""
66+
}
67+
},
68+
"nbformat": 4,
69+
"nbformat_minor": 4
70+
}

Diff for: .ipynb_checkpoints/reshaping-checkpoint.ipynb

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"%matplotlib inline\n",
10+
"import numpy as np\n",
11+
"import pandas as pd\n",
12+
"import mglearn\n",
13+
"import matplotlib.pyplot as plt\n",
14+
"import sklearn"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 2,
20+
"metadata": {},
21+
"outputs": [
22+
{
23+
"name": "stdout",
24+
"output_type": "stream",
25+
"text": [
26+
"Data shape: (506, 13)\n"
27+
]
28+
}
29+
],
30+
"source": [
31+
"from sklearn.datasets import load_boston\n",
32+
"boston = load_boston()\n",
33+
"print(\"Data shape: {}\".format(boston.data.shape))"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 3,
39+
"metadata": {},
40+
"outputs": [
41+
{
42+
"name": "stdout",
43+
"output_type": "stream",
44+
"text": [
45+
"X.shape: (506, 104)\n"
46+
]
47+
}
48+
],
49+
"source": [
50+
"X, y = mglearn.datasets.load_extended_boston()\n",
51+
"print(\"X.shape: {}\".format(X.shape))"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": []
60+
}
61+
],
62+
"metadata": {
63+
"kernelspec": {
64+
"display_name": "Python 3",
65+
"language": "python",
66+
"name": "python3"
67+
},
68+
"language_info": {
69+
"codemirror_mode": {
70+
"name": "ipython",
71+
"version": 3
72+
},
73+
"file_extension": ".py",
74+
"mimetype": "text/x-python",
75+
"name": "python",
76+
"nbconvert_exporter": "python",
77+
"pygments_lexer": "ipython3",
78+
"version": "3.8.3"
79+
}
80+
},
81+
"nbformat": 4,
82+
"nbformat_minor": 4
83+
}

0 commit comments

Comments
 (0)