diff --git a/Chapter11/Exercise150-155/Exercise150-155.ipynb b/Chapter11/Exercise150-155/Exercise150-155.ipynb deleted file mode 100644 index 4ec1969..0000000 --- a/Chapter11/Exercise150-155/Exercise150-155.ipynb +++ /dev/null @@ -1,827 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
140.562555.68378214-0.234571412-0.6996483983.19983277619.110426337.97553179474.242224920
0102.50781258.8824300.465318-0.5150881.67725814.86014610.576487127.3935800
1103.01562539.3416490.3233281.0511643.12123721.7446697.73582263.1719090
2136.75000057.178449-0.068415-0.6362383.64297720.9592806.89649953.5936610
388.72656240.6722250.6008661.1234921.17893011.46872014.269573252.5673060
493.57031246.6981140.5319050.4167211.63628814.54507410.621748131.3940040
\n", - "
" - ], - "text/plain": [ - " 140.5625 55.68378214 -0.234571412 -0.699648398 3.199832776 \\\n", - "0 102.507812 58.882430 0.465318 -0.515088 1.677258 \n", - "1 103.015625 39.341649 0.323328 1.051164 3.121237 \n", - "2 136.750000 57.178449 -0.068415 -0.636238 3.642977 \n", - "3 88.726562 40.672225 0.600866 1.123492 1.178930 \n", - "4 93.570312 46.698114 0.531905 0.416721 1.636288 \n", - "\n", - " 19.11042633 7.975531794 74.24222492 0 \n", - "0 14.860146 10.576487 127.393580 0 \n", - "1 21.744669 7.735822 63.171909 0 \n", - "2 20.959280 6.896499 53.593661 0 \n", - "3 11.468720 14.269573 252.567306 0 \n", - "4 14.545074 10.621748 131.394004 0 " - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "df = pd.read_csv('HTRU_2.csv')\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Mean of integrated profileStandard deviation of integrated profileExcess kurtosis of integrated profileSkewness of integrated profileMean of DM-SNR curveStandard deviation of DM-SNR curveExcess kurtosis of DM-SNR curveSkewness of DM-SNR curveClass
0140.56250055.683782-0.234571-0.6996483.19983319.1104267.97553274.2422250
1102.50781258.8824300.465318-0.5150881.67725814.86014610.576487127.3935800
2103.01562539.3416490.3233281.0511643.12123721.7446697.73582263.1719090
3136.75000057.178449-0.068415-0.6362383.64297720.9592806.89649953.5936610
488.72656240.6722250.6008661.1234921.17893011.46872014.269573252.5673060
\n", - "
# Column labels for the nine fields of HTRU_2.csv. The file has no header row:
# loading it without header=None (as the first cell does) promotes the first
# data record to column names.
HTRU2_COLUMNS = [
    'Mean of integrated profile', 'Standard deviation of integrated profile',
    'Excess kurtosis of integrated profile', 'Skewness of integrated profile',
    'Mean of DM-SNR curve', 'Standard deviation of DM-SNR curve',
    'Excess kurtosis of DM-SNR curve', 'Skewness of DM-SNR curve', 'Class',
]

# Pass the labels as a FLAT list. The previous `df.columns = [[...]]` wrapped
# them in a second list, which pandas interprets as a one-level MultiIndex:
# df.info() then shows tuple labels like "(Class,)" and df.Class yields a
# one-column DataFrame, so df.Class.count() returns a Series instead of an int.
df = pd.read_csv('HTRU_2.csv', header=None, names=HTRU2_COLUMNS)

df.head()
def clf_model(model):
    """Cross-validate ``model`` and print the per-fold scores and their mean.

    The estimator is scored with ``cross_val_score`` against the notebook-level
    feature matrix ``X`` and target ``y`` (both defined in an earlier cell).
    No ``cv`` or ``scoring`` argument is passed, so fold count and metric are
    whatever the installed scikit-learn uses by default; the output recorded
    in this notebook shows three fold scores.

    Parameters
    ----------
    model : estimator
        Unfitted classifier instance handed straight to ``cross_val_score``.

    Returns
    -------
    None
        Results are only printed.
    """
    fold_scores = cross_val_score(model, X, y)
    print('Scores:', fold_scores)

    mean_score = fold_scores.mean()
    print('Mean score:', mean_score)
- { - "name": "stdout", - "output_type": "stream", - "text": [ - "Scores: [0.95692978 0.92474019 0.94836547]\n", - "Mean score: 0.9433451467026904\n" - ] - } - ], - "source": [ - "from sklearn.naive_bayes import GaussianNB\n", - "clf_model(GaussianNB())" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Scores: [0.96899615 0.97200805 0.97082984]\n", - "Mean score: 0.9706113439320188\n" - ] - } - ], - "source": [ - "from sklearn.neighbors import KNeighborsClassifier\n", - "clf_model(KNeighborsClassifier())" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Scores: [0.96782303 0.96161582 0.9673093 ]\n", - "Mean score: 0.9655827179728252\n" - ] - } - ], - "source": [ - "from sklearn.tree import DecisionTreeClassifier\n", - "clf_model(DecisionTreeClassifier(random_state = 0))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Scores: [0.97586727 0.97820986 0.97720034]\n", - "Mean score: 0.9770924870413066\n" - ] - } - ], - "source": [ - "from sklearn.ensemble import RandomForestClassifier\n", - "clf_model(RandomForestClassifier(random_state = 0))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "#Exercise154 begins from here" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Class 17898\n", - "dtype: int64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.Class.count()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Class 1639\n", - "dtype: int64" - ] - }, 
def confusion(model):
    """Fit ``model`` on the training split and print its hold-out diagnostics.

    Uses the notebook-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for evaluation (produced by ``train_test_split`` in
    an earlier cell). Prints the confusion matrix first, then the
    classification report, each prefixed with its label.

    Parameters
    ----------
    model : estimator
        Classifier instance exposing ``fit`` and ``predict``; it is fitted
        in place.

    Returns
    -------
    estimator
        The same ``model`` object, now fitted. When the call is the last
        expression of a cell, its repr is displayed as the cell output.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    matrix = confusion_matrix(y_test, predicted)
    print('Confusion Matrix:', matrix)

    report = classification_report(y_test, predicted)
    print('Classification Report:', report)

    return model
tol=0.0001,\n", - " verbose=0, warm_start=False)" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "confusion(LogisticRegression(random_state = 0))" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion Matrix: [[4077 38]\n", - " [ 69 291]]\n", - "Classification Report: precision recall f1-score support\n", - "\n", - " 0 0.98 0.99 0.99 4115\n", - " 1 0.88 0.81 0.84 360\n", - "\n", - "avg / total 0.98 0.98 0.98 4475\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", - " metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n", - " weights='uniform')" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "confusion(KNeighborsClassifier())" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion Matrix: [[3946 169]\n", - " [ 52 308]]\n", - "Classification Report: precision recall f1-score support\n", - "\n", - " 0 0.99 0.96 0.97 4115\n", - " 1 0.65 0.86 0.74 360\n", - "\n", - "avg / total 0.96 0.95 0.95 4475\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "GaussianNB(priors=None)" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "confusion(GaussianNB())" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion Matrix: [[4093 22]\n", - " [ 61 299]]\n", - "Classification Report: precision recall f1-score support\n", - "\n", - " 0 0.99 0.99 0.99 4115\n", - " 1 0.93 0.83 0.88 360\n", - "\n", - "avg / total 0.98 0.98 0.98 4475\n", - "\n" - ] - }, - { - "data": { - 
"text/plain": [ - "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", - " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", - " oob_score=False, random_state=0, verbose=0, warm_start=False)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "confusion(RandomForestClassifier(random_state = 0))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "#Exercise156 begins from here" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Scores: [0.97519692 0.98122695 0.97652976]\n", - "Mean score: 0.9776512086736252\n" - ] - } - ], - "source": [ - "from sklearn.ensemble import AdaBoostClassifier\n", - "clf_model(AdaBoostClassifier(random_state = 0))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion Matrix: [[4094 21]\n", - " [ 63 297]]\n", - "Classification Report: precision recall f1-score support\n", - "\n", - " 0 0.98 0.99 0.99 4115\n", - " 1 0.93 0.82 0.88 360\n", - "\n", - "avg / total 0.98 0.98 0.98 4475\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n", - " learning_rate=1.0, n_estimators=50, random_state=0)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "confusion(AdaBoostClassifier(random_state = 0))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": 
"Python_Workshop", - "language": "python", - "name": "python_workshop" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}