diff --git a/sravansmart7733@gmail.com/Classification-Imbalance.ipynb b/sravansmart7733@gmail.com/Classification-Imbalance.ipynb new file mode 100644 index 000000000..558160b98 --- /dev/null +++ b/sravansmart7733@gmail.com/Classification-Imbalance.ipynb @@ -0,0 +1,944 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.363787...-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425...-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.514654...0.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024...-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.817739...-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "\n", + " V8 V9 ... V21 V22 V23 V24 V25 \\\n", + "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n", + "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n", + "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n", + "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n", + "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n", + "\n", + " V26 V27 V28 Amount Class \n", + "0 -0.189115 0.133558 -0.021053 149.62 0 \n", + "1 0.125895 -0.008983 0.014724 2.69 0 \n", + "2 -0.139097 -0.055353 -0.059752 378.66 0 \n", + "3 -0.221929 0.062723 0.061458 123.50 0 \n", + "4 0.502292 0.219422 0.215153 69.99 0 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", + "import tensorflow as tf\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.manifold import TSNE\n", + "from sklearn.decomposition import PCA, TruncatedSVD\n", + "import matplotlib.patches as mpatches\n", + "import time\n", + "\n", + "# Classifier Libraries\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "import collections\n", + "\n", + "\n", + "# Other Libraries\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import make_pipeline\n", + "from imblearn.pipeline import make_pipeline as imbalanced_make_pipeline\n", + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.under_sampling import NearMiss\n", + "from imblearn.metrics import classification_report_imbalanced\n", + "from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report\n", + "from collections import Counter\n", + "from sklearn.model_selection import KFold, StratifiedKFold\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "\n", + "df = pd.read_csv('D:/creditcard.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No Frauds 99.83 % of the dataset\n", + "Frauds 0.17 % of the dataset\n" + ] + } + ], + "source": [ + "print('No Frauds', round(df['Class'].value_counts()[0]/len(df) * 100,2), '% of the dataset')\n", + "print('Frauds', round(df['Class'].value_counts()[1]/len(df) * 100,2), '% of the dataset')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Class Distributions \\n (0: No Fraud || 1: Fraud)')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEoCAYAAACU+rytAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAfBElEQVR4nO3de5xcRZ338c+XBBCvwCZAyIWgRuWiBpzFPIqKohBwXRRBIqtENhoeBAV1VfAGgq76Ei8gFxeWkMAqkQdU4gobI6DIisgEkFvEjIBhSEgC4RJEwMDv+aOq4aSnZ6ZnqO6ZTL7v16tfM12nTp3qJvR3zjnVVYoIzMzMStpkqDtgZmYjj8PFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi23QJN0t6d+Guh/9kTRZUkjqaEHbJ0q6tfJ8rqT/Ln2c3HbLXoeNLA4XG7YkbSvpVEl/lvSEpHslXS5p/6HuW03+oK09HpN0p6QfStqzruo9wDjgpibbHUhongK8ZQDdboqkX0k6va54QK/DNl4OFxuWJE0GbgD2BY4HXgO8Hfg58P0h61hjHyF94O4EzAKeBK6W9OlahYh4KiLui4h1pQ4qaRNJoyLi0Yh4oFS7fWnF67CRyeFiw9WZgICOiLgoIu6IiCURcTrw2t52kvRJSTdL+ms+0/lPSVtWtr9E0gWSVkl6PJ9pHFvZfoSkP+VtqyUtlDS6n74+lD9w/xIRV0XEh4CvA1+T9PLc7nqXkyRtKuk0ScvzWdk9kr6et/0K2AH4Zu2sKJd/SNKjkvbPl8GeBHaqvyxWeS1fkLQy73OepC0q23qclVQvp0maSzobOqpyZja50WUxSW+WdF1+z1ZK+o6kzeqOdaakf5d0f37vT5G0SaXOgfm/298krZH0a0nb9vO+2zDmcLFhR9LWwHTg9Ih4tH57RDzYx+5PA8cCuwCHAnsA36ts/wrwauCfgFcB/wrcm4/bAZwBfBl4JelM6X8G+TK+Rfr/6929bP848B5gBjAFOAS4I287EOgGTiKdEY2r7Pc84AvAEcDOwF96af8tpBDeG3gvsA/wjQH0/xjgWuC8Sh/uqa8kaTxwOXAjsBvpzO39wNfqqv4LsA54A3A06b/RIbmN7YD5wDzS2d+bgQsG0Fcbhvr7i8xsKLycdNayZKA7RsR3K0/vlvQZ4FJJMyPiadIZwY0R8ftanUr9ScBfgQURsZb0wf2HQfSfiHhA0irgpb1U2QH4E/CbSBP8LQN+m/ddI+kpYG1E3Fe33yjgYxGxuFYgqVH7TwGH53C+VdJngXMlHR8Rf22i/w9LehJ4rNqHBsf6KLAC+Gh+f5dIOg74D0lfjIjHcr3bI+JL+fc/SfoIKfguBLYHNgUujohaWPY4E7MNi89cbDhq+GnZ1I7S2yQtktQtaS3wY2AzYLtc5SzgfZL+kC/NVG+ELyIFyl2SfiBppqQXDbYvpNfR28ywc4GppA/aMyS9s3qZqA/raO5m+s11Z33Xkt6HlzWx70DsBFybg6Xmmnysl1f7U7ffcmCb/PsfgF+SQvASSUdKGlu4n9ZmDhcbjpaSPpR3GshOknYg3fBfAhwMvI502QvShx0RcTnprOEUYAzwc0nn5W1rgd2B95HOJI4H/ihp+4G+AEljgLHAnY22R8QNwGTgc6T/D+cBi5oImCci4qmB9qeBp+kZ4psOop2+ArRa/vcG2zaBNEiAdNluH1IIzQKWSur13poNfw4XG3YiYg2wEDha0gvrt1dv0NfpIIXIJyLi2oj4E+mSS33790fEBfnG+yxgpqTN87Z1EXFlRNRGqL2AdH9moD5F+gC/tLcKEbE2Iv5fRBwJvBN4G8/+tf8k6RLYYL1a0gsqz6flNv+cn69m/Xs50HOgRDN9uB34P3WhuGfdsfoVybUR8WXgH0lnNoc0u78NP77nYsPVR0n3IDolfZH0F62At5LOKCY12Gcp6Q+mYyX9mPSBemy1gqSTSEOcbyP9+z8QuDMinpD0T6TLRlcDa/KxXkT/9362zDela5edZgKHAZ+JiK5GO0j6JOlexU2kv+oPBR4h3ciHdC/oTZL+i3S2cn8/fag3GpiTX+/2pNFr51Tut1wJfFfSP5MGEhwBTGT9e1B3A3soDQt/lPSe1DuT9B6fKelU0j2mr5MGYzzWoH4PkqaRBk8sBFaSBgZMJAWXbaAcLjYsRcRdknYnXTb6BjAeeIB0ff6IXva5WdIxwGdJo8J+C/wb8KNKtSeArwI7Ao8DvwPelbc9RBrd9SXg+aS/vD8cEb/pp7vnVNpekdvcKyKu7mOftcCnSSPFgjTaar/KB/KXgP/Ifdicgd+H+jUpQK/Kr+US4DOV7XNIZ2Zz8vMzgZ+QLhXWnEK6XHc7sAXpPVtPRNwraT/gm6SgfAj4Iem/W7MeBt4IfAzYkjQq7eSI+K8BtGHDjLwSpZmZleZ7LmZmVpzDxczMinO4mJlZcQ4XMzMrzuFiZmbFOVys5fKsvXP6r2l9kfRHSV/oY/voPGPxhErZhyX9sj09HF4k7Znfj+3y84Py7M2Dnl7ImudwsZaStA3wSdL3TqrlH5V0V56mfbGkNw2i7bn5w+MLdeV75fIxve3bRNu1qeXrHz8dbJvDVX6/fqa0REFI+sAg2/lKL+/ZYGY4aIVLSDMuHDTUHdkYOFys1T4M/D4inpljS9IhwKnAv5O+jf1b4HJJjb5135/Hgc+0cKLD6Tw75fw44EONKikZzNxcw8ELSTMgfJw0bctzcRvrv1/jSBOC9lBd86Ud8uzTc0mv01rM4WKtdiiwoK7sk8DciDgnLwD2MdI3248cRPtXkaYp+WJfldTPglZ9eCAvBFZ7PJTbe3v+q3y6pE7St/P3ljRF0gI9u0jX4vwN9mpfulVZoCyXXSPpu5Xn2+Z2/qa05PHM5t6OgYuI/46Iz0fEJfQ+CWWz1tW9X/dFxBMAkuZLuljSFyUtJ889Junw/D6tlXRfrlebxZr8Hkd1njlJr8plu1bK3qW00NvfJF1F4+UOFgB7Vi8dWms4XKxllBb92hnorJRtRpqt+Bd11X9BWkiqVm+upLubOMzTwHHA/5XUcDp5Nb+g1WB8gzTX2atIr/NFpJmZ356PdSlpPZkpA2z3AtJ0K28jzX82izTf1pDIl7xKLG28L+lD/x1ALXQ3JU0X81rS9DsTGOBiYfm//SXAz0hLGZxDmuOs3lLSFDVvabDNCvLcYtZKk0hzYq2olI0hzbS7sq7uStIHcs0KmpxVNyIuk/S/pDnDZjSo0uyCVo1cLam6Vsl+dXONfSkiqpd97idNjFlzUp4c8r00/rDrQdLOpA/faRFxXS77ENBwEsw2WQ38sYl6r5ZUXUfmzxFRnW35EWB2RDwzBX9EnF3Zfqeko4EbJY0ZwISdRwF3RMSn8vM78vv4+WqliAhJK0jLHVgLOVyslWprtj/eYFv95Zf11gXJU94PxGeA30k6pcG2/ha0ql/IqupQ1l8V8d667Z3VJ/nSzYmkKfTHkf4fex7we5q3E2lRsGfajog7JdUHcttExKmk+2T9uQP458rz+ns4N1eDBUDSHqSJOl8DbMWzV1QmkcK6GTuRFkSrqn9e8zee/bdpLeJwsVaqfTBsxbNnL/eTluDdrq7uNvQ8m2laRFwv6RLSZaqT6zY3u6BVI929TZuf1S8Z/B3SpaxPk840HgN+QF6sLOtvoa7atg1xVtknB/J+Ka3Ns5B0OetfSGdI40krU9bes9ofBdX3rH7wxECGF2+dj2Mt5Hsu1kp/Jl0G2blWEBFPAotJl32q3kFeQ/45+BzwJtIIr6oiC1o1aU/SYIUfR8TNpEWv6m8sr7dQl6QtgFfU9Xc0afGzWp0dgW0L93U42IU0zf5nI+I3EfFHer7OWhBUFzebWlfndtL6PVX1z1FatnoS61+6tBZwuFjL5MtQvyR94FZ9G/hQ/oLfTkqLTG0PfL9WQdLXJF0xwON1AWcDx9RtOjO3f2Y+3jsZ4IJWA/An4EBJu0l6DemsZfO6OlcCH8wj2HYBzqOy4mNE3E56386RNE3SbrnO3wr3FUiX8iRNlTSVdAYwKT+fWKlzjKRbe29l0O4iLZb2cUkvzfenvlRX53bgPtL9qyl59N1xdXXOBHaS9E1Jr5Q0g2eXuK56I2n9mOuKvgrrweFirXY2cIik6ofnj0irF36BtMDUnsD+EfGXyn7jSKs6DtRJpPsVz4iIe0kjk3bLx5sDXMjAFrRq1jHAg8D/kkaNXU3PM7Kv5vKfkS4JXUXP+z6HkRbN+hVpxNm8/LwVppFG0t1IuhT11fz7CZU6Y0kj4oqKiOWkEJhBCpHjSUtEV+s8kbfvQnqfPk/df7v8h8XBwHtIC8p9tL5O9n7g/HwGbS3kxcKs5SRdC5wZEQMaXmoDI2k06SxgYkR057IPAzMi4u197rwRkLQ9aXDGa2rvj7WOz1ysHY7A/9Zs6E0GPuJgaQ+PFrOWyze2+xrua9ZyEfFcB4zYADhczEaOp4Evk0bo1dzAc58vzGzAfM/FzMyK85lLNmbMmJg8efJQd8PMbIOyePHi+yOix6zkDpds8uTJdHZ29l/RzMyeIekvjco9gsfMzIpzuJiZWXEOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIrzN/QLmjTpmqHugg1Dy5bVL8RpNvL5zMXMzIpzuJiZWXEOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxbUsXCRNlHSVpCWSbpN0TC4/UdK9km7Kj/0r+xwvqUvSHZL2rZRPz2Vdko6rlO8o6TpJSyX9SNJmuXzz/Lwrb5/cqtdpZmY9tfLMZR3wqYjYCZgGHCVp57ztOxExNT8uA8jbZgC7ANOBMyWNkjQKOAPYD9gZeH+lnW/ktqYADwKzcvks4MGIeDnwnVzPzMzapGXhEhErIuKG/PtaYAkwvo9dDgDmR8QTEXEX0AXskR9dEXFnRDwJzAcOkCTgbcDFef95wLsrbc3Lv18M7J3rm5lZG7Tlnku+LLUbcF0uOlrSzZLmSNoql40H7qns1p3Leiv/B+ChiFhXV75eW3n7w7m+mZm1QcvDRdILgUuAYyPiEeAs4GXAVGAF8K1a1Qa7xyDK+2qrvm+zJXVK6ly9enWfr8PMzJrX0nCRtCkpWH4QET8GiIiVEfFURDwNnEO67AXpzGNiZfcJwPI+yu8HtpQ0uq58vbby9pcAa+r7FxFnR0RHRHSMHTv2ub5cMzPLWjlaTMC5wJKI+HalfFyl2nuAW/PvC4AZeaTXjsAU4PfA9cCUPDJsM9JN/wUREcBVwEF5/5nApZW2ZubfDwKuzPXNzKwNRvdfZdDeCHwQuEXSTbnsc6TRXlNJl6nuBo4AiIjbJF0E3E4aaXZURDwFIOloYCEwCpgTEbfl9j4LzJf0FeBGUpiRf14gqYt0xjKjha/TzMzqyH/QJx0dHdHZ2fmc2pg06ZpCvbGRZNmyPYe6C2YtI2lxRHTUl/sb+mZmVpzDxczMinO4mJlZcQ4XMzMrzuFiZmbFOVzMzKw4h4uZmRXncDEzs+IcLmZmVpzDxczMinO4mJlZcQ4XMzMrzuFiZmbFOVzMzKw4h4uZmRXncDEzs+IcLmZmVpzDxczMinO4mJlZcQ4XMzMrzuFiZmbFOVzMzKw4h4uZmRXncDEzs+IcLmZmVpzDxczMinO4mJlZcS0LF0kTJV0laYmk2yQdk8u3lrRI0tL8c6tcLkmnSeqSdLOk3Sttzcz1l0qaWSl/naRb8j6nSVJfxzAzs/Zo5ZnLOuBTEbETMA04StLOwHHAFRExBbgiPwfYD5iSH7OBsyAFBXAC8HpgD+CESliclevW9puey3s7hpmZtUHLwiUiVkTEDfn3tcASYDxwADAvV5sHvDv/fgBwfiS/A7aUNA7YF1gUEWsi4kFgETA9b3txRFwbEQGcX9dWo2OYmVkbtOWei6TJwG7AdcC2EbECUgAB2+Rq44F7Krt157K+yrsblNPHMczMrA1aHi6SXghcAhwbEY/0VbVBWQyifCB9my2pU1Ln6tWrB7KrmZn1oaXhImlTUrD8ICJ+nItX5kta5J+rcnk3MLGy+wRgeT/lExqU93WM9UTE2RHREREdY8eOHdyLNDOzHlo5WkzAucCSiPh2ZdMCoDbiayZwaaX8sDxqbBrwcL6ktRDYR9JW+Ub+PsDCvG2tpGn5WIfVtdXoGGZm1gajW9j2G4EPArdIuimXfQ74OnCRpFnAMuDgvO0yYH+gC3gMOBwgItZIOhm4Ptc7KSLW5N+PBOYCWwCX5wd9HMPMzNqgZeESEdfQ+L4IwN4N6gdwVC9tzQHmNCjvBHZtUP5Ao2OYmVl7+Bv6ZmZWnMPFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi5mZFedwMTOz4hwuZmZWnMPFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi5mZFedwMTOz4hwuZmZWnMPFzMyKc7iYmVlxDhczMyuuqXCRdEUzZWZmZgCj+9oo6XnA84ExkrYClDe9GNi+xX0zM7MNVJ/hAhwBHEsKksU8Gy6PAGe0sF9mZrYB6zNcIuJU4FRJH4uI77WpT2ZmtoHr78wFgIj4nqQ3AJOr+0TE+S3ql5mZbcCaChdJFwAvA24CnsrFAThczMysh6bCBegAdo6IaGVnzMxsZGj2ey63Atu1siNmZjZyNBsuY4DbJS2UtKD26GsHSXMkrZJ0a6XsREn3SropP/avbDteUpekOyTtWymfnsu6JB1XKd9R0nWSlkr6kaTNcvnm+XlX3j65yddoZmaFNHtZ7MRBtD0XOJ2e92W+ExGnVAsk7QzMAHYhDXv+paRX5M1nAO8AuoHrJS2IiNuBb+S25kv6PjALOCv/fDAiXi5pRq53yCD6b2Zmg9TsaLFfD7ThiLh6AGcNBwDzI+IJ4C5JXcAeeVtXRNwJIGk+cICkJcDbgENznXmkADwrt3ViLr8YOF2SfL/IzKx9mp3+Za2kR/LjcUlPSXpkkMc8WtLN+bLZVrlsPHBPpU53Luut/B+AhyJiXV35em3l7Q/n+mZm1iZNhUtEvCgiXpwfzwPeS7rkNVBnkYY0TwVWAN/K5WpQNwZR3ldbPUiaLalTUufq1av76reZmQ3AoGZFjoifki5LDXS/lRHxVEQ8DZzDs5e+uoGJlaoTgOV9lN8PbClpdF35em3l7S8B1vTSn7MjoiMiOsaOHTvQl2NmZr1o9kuUB1aebkL63suA72FIGhcRK/LT95CGOAMsAH4o6dukG/pTgN+TzkKmSNoRuJd00//QiAhJVwEHAfOBmcCllbZmAtfm7Vf6fouZWXs1O1rsXZXf1wF3k26c90rShcBepBmVu4ETgL0kTSUF092kiTGJiNskXQTcnts/KiKeyu0cDSwERgFzIuK2fIjPAvMlfQW4ETg3l58LXJAHBawhBZKZmbWR/Ed90tHREZ2dnc+pjUmTrinUGxtJli3bc6i7YNYykhZHREd9ebOjxSZI+kn+UuRKSZdImlC+m2ZmNhI0e0P/PNK9jO1JQ31/lsvMzMx6aDZcxkbEeRGxLj/mAh5eZWZmDTUbLvdL+oCkUfnxAeCBVnbMzMw2XM2Gy78C7wPuI3358SDg8FZ1yszMNmzNDkU+GZgZEQ8CSNoaOIUUOmZmZutp9szlNbVgAYiINcBuremSmZlt6JoNl00qk0zWzlyaPesxM7ONTLMB8S3gt5IuJn27/n3AV1vWKzMz26A1u57L+ZI6SZNVCjgwL9hlZmbWQ9OXtnKYOFDMzKxfg5py38zMrC8OFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIprWbhImiNplaRbK2VbS1okaWn+uVUul6TTJHVJulnS7pV9Zub6SyXNrJS/TtIteZ/TJKmvY5iZWfu08sxlLjC9ruw44IqImAJckZ8D7AdMyY/ZwFmQggI4AXg9sAdwQiUszsp1a/tN7+cYZmbWJi0Ll4i4GlhTV3wAMC//Pg94d6X8/Eh+B2wpaRywL7AoItZExIPAImB63vbiiLg2IgI4v66tRscwM7M2afc9l20jYgVA/rlNLh8P3FOp153L+irvblDe1zHMzKxNhssNfTUoi0GUD+yg0mxJnZI6V69ePdDdzcysF+0Ol5X5khb556pc3g1MrNSbACzvp3xCg/K+jtFDRJwdER0R0TF27NhBvygzM1tfu8NlAVAb8TUTuLRSflgeNTYNeDhf0loI7CNpq3wjfx9gYd62VtK0PErssLq2Gh3DzMzaZHSrGpZ0IbAXMEZSN2nU19eBiyTNApYBB+fqlwH7A13AY8DhABGxRtLJwPW53kkRURskcCRpRNoWwOX5QR/HMDOzNmlZuETE+3vZtHeDugEc1Us7c4A5Dco7gV0blD/Q6BhmZtY+w+WGvpmZjSAOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxTlczMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxQ1JuEi6W9Itkm6S1JnLtpa0SNLS/HOrXC5Jp0nqknSzpN0r7czM9ZdKmlkpf11uvyvvq/a/SjOzjddQnrm8NSKmRkRHfn4ccEVETAGuyM8B9gOm5Mds4CxIYQScALwe2AM4oRZIuc7syn7TW/9yzMysZjhdFjsAmJd/nwe8u1J+fiS/A7aUNA7YF1gUEWsi4kFgETA9b3txRFwbEQGcX2nLzMzaYKjCJYBfSFosaXYu2zYiVgDkn9vk8vHAPZV9u3NZX+XdDcrNzKxNRg/Rcd8YEcslbQMskvTHPuo2ul8Sgyjv2XAKttkAkyZN6rvHZmbWtCE5c4mI5fnnKuAnpHsmK/MlLfLPVbl6NzCxsvsEYHk/5RMalDfqx9kR0RERHWPHjn2uL8vMzLK2h4ukF0h6Ue13YB/gVmABUBvxNRO4NP++ADgsjxqbBjycL5stBPaRtFW+kb8PsDBvWytpWh4ldlilLTMza4OhuCy2LfCTPDp4NPDDiPgfSdcDF0maBSwDDs71LwP2B7qAx4DDASJijaSTgetzvZMiYk3+/UhgLrAFcHl+mJlZm7Q9XCLiTuC1DcofAPZuUB7AUb20NQeY06C8E9j1OXfWzMwGZTgNRTYzsxHC4WJmZsU5XMzMrDiHi5mZFedwMTOz4hwuZmZWnMPFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi5mZFedwMTOz4hwuZmZWnMPFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi5mZFedwMTOz4hwuZmZWnMPFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi5mZFTdiw0XSdEl3SOqSdNxQ98fMbGMyIsNF0ijgDGA/YGfg/ZJ2HtpemZltPEZkuAB7AF0RcWdEPAnMBw4Y4j6ZmW00Rg91B1pkPHBP5Xk38Poh6ovZkLtm0qSh7oINQ3suW9aytkdquKhBWfSoJM0GZuenj0q6o6W92riMAe4f6k4MB2r0r9GGkv9t1pT5x7lDo8KRGi7dwMTK8wnA8vpKEXE2cHa7OrUxkdQZER1D3Q+zev632R4j9Z7L9cAUSTtK2gyYASwY4j6ZmW00RuSZS0Ssk3Q0sBAYBcyJiNuGuFtmZhuNERkuABFxGXDZUPdjI+bLjTZc+d9mGyiix31uMzOz52Sk3nMxM7Mh5HCxojztjg1XkuZIWiXp1qHuy8bA4WLFeNodG+bmAtOHuhMbC4eLleRpd2zYioirgTVD3Y+NhcPFSmo07c74IeqLmQ0hh4uV1NS0O2Y28jlcrKSmpt0xs5HP4WIledodMwMcLlZQRKwDatPuLAEu8rQ7NlxIuhC4FnilpG5Js4a6TyOZv6FvZmbF+czFzMyKc7iYmVlxDhczMyvO4WJmZsU5XMzMrDiHi9kQkLSdpPmS/izpdkmXSXqFZ+y1kWLErkRpNlxJEvATYF5EzMhlU4Fth7RjZgX5zMWs/d4K/D0ivl8riIibqEz6KWmypN9IuiE/3pDLx0m6WtJNkm6V9CZJoyTNzc9vkfSJ9r8ks/X5zMWs/XYFFvdTZxXwjoh4XNIU4EKgAzgUWBgRX83r5zwfmAqMj4hdASRt2bqumzXH4WI2PG0KnJ4vlz0FvCKXXw/MkbQp8NOIuEnSncBLJX0P+DnwiyHpsVmFL4uZtd9twOv6qfMJYCXwWtIZy2bwzIJXbwbuBS6QdFhEPJjr/Qo4CvjP1nTbrHkOF7P2uxLYXNJHagWS/hHYoVLnJcCKiHga+CAwKtfbAVgVEecA5wK7SxoDbBIRlwBfBHZvz8sw650vi5m1WUSEpPcA35V0HPA4cDdwbKXamcAlkg4GrgL+msv3Aj4t6e/Ao8BhpNU+z5NU+2Px+Ja/CLN+eFZkMzMrzpfFzMysOIeLmZkV53AxM7PiHC5mZlacw8XMzIpzuJiZWXEOFzMzK87hYmZmxf1/ETBSFCgbbq8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "colors = [\"#0101DF\", \"#DF0101\"]\n", + "\n", + "sns.countplot('Class', data=df, palette=colors)\n", + "plt.title('Class Distributions \\n (0: No Fraud || 1: Fraud)', fontsize=14)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Separate input features and target\n", + "y = df.Class\n", + "X = df.drop('Class', axis=1)\n", + "\n", + "# setting up testing and training sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Score : 99.92135052386169 %\n" + ] + }, + { + "data": { + "text/plain": [ + "0 71108\n", + "1 94\n", + "Name: 0, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr = LogisticRegression(solver='liblinear').fit(X_train, y_train)\n", + " \n", + "# Predict on training set\n", + "lr_pred = lr.predict(X_test)\n", + "\n", + "# Checking accuracy\n", + "a = accuracy_score(y_test, lr_pred)\n", + "print(\"Accuracy Score : \", a*100,\"%\")\n", + "\n", + "\n", + "# Checking unique values\n", + "predictions = pd.DataFrame(lr_pred)\n", + "predictions[0].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F1 Score : 0.7522123893805309\n", + "Recall Score : 0.6439393939393939\n" + ] + } + ], + "source": [ + "# f1 score\n", + "print(\"F1 Score : \", f1_score(y_test, lr_pred))\n", + "\n", + "# recall score\n", + "print(\"Recall Score : \", recall_score(y_test, lr_pred))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# train model\n", + "rfc = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)\n", + "\n", + "# predict on test set\n", + "rfc_pred = rfc.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9995365298727564" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy_score(y_test, rfc_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F1 score : 0.8607594936708862\n", + "Recall score : 0.7727272727272727\n" + ] + } + ], + "source": [ + "print(\"F1 score : \",f1_score(y_test, rfc_pred))\n", + "\n", + "print(\"Recall score : \", recall_score(y_test, rfc_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Score : 99.7415802926884\n" + ] + } + ], + "source": [ + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "gb_clf = GradientBoostingClassifier(n_estimators=20, max_features=2, max_depth=2, random_state=0)\n", + "gb = gb_clf.fit(X_train, y_train)\n", + "gb_pred = gb.predict(X_test)\n", + "\n", + "print(\"Accuracy Score : \", accuracy_score(y_test, gb_pred)*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F1 score : 0.17117117117117117\n", + "Recall score : 0.14393939393939395\n" + ] + } + ], + "source": [ + "print(\"F1 score : \",f1_score(y_test, gb_pred))\n", + "\n", + "print(\"Recall score : \", recall_score(y_test, gb_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
5931848789.0-0.8923441.7251852.0140722.969493-0.3581030.2082470.1731950.508835-1.421750...-0.219772-0.467348-0.1208250.3963100.1973820.1214020.2206450.0918218.270
7479455760.0-6.003422-3.930731-0.0070451.7146693.414667-2.329583-1.901512-2.7461110.887673...1.101671-0.992494-0.6982590.139898-0.205151-0.4724121.775378-0.104285311.911
3673438667.01.179743-1.1641411.015352-0.405885-1.850985-0.503236-1.1364120.0747760.133414...0.3446070.785058-0.1643740.4115980.369617-0.0512430.0222540.038167101.370
12083775978.0-5.1407233.568751-5.8962454.164720-4.091193-1.989960-5.4724362.422821-2.909735...1.1311300.118022-0.3327040.1399410.324758-0.1807690.1778100.66155599.901
64277610.00.7256462.300894-5.3299764.007683-1.730411-1.732193-3.9685931.063728-0.486097...0.5896690.1095410.601045-0.364700-1.8430780.3519090.5945500.0993721.001
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 \\\n", + "59318 48789.0 -0.892344 1.725185 2.014072 2.969493 -0.358103 0.208247 \n", + "74794 55760.0 -6.003422 -3.930731 -0.007045 1.714669 3.414667 -2.329583 \n", + "36734 38667.0 1.179743 -1.164141 1.015352 -0.405885 -1.850985 -0.503236 \n", + "120837 75978.0 -5.140723 3.568751 -5.896245 4.164720 -4.091193 -1.989960 \n", + "6427 7610.0 0.725646 2.300894 -5.329976 4.007683 -1.730411 -1.732193 \n", + "\n", + " V7 V8 V9 ... V21 V22 V23 \\\n", + "59318 0.173195 0.508835 -1.421750 ... -0.219772 -0.467348 -0.120825 \n", + "74794 -1.901512 -2.746111 0.887673 ... 1.101671 -0.992494 -0.698259 \n", + "36734 -1.136412 0.074776 0.133414 ... 0.344607 0.785058 -0.164374 \n", + "120837 -5.472436 2.422821 -2.909735 ... 1.131130 0.118022 -0.332704 \n", + "6427 -3.968593 1.063728 -0.486097 ... 0.589669 0.109541 0.601045 \n", + "\n", + " V24 V25 V26 V27 V28 Amount Class \n", + "59318 0.396310 0.197382 0.121402 0.220645 0.091821 8.27 0 \n", + "74794 0.139898 -0.205151 -0.472412 1.775378 -0.104285 311.91 1 \n", + "36734 0.411598 0.369617 -0.051243 0.022254 0.038167 101.37 0 \n", + "120837 0.139941 0.324758 -0.180769 0.177810 0.661555 99.90 1 \n", + "6427 -0.364700 -1.843078 0.351909 0.594550 0.099372 1.00 1 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.sample(frac=1)\n", + "\n", + "# amount of fraud classes 492 rows.\n", + "fraud_df = df.loc[df['Class'] == 1]\n", + "non_fraud_df = df.loc[df['Class'] == 0][:492]\n", + "\n", + "normal_distributed_df = pd.concat([fraud_df, non_fraud_df])\n", + "\n", + "# Shuffle dataframe rows\n", + "new_df = normal_distributed_df.sample(frac=1, random_state=42)\n", + "\n", + "new_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Distribution of the Classes in the subsample dataset\n", + "1 0.5\n", + "0 0.5\n", + "Name: Class, dtype: float64\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAW7klEQVR4nO3de7RkZX3m8e/DPV5RaZFpkDaKGZCJiK1hvGR5iQaNCmPAaFRAmZCZkCxvE0VXombF64wRFW+DQQFXFB28gJdREcW7SKMICCotg9CCdBMuCggK/uaP/Z6X6tOnu6vbrlOHPt/PWrVO7Xe/e9evqrrrqf3uXXunqpAkCWCbaRcgSVo4DAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaC5kWSI5LcuL7peXj8s5K8cwLrXZakkixv049r07ts6cfaXEmWt5qWbYF1VZJDfveqtFAZCluxJCe2/8Szb9+edm1bwqzn95skq5N8OcnRSbaf1f2ZwCvHXO9rk1w4ZhlXALsB521C6ePUMK+h2R7zgUlOSHJFkluTXJbk1CSPms86NF2GwtbviwwfWqO3p061oi1r5vktA54MfAr4J+BrSe4606mqrq2qX27JB06yQ1XdXlU/r6rbtuS651vb0vku8BDgb4B9gGcA5wLHTbE0zTNDYet3a/vQGr1dOzMzyYPa0MotSX6U5GlJbkxyRJu/1vDIyHJrDSMkeVNb/lftG+b/TLLTOAW2x7h9jsf4qyTXJNlhjOf3s6o6r6reCjwO2B94+ci61ho+SvLMJOe3eq9N8pUku7bn/RrgISNbITOvRbWtkI8nuQl4w/peH+CAJOe11/XcJA8feex1tgJGh52SPA74AHDXkRpe2/rtkOTNSVYluSnJOUn+dNa6Dkzyw/bYXwMevIHXjyQBTgQuBR5dVZ+qqp9U1flV9UbgiRtYdoPve5I9kpzWXuObW13PHpn/6iQ/bVsmP09y8mhdSV6e5Cdt/Rcked6sx1/v8to82027AE1Pkm2ATwDXAf8ZuAvwdmDHzVjdTcALgZ8xfMt8L3Ar8I8bW7CqLkvyxbb8ipFZLwQ+WFW/3pRCqurCJJ8D/pzhA34tSe4HnMIwnPQx4G7AAW32R4B9gacxhAvADSOLvwZ4FfA/gA2dI+YtwIsYXo/XAJ9J8vtVdfMYT+GbwIuBNwAPbG0zIfKB1vaXwCqGrb5PJXlEVX0/yR7AJ4H3Ae8C/hB460Yebz+GLYTnVtXts2dW1fUbWHZj7/u7gZ2AxwO/AP5gZsEkf87wOj4HuAC4L3e8DwCvAw4BjgZ+xPBv9H1Jrquqz4yxvDZHVXnbSm8M3/5uY/hAGb29uc1/MnA7cP+RZR7D8GF3RJte1qaXz1p3AYds4LH/G7ByZPoI4MYNTB/CEE47tem922Psu5Hn9+n1zHsTcPPI9FnAO9v9/du691zPsq8FLpyjvYDjZrWt9fowBEkxfMDO9LkbcD3wX+d67rOW22UDfR4I/Hb0/WrtnwTe3e6/AfgxkJH5/9DWvWw9z/dZbf7Dxvg3tanv+/nAa9bT96UMH/bbzzHvrsCvgMfOan8b8NmNLe9t829uKWz9vgocNatt5pvf3sDPqurykXlnM3zwbJI2lPRi4EEMH4Lbttu4TmP4ZvtM4EMM3z6/U1Xj7vBdpyTW/03++wz7Ii5M8oV2/9SqWjPGeldsvAsA35q5U1U3JrmA4Zv072J/hud10TDi0+0IfKnd3xv4drVPzdm1rEc2Mn/9C278fX878N4kBwJnAp+oqnPbvP/DsDX1/5J8HvgccHpV3crwWu0EfC7J6HPZHrhsjOW1mdynsPW7uapWzrpd0+aN82EwExC9b2Yd2ZPkAIbhmM8DTwcexvDtdPYRQOtVVb8BTgZemGQ74PnACeMuP4d9GMbI53qs2xm2kp7M8E32SOCSJA8dY703/Q41zfgt677247xW2zAE3SMYhnxmbnszhChzrHccP25/996UhcZ536vqBOABDMNeDwa+ObN/pKquYBhO+muGoaV/Ac7NcIDAzGfT01n7uT6E4X3b2PLaTIbC4nYRsLSNQ894JGv/u5j59rzbSNt+s9bzaIYtjn+uqnOq6hJgz82o530MY89/A9yd4QNnkyXZFzgQOHV9fWrwrar6J4YP2SuBv2izf82mbeXMpY9ttw+pfYGLW9Ma4C5J7jHSf/ZrOlcN32P40L/fHEH/s9bnIuCPsvamxMbG2c9ry/19knWed5Kd17PcWO97Va2qquOr6lnAqxnZcq2qW6rqM1X1Eob34SFtvRcx7JvYc47n+tMxltdmcvho67dj27E66vY2VPJF4IfAyUleAvwecCzDfggAqupXGX7X8IokPwHuCbxx1vp+zBAuz2UYqvhThp1/m6Sqfpzk68D/Ak6pql9swvPbBljCcKTMqxgOpXzLXAu0b7h/wvAN92qGb7h7MHwQwTA8sWeS/YHLgV9uxpDEPyRZwxA2r2b4kP9Qm3c2wxbHG5McCzyUIQhHXQbslORJDGFwc3t9/g04McnLGA4hvTfD/ohLq+rjDDt6Xwa8Lcm7gf/EMM6/XlVVSV7A8O/hG0lexxBgdwGewrDPYfbRVTDG+57k7cD/bX3vwRDWF7V5RzB8Bp3NsK/rL4DfAJdU1S+TvAV4Swu4r3LHAQG/rarjN7T8hp6vNmLaOzW8Te7GsCO25ritGunzYOArDN/KLmE4Nv1G2o7m1mdv4BvAzQxHeTyWWTscGYJiTVv248B/H/559flHsIEdzSPth7V1//EmPr/bgGsYdij/HbDDrL5ncceO5r0ZPqiubs97JfDykb47MmxlXMfaO93X2cnK+nc0P4NhaOpWhg/vR8xa7iCGD8pfMYTT8xjZ0dz6vKc9pwJe29q2Z9gRfilD0PwcOB14+Mhyf8awA/aW9r49lw3saB5Zbi+GYZ5Vbd2Xt9fhgJE+m/q+H9f+Xd3S+p0CLG3zDmYIk+sZQvIc4Gkjy6a9lzNbDWuAM4AnjbO8t827pb24UteOof/bqjpxCo/9CuDIqtrgsfWSJsPhIy0ISe4G/EeGo0leP+VypEXLHc1aKN7JMNTxDeB/T7kWadFy+EiS1LmlIEnq7tT7FHbZZZdatmzZtMuQpDuVc88995qqWjLXvDt1KCxbtowVK8Y964AkCSDJT9c3z+EjSVJnKEiSuomGQrvoxgXtYiMrWtu9k5yR5JL2916tPUnekWRlhouf7D/J2iRJ65qPLYXHV9V+VTVz7pRjgDOrai+GU+ke09qfwvAz+70YTpj1nnmoTZI0YhrDRwcBJ7X7JzGcv2Sm/eQafBvYOcluc61AkjQZkw6FAr6Q4Rq1M6fL3bWqrgJof+/b2pcCV4wsu6q1rSXJUUlWJFmxZs0410SRJI1r0oekPrqqrkxyX+CMJD/cQN+5Lg6yzs+tq+p44HiA5cuX+3NsSdqCJrqlUFVXtr+rGS4Q/0jg6plhofZ3deu+iuGc9jN2ZzgXvSRpnkwsFJLcNcndZ+4zXELvQoZzvx/euh3OcG1eWvth7SikA4AbZoaZJEnzY5LDR7sCn2hXBdwO+FBVfS7JOcBHkxzJcBGPQ1v/zwJPZbjgyc3ACyZYW3f/+399Ph5GdzKXX/6YaZfA1+9//2mXoAXoMZdfPtH1TywUqupShssMzm7/d4ZLJs5uL+DoSdUjSdo4f9EsSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6iYeCkm2TfK9JJ9u0w9IcnaSS5J8JMkOrX3HNr2yzV826dokSWubjy2FFwEXj0y/GTi2qvYCrgOObO1HAtdV1YOAY1s/SdI8mmgoJNkd+DPgX9t0gCcAp7YuJwEHt/sHtWna/Ce2/pKkeTLpLYW3AS8Hftum7wNcX1W3telVwNJ2fylwBUCbf0Prv5YkRyVZkWTFmjVrJlm7JC06EwuFJE8DVlfVuaPNc3StMebd0VB1fFUtr6rlS5Ys2QKVSpJmbDfBdT8aeEaSpwI7Afdg2HLYOcl2bWtgd+DK1n8VsAewKsl2wD2BaydYnyRploltKVTVK6tq96paBjwb+FJVPRf4MnBI63Y4cFq7f3qbps3/UlWts6UgSZqcafxO4RXAS5OsZNhncEJrPwG4T2t/KXDMFGqTpEVtksNHXVWdBZzV7l8KPHKOPrcAh85HPZKkufmLZklSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVI3sVBIslOS7yT5fpIfJPmn1v6AJGcnuSTJR5Ls0Np3bNMr2/xlk6pNkjS3SW4p3Ao8oaoeCuwHHJjkAODNwLFVtRdwHXBk638kcF1VPQg4tvWTJM2jiYVCDW5sk9u3WwFPAE5t7ScBB7f7B7Vp2vwnJsmk6pMkrWui+xSSbJvkPGA1cAbwE+D6qrqtdVkFLG33lwJXALT5NwD3mWR9kqS1TTQUqur2qtoP2B14JLD3XN3a37m2Cmp2Q5KjkqxIsmLNmjVbrlhJ0vwcfVRV1wNnAQcAOyfZrs3aHbiy3V8F7AHQ5t8TuHaOdR1fVcuravmSJUsmXbokLSqTPPpoSZKd2/3fA/4EuBj4MnBI63Y4cFq7f3qbps3/UlWts6UgSZqc7TbeZbPtBpyUZFuG8PloVX06yUXAKUleB3wPOKH1PwH4YJKVDFsIz55gbZKkOUwsFKrqfOBhc7RfyrB/YXb7LcChk6pHkrRx/qJZktSNFQpJzhynTZJ057bB4aMkOwF3AXZJci/uOGz0HsB/mHBtkqR5trF9Cn8NvJghAM7ljlD4BfCuCdYlSZqCDYZCVb0deHuSv6uq4+apJknSlIx19FFVHZfkUcCy0WWq6uQJ1SVJmoKxQiHJB4EHAucBt7fmAgwFSdqKjPs7heXAPv7CWJK2buP+TuFC4H6TLESSNH3jbinsAlyU5DsMF88BoKqeMZGqJElTMW4ovHaSRUiSFoZxjz76yqQLkSRN37hHH/2SOy54swPDpTVvqqp7TKowSdL8G3dL4e6j00kOZo4znUqS7tw26yypVfVJ4AlbuBZJ0pSNO3z0zJHJbRh+t+BvFiRpKzPu0UdPH7l/G3AZcNAWr0aSNFXj7lN4waQLkSRN37gX2dk9ySeSrE5ydZKPJdl90sVJkubXuDuaPwCcznBdhaXAp1qbJGkrMm4oLKmqD1TVbe12IrBkgnVJkqZg3FC4Jsnzkmzbbs8D/n2ShUmS5t+4ofBC4FnAz4GrgEMAdz5L0lZm3ENS/xk4vKquA0hyb+AtDGEhSdpKjLul8IczgQBQVdcCD5tMSZKkaRk3FLZJcq+ZibalMO5WhiTpTmLcD/Z/Ab6Z5FSG01s8C3j9xKqSJE3FuL9oPjnJCoaT4AV4ZlVdNNHKJEnzbuwhoBYCBoEkbcU269TZkqStk6EgSeoMBUlSZyhIkrqJhUKSPZJ8OcnFSX6Q5EWt/d5JzkhySft7r9aeJO9IsjLJ+Un2n1RtkqS5TXJL4TbgZVW1N3AAcHSSfYBjgDOrai/gzDYN8BRgr3Y7CnjPBGuTJM1hYqFQVVdV1Xfb/V8CFzNci+Eg4KTW7STg4Hb/IODkGnwb2DnJbpOqT5K0rnnZp5BkGcO5ks4Gdq2qq2AIDuC+rdtS4IqRxVa1NknSPJl4KCS5G/Ax4MVV9YsNdZ2jreZY31FJViRZsWbNmi1VpiSJCYdCku0ZAuHfqurjrfnqmWGh9nd1a18F7DGy+O7AlbPXWVXHV9Xyqlq+ZIkXf5OkLWmSRx8FOAG4uKreOjLrdODwdv9w4LSR9sPaUUgHADfMDDNJkubHJE9//Wjg+cAFSc5rba8C3gR8NMmRwOXAoW3eZ4GnAiuBm/HKbpI07yYWClX1debeTwDwxDn6F3D0pOqRJG2cv2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKmbWCgkeX+S1UkuHGm7d5IzklzS/t6rtSfJO5KsTHJ+kv0nVZckaf0muaVwInDgrLZjgDOrai/gzDYN8BRgr3Y7CnjPBOuSJK3HxEKhqr4KXDur+SDgpHb/JODgkfaTa/BtYOcku02qNknS3OZ7n8KuVXUVQPt739a+FLhipN+q1raOJEclWZFkxZo1ayZarCQtNgtlR3PmaKu5OlbV8VW1vKqWL1myZMJlSdLiMt+hcPXMsFD7u7q1rwL2GOm3O3DlPNcmSYvefIfC6cDh7f7hwGkj7Ye1o5AOAG6YGWaSJM2f7Sa14iQfBh4H7JJkFfAa4E3AR5McCVwOHNq6fxZ4KrASuBl4waTqkiSt38RCoaqes55ZT5yjbwFHT6oWSdJ4FsqOZknSAmAoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSt6BCIcmBSX6UZGWSY6ZdjyQtNgsmFJJsC7wLeAqwD/CcJPtMtypJWlwWTCgAjwRWVtWlVfVr4BTgoCnXJEmLynbTLmDEUuCKkelVwB/N7pTkKOCoNnljkh/NQ22LxS7ANdMuYiFIpl2BZvHf5owt849zz/XNWEihMNczrXUaqo4Hjp98OYtPkhVVtXzadUiz+W9z/iyk4aNVwB4j07sDV06pFklalBZSKJwD7JXkAUl2AJ4NnD7lmiRpUVkww0dVdVuSvwU+D2wLvL+qfjDlshYbh+W0UPlvc56kap1he0nSIrWQho8kSVNmKEiSOkNBnl5EC1aS9ydZneTCadeyWBgKi5ynF9ECdyJw4LSLWEwMBXl6ES1YVfVV4Npp17GYGAqa6/QiS6dUi6QpMxQ01ulFJC0OhoI8vYikzlCQpxeR1BkKi1xV3QbMnF7kYuCjnl5EC0WSDwPfAv4gyaokR067pq2dp7mQJHVuKUiSOkNBktQZCpKkzlCQJHWGgiSpMxSkMSW5X5JTkvwkyUVJPpvkwZ7BU1uTBXM5TmkhSxLgE8BJVfXs1rYfsOtUC5O2MLcUpPE8HvhNVb13pqGqzmPkZIJJliX5WpLvttujWvtuSb6a5LwkFyZ5bJJtk5zYpi9I8pL5f0rSutxSkMazL3DuRvqsBp5UVbck2Qv4MLAc+Evg81X1+nb9irsA+wFLq2pfgCQ7T650aXyGgrTlbA+8sw0r3Q48uLWfA7w/yfbAJ6vqvCSXAr+f5DjgM8AXplKxNIvDR9J4fgA8fCN9XgJcDTyUYQthB+gXivlj4GfAB5McVlXXtX5nAUcD/zqZsqVNYyhI4/kSsGOSv5ppSPIIYM+RPvcErqqq3wLPB7Zt/fYEVlfV+4ATgP2T7AJsU1UfA/4R2H9+noa0YQ4fSWOoqkryX4C3JTkGuAW4DHjxSLd3Ax9LcijwZeCm1v444O+T/Aa4ETiM4ep2H0gy88XslRN/EtIYPEuqJKlz+EiS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlS9/8B9MbyvBmR/l8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print('Distribution of the Classes in the subsample dataset')\n", + "print(new_df['Class'].value_counts()/len(new_df))\n", + "\n", + "\n", + "\n", + "sns.countplot('Class', data=new_df, palette=colors)\n", + "plt.title('Equally Distributed Classes', fontsize=14)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classifiers: LogisticRegression\n", + "Accuracy Score : 99.89993328885924\n", + "F1 score : 0.6459627329192548\n", + "Recall score : 0.5306122448979592\n", + "--------------------------------------------\n", + "Classifiers: RandomForestClassifier\n", + "Accuracy Score : 99.95611109160492\n", + "F1 score : 0.8587570621468926\n", + "Recall score : 0.7755102040816326\n", + "--------------------------------------------\n", + "Classifiers: GradientBoostingClassifier\n", + "Accuracy Score : 99.89466661985183\n", + "F1 score : 0.6629213483146067\n", + "Recall score : 0.6020408163265306\n", + "--------------------------------------------\n" + ] + } + ], + "source": [ + "# Our data is already scaled we should split our training and test sets\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# This is explicitly used for undersampling.\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Turn the values into an array for feeding the classification algorithms.\n", + "X_train = X_train.values\n", + "X_test = X_test.values\n", + "y_train = y_train.values\n", + "y_test = y_test.values\n", + "\n", + "# Let's implement simple classifiers\n", + "\n", + "classifiers = {\n", + " \"LogisiticRegression\": LogisticRegression(),\n", + " \"RandomTreeClassifier\": RandomForestClassifier(),\n", + " \"GradientBoostingClassifier\": GradientBoostingClassifier() \n", + "}\n", + "\n", + "# Wow our scores are getting even high scores even when applying cross validation.\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "\n", + "for key, classifier in classifiers.items():\n", + " train = classifier.fit(X_train, y_train)\n", + " pred = train.predict(X_test)\n", + " print(\"Classifiers: \", classifier.__class__.__name__)\n", + " print(\"Accuracy Score : \", accuracy_score(y_test, pred)*100)\n", + " print(\"F1 score : \",f1_score(y_test, pred))\n", + " print(\"Recall score : \", recall_score(y_test, pred))\n", + " print(\"--------------------------------------------\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Separate input features and target\n", + "y = df.Class\n", + "X = df.drop('Class', axis=1)\n", + "\n", + "# setting up testing and training sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import imblearn" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Separate input features and target\n", + "y = df.Class\n", + "X = df.drop('Class', axis=1)\n", + "\n", + "# setting up testing and training sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from imblearn.over_sampling import SMOTE" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# SMOTE Technique (OverSampling) After splitting and Cross Validating\n", + "sm = SMOTE(ratio='minority', random_state=42)\n", + "# Xsm_train, ysm_train = sm.fit_sample(X_train, y_train)\n", + "\n", + "\n", + "# This will be the data were we are going to \n", + "Xsm_train, ysm_train = sm.fit_sample(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classifiers: LogisticRegression\n", + "Accuracy Score : 98.71492373809726\n", + "F1 score : 0.16893732970027248\n", + "Recall score : 0.8773584905660378\n", + "--------------------------------------------\n", + "Classifiers: RandomForestClassifier\n", + "Accuracy Score : 99.96067526193085\n", + "F1 score : 0.8600000000000001\n", + "Recall score : 0.8113207547169812\n", + "--------------------------------------------\n", + "Classifiers: GradientBoostingClassifier\n", + "Accuracy Score : 99.47332940085953\n", + "F1 score : 0.3267504488330341\n", + "Recall score : 0.8584905660377359\n", + "--------------------------------------------\n" + ] + } + ], + "source": [ + "\n", + "classifiers = {\n", + " \"LogisiticRegression\": LogisticRegression(),\n", + " \"RandomTreeClassifier\": RandomForestClassifier(),\n", + " \"GradientBoostingClassifier\": GradientBoostingClassifier() \n", + "}\n", + "\n", + "# Wow our scores are getting even high scores even when applying cross validation.\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "\n", + "for key, classifier in classifiers.items():\n", + " train = classifier.fit(Xsm_train, ysm_train)\n", + " pred = train.predict(X_test)\n", + " print(\"Classifiers: \", classifier.__class__.__name__)\n", + " print(\"Accuracy Score : \", accuracy_score(y_test, pred)*100)\n", + " print(\"F1 score : \",f1_score(y_test, pred))\n", + " print(\"Recall score : \", recall_score(y_test, pred))\n", + " print(\"--------------------------------------------\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sravansmart7733@gmail.com/Classification-ImbalanceData.pdf b/sravansmart7733@gmail.com/Classification-ImbalanceData.pdf new file mode 100644 index 000000000..41a69d9ce Binary files /dev/null and b/sravansmart7733@gmail.com/Classification-ImbalanceData.pdf differ