14783
14783
" return metrics"
14784
14784
]
14785
14785
},
14786
+ {
14787
+ "cell_type": "code",
14788
+ "execution_count": null,
14789
+ "metadata": {},
14790
+ "outputs": [],
14791
+ "source": [
14792
+ "#For hyperparameter tuning we will use optuna\n",
14793
+ "!pip install optuna\n",
14794
+ "import optuna"
14795
+ ]
14796
+ },
14786
14797
{
14787
14798
"cell_type": "code",
14788
14799
"execution_count": 801,
@@ -14800,18 +14811,40 @@
14800
14811
],
14801
14812
"source": [
14802
14813
"#DecisionTreeClassifier\n",
14814
+ "# Define the objective function\n",
14815
+ "def objective(trial):\n",
14816
+ " max_depth = trial.suggest_int('max_depth', 1, 20)\n",
14817
+ " min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 50)\n",
14818
+ "\n",
14819
+ " model = DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=min_samples_leaf)\n",
14820
+ " model.fit(X_train, y_train)\n",
14821
+ " \n",
14822
+ " y_pred = model.predict(X_test)\n",
14823
+ " accuracy = accuracy_score(y_test, y_pred)\n",
14824
+ " \n",
14825
+ " return accuracy\n",
14826
+ "\n",
14827
+ "# Create a study object and optimize the objective function\n",
14828
+ "study = optuna.create_study(direction='maximize')\n",
14829
+ "study.optimize(objective, n_trials=100)\n",
14830
+ "\n",
14831
+ "# Print the best hyperparameters\n",
14832
+ "print('Best hyperparameters: ', study.best_params)\n",
14833
+ "\n",
14834
+ "# Train the model with the best hyperparameters\n",
14835
+ "best_params = study.best_params\n",
14803
14836
"start = time.time()\n",
14804
- "modelDC = DecisionTreeClassifier(max_depth = 12, min_samples_leaf = 10 )\n",
14837
+ "modelDC = DecisionTreeClassifier(**best_params )\n",
14805
14838
"modelDC.fit(X_train, y_train)\n",
14806
14839
"end = time.time()\n",
14807
14840
"TimeDC = end - start\n",
14808
14841
"print('Time: ', TimeDC)\n",
14809
14842
"\n",
14810
- "#Evaluating model on test set\n",
14843
+ "# Evaluating model on test set\n",
14811
14844
"y_pred = modelDC.predict(X_test)\n",
14812
14845
"all_metrics.update(metrics_data(\"Decision Trees\", y_test, y_pred))\n",
14813
14846
"\n",
14814
- "#Evaluating model on train set\n",
14847
+ "# Evaluating model on train set\n",
14815
14848
"y_pred = modelDC.predict(X_train)\n",
14816
14849
"accuracyDC2 = accuracy_score(y_train, y_pred)\n",
14817
14850
"print('Accuracy on train set: {}'.format(accuracyDC2))"
@@ -14833,17 +14866,39 @@
14833
14866
],
14834
14867
"source": [
14835
14868
"#MultinomialNB\n",
14869
+ "# Define the objective function\n",
14870
+ "def objective(trial):\n",
14871
+ " alpha = trial.suggest_float('alpha', 1e-3, 1e-1, log=True)\n",
14872
+ "\n",
14873
+ " model = MultinomialNB(alpha=alpha)\n",
14874
+ " model.fit(X_train, y_train)\n",
14875
+ " \n",
14876
+ " y_pred = model.predict(X_test)\n",
14877
+ " accuracy = accuracy_score(y_test, y_pred)\n",
14878
+ " \n",
14879
+ " return accuracy\n",
14880
+ "\n",
14881
+ "# Create a study object and optimize the objective function\n",
14882
+ "study = optuna.create_study(direction='maximize')\n",
14883
+ "study.optimize(objective, n_trials=100)\n",
14884
+ "\n",
14885
+ "# Print the best hyperparameters\n",
14886
+ "print('Best hyperparameters: ', study.best_params)\n",
14887
+ "\n",
14888
+ "# Train the model with the best hyperparameters\n",
14889
+ "best_params = study.best_params\n",
14836
14890
"start = time.time()\n",
14837
- "modelNB = MultinomialNB(alpha=0.005 )\n",
14891
+ "modelNB = MultinomialNB(**best_params )\n",
14838
14892
"modelNB.fit(X_train, y_train)\n",
14839
14893
"end = time.time()\n",
14840
14894
"TimeNB = end - start\n",
14895
+ "print('Time: ', TimeNB)\n",
14841
14896
"\n",
14842
- "#Evaluating model on test set\n",
14897
+ "# Evaluating model on test set\n",
14843
14898
"y_pred = modelNB.predict(X_test)\n",
14844
14899
"all_metrics.update(metrics_data(\"Multinomial Naive Bayes\", y_test, y_pred))\n",
14845
14900
"\n",
14846
- "#Evaluating model on train set\n",
14901
+ "# Evaluating model on train set\n",
14847
14902
"y_pred = modelNB.predict(X_train)\n",
14848
14903
"accuracyNB2 = accuracy_score(y_train, y_pred)\n",
14849
14904
"print('Accuracy on train set: {}'.format(accuracyNB2))"
@@ -14931,18 +14986,40 @@
14931
14986
],
14932
14987
"source": [
14933
14988
"#GaussianNB\n",
14989
+ "\n",
14990
+ "# Define the objective function\n",
14991
+ "def objective(trial):\n",
14992
+ " var_smoothing = trial.suggest_float('var_smoothing', 1e-11, 1e-7, log=True)\n",
14993
+ "\n",
14994
+ " model = GaussianNB(var_smoothing=var_smoothing)\n",
14995
+ " model.fit(X_train, y_train)\n",
14996
+ " \n",
14997
+ " y_pred = model.predict(X_test)\n",
14998
+ " accuracy = accuracy_score(y_test, y_pred)\n",
14999
+ " \n",
15000
+ " return accuracy\n",
15001
+ "\n",
15002
+ "# Create a study object and optimize the objective function\n",
15003
+ "study = optuna.create_study(direction='maximize')\n",
15004
+ "study.optimize(objective, n_trials=100)\n",
15005
+ "\n",
15006
+ "# Print the best hyperparameters\n",
15007
+ "print('Best hyperparameters: ', study.best_params)\n",
15008
+ "\n",
15009
+ "# Train the model with the best hyperparameters\n",
15010
+ "best_params = study.best_params\n",
14934
15011
"start = time.time()\n",
14935
- "modelGNB = GaussianNB()\n",
15012
+ "modelGNB = GaussianNB(**best_params )\n",
14936
15013
"modelGNB.fit(X_train, y_train)\n",
14937
15014
"end = time.time()\n",
14938
15015
"TimeGNB = end - start\n",
14939
15016
"print('Time: ', TimeGNB)\n",
14940
15017
"\n",
14941
- "#Evaluating model on test set\n",
15018
+ "# Evaluating model on test set\n",
14942
15019
"y_pred = modelGNB.predict(X_test)\n",
14943
15020
"all_metrics.update(metrics_data(\"Gaussian Naive Bayes\", y_test, y_pred))\n",
14944
15021
"\n",
14945
- "#Evaluating model on train set\n",
15022
+ "# Evaluating model on train set\n",
14946
15023
"y_pred = modelGNB.predict(X_train)\n",
14947
15024
"accuracyGNB2 = accuracy_score(y_train, y_pred)\n",
14948
15025
"print('Accuracy on train set: {}'.format(accuracyGNB2))"
@@ -14965,18 +15042,41 @@
14965
15042
],
14966
15043
"source": [
14967
15044
"#Logistic Regression\n",
15045
+ "# Define the objective function\n",
15046
+ "def objective(trial):\n",
15047
+ " # Define the search space for hyperparameters\n",
15048
+ " C = trial.suggest_float('C', 1e-4, 1e2, log=True)\n",
15049
+ " solver = trial.suggest_categorical('solver', ['liblinear', 'lbfgs', 'newton-cg', 'sag', 'saga'])\n",
15050
+ "\n",
15051
+ " model = LogisticRegression(C=C, solver=solver, max_iter=1000)\n",
15052
+ " model.fit(X_train, y_train)\n",
15053
+ " \n",
15054
+ " y_pred = model.predict(X_test)\n",
15055
+ " accuracy = accuracy_score(y_test, y_pred)\n",
15056
+ " \n",
15057
+ " return accuracy\n",
15058
+ "\n",
15059
+ "# Create a study object and optimize the objective function\n",
15060
+ "study = optuna.create_study(direction='maximize')\n",
15061
+ "study.optimize(objective, n_trials=100)\n",
15062
+ "\n",
15063
+ "# Print the best hyperparameters\n",
15064
+ "print('Best hyperparameters: ', study.best_params)\n",
15065
+ "\n",
15066
+ "# Train the model with the best hyperparameters\n",
15067
+ "best_params = study.best_params\n",
14968
15068
"start = time.time()\n",
14969
- "modelLR = LogisticRegression()\n",
15069
+ "modelLR = LogisticRegression(**best_params, max_iter=1000 )\n",
14970
15070
"modelLR.fit(X_train, y_train)\n",
14971
15071
"end = time.time()\n",
14972
15072
"TimeLR = end - start\n",
14973
15073
"print('Time: ', TimeLR)\n",
14974
15074
"\n",
14975
- "#Evaluating model on test set\n",
15075
+ "# Evaluating model on test set\n",
14976
15076
"y_pred = modelLR.predict(X_test)\n",
14977
15077
"all_metrics.update(metrics_data(\"Logistic Regression\", y_test, y_pred))\n",
14978
15078
"\n",
14979
- "#Evaluating model on train set\n",
15079
+ "# Evaluating model on train set\n",
14980
15080
"y_pred = modelLR.predict(X_train)\n",
14981
15081
"accuracyLR2 = accuracy_score(y_train, y_pred)\n",
14982
15082
"print('Accuracy on train set: {}'.format(accuracyLR2))"
@@ -14999,18 +15099,51 @@
14999
15099
],
15000
15100
"source": [
15001
15101
"#RandomForestClassifier\n",
15102
+ "# Define the objective function\n",
15103
+ "def objective(trial):\n",
15104
+ " # Define the search space for hyperparameters\n",
15105
+ " n_estimators = trial.suggest_int('n_estimators', 100, 1000)\n",
15106
+ " max_depth = trial.suggest_int('max_depth', 10, 50)\n",
15107
+ " min_samples_split = trial.suggest_int('min_samples_split', 2, 10)\n",
15108
+ " min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)\n",
15109
+ " max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])\n",
15110
+ " \n",
15111
+ " model = RandomForestClassifier(\n",
15112
+ " n_estimators=n_estimators,\n",
15113
+ " max_depth=max_depth,\n",
15114
+ " min_samples_split=min_samples_split,\n",
15115
+ " min_samples_leaf=min_samples_leaf,\n",
15116
+ " max_features=max_features,\n",
15117
+ " random_state=42\n",
15118
+ " )\n",
15119
+ " model.fit(X_train, y_train)\n",
15120
+ " \n",
15121
+ " y_pred = model.predict(X_test)\n",
15122
+ " accuracy = accuracy_score(y_test, y_pred)\n",
15123
+ " \n",
15124
+ " return accuracy\n",
15125
+ "\n",
15126
+ "# Create a study object and optimize the objective function\n",
15127
+ "study = optuna.create_study(direction='maximize')\n",
15128
+ "study.optimize(objective, n_trials=100)\n",
15129
+ "\n",
15130
+ "# Print the best hyperparameters\n",
15131
+ "print('Best hyperparameters: ', study.best_params)\n",
15132
+ "\n",
15133
+ "# Train the model with the best hyperparameters\n",
15134
+ "best_params = study.best_params\n",
15002
15135
"start = time.time()\n",
15003
- "rfc = RandomForestClassifier()\n",
15136
+ "rfc = RandomForestClassifier(**best_params, random_state=42 )\n",
15004
15137
"rfc.fit(X_train, y_train)\n",
15005
15138
"end = time.time()\n",
15006
15139
"TimeRFC = end - start\n",
15007
15140
"print('Time: ', TimeRFC)\n",
15008
15141
"\n",
15009
- "#Evaluating model on test set\n",
15142
+ "# Evaluating model on test set\n",
15010
15143
"y_pred = rfc.predict(X_test)\n",
15011
15144
"all_metrics.update(metrics_data(\"Random Forest\", y_test, y_pred))\n",
15012
15145
"\n",
15013
- "#Evaluating model on train set\n",
15146
+ "# Evaluating model on train set\n",
15014
15147
"y_pred = rfc.predict(X_train)\n",
15015
15148
"accuracyRFC2 = accuracy_score(y_train, y_pred)\n",
15016
15149
"print('Accuracy on train set: {}'.format(accuracyRFC2))"
@@ -15033,18 +15166,41 @@
15033
15166
],
15034
15167
"source": [
15035
15168
"#LinearSVC\n",
15169
+ "def objective(trial):\n",
15170
+ " # Define the search space for hyperparameters\n",
15171
+ " C = trial.suggest_float('C', 1e-4, 1e2, log=True)\n",
15172
+ " max_iter = trial.suggest_int('max_iter', 1000, 10000)\n",
15173
+ " loss = trial.suggest_categorical('loss', ['hinge', 'squared_hinge'])\n",
15174
+ " \n",
15175
+ " model = LinearSVC(C=C, max_iter=max_iter, loss=loss, random_state=42)\n",
15176
+ " model.fit(X_train, y_train)\n",
15177
+ " \n",
15178
+ " y_pred = model.predict(X_test)\n",
15179
+ " accuracy = accuracy_score(y_test, y_pred)\n",
15180
+ " \n",
15181
+ " return accuracy\n",
15182
+ "\n",
15183
+ "# Create a study object and optimize the objective function\n",
15184
+ "study = optuna.create_study(direction='maximize')\n",
15185
+ "study.optimize(objective, n_trials=100)\n",
15186
+ "\n",
15187
+ "# Print the best hyperparameters\n",
15188
+ "print('Best hyperparameters: ', study.best_params)\n",
15189
+ "\n",
15190
+ "# Train the model with the best hyperparameters\n",
15191
+ "best_params = study.best_params\n",
15036
15192
"start = time.time()\n",
15037
- "svc = LinearSVC()\n",
15038
- "svc.fit(X_train, y_train) \n",
15193
+ "svc = LinearSVC(**best_params, random_state=42 )\n",
15194
+ "svc.fit(X_train, y_train)\n",
15039
15195
"end = time.time()\n",
15040
15196
"TimeSVC = end - start\n",
15041
15197
"print('Time: ', TimeSVC)\n",
15042
15198
"\n",
15043
- "#Evaluating model on test set\n",
15199
+ "# Evaluating model on test set\n",
15044
15200
"y_pred = svc.predict(X_test)\n",
15045
15201
"all_metrics.update(metrics_data(\"LinearSVC\", y_test, y_pred))\n",
15046
15202
"\n",
15047
- "#Evaluating model on train set\n",
15203
+ "# Evaluating model on train set\n",
15048
15204
"y_pred = svc.predict(X_train)\n",
15049
15205
"accuracySVC2 = accuracy_score(y_train, y_pred)\n",
15050
15206
"print('Accuracy on train set: {}'.format(accuracySVC2))"
@@ -15067,20 +15223,52 @@
15067
15223
],
15068
15224
"source": [
15069
15225
"#Gradient Boosting Classifier\n",
15070
- "start = time.time()\n",
15226
+ "def objective(trial):\n",
15227
+ " # Define the search space for hyperparameters\n",
15228
+ " n_estimators = trial.suggest_int('n_estimators', 100, 1000)\n",
15229
+ " learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)\n",
15230
+ " max_depth = trial.suggest_int('max_depth', 3, 20)\n",
15231
+ " min_samples_split = trial.suggest_int('min_samples_split', 2, 10)\n",
15232
+ " min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)\n",
15233
+ " subsample = trial.suggest_float('subsample', 0.5, 1.0)\n",
15234
+ " \n",
15235
+ " model = GradientBoostingClassifier(\n",
15236
+ " n_estimators=n_estimators,\n",
15237
+ " learning_rate=learning_rate,\n",
15238
+ " max_depth=max_depth,\n",
15239
+ " min_samples_split=min_samples_split,\n",
15240
+ " min_samples_leaf=min_samples_leaf,\n",
15241
+ " subsample=subsample,\n",
15242
+ " random_state=42\n",
15243
+ " )\n",
15244
+ " model.fit(X_train, y_train)\n",
15245
+ " \n",
15246
+ " y_pred = model.predict(X_test)\n",
15247
+ " accuracy = accuracy_score(y_test, y_pred)\n",
15248
+ " \n",
15249
+ " return accuracy\n",
15071
15250
"\n",
15072
- "grb= GradientBoostingClassifier()\n",
15073
- "grb.fit(X_train,y_train)\n",
15251
+ "# Create a study object and optimize the objective function\n",
15252
+ "study = optuna.create_study(direction='maximize')\n",
15253
+ "study.optimize(objective, n_trials=100)\n",
15254
+ "\n",
15255
+ "# Print the best hyperparameters\n",
15256
+ "print('Best hyperparameters: ', study.best_params)\n",
15257
+ "\n",
15258
+ "# Train the model with the best hyperparameters\n",
15259
+ "best_params = study.best_params\n",
15260
+ "start = time.time()\n",
15261
+ "grb = GradientBoostingClassifier(**best_params, random_state=42)\n",
15262
+ "grb.fit(X_train, y_train)\n",
15074
15263
"end = time.time()\n",
15075
15264
"Timegrb = end - start\n",
15076
15265
"print('Time: ', Timegrb)\n",
15077
15266
"\n",
15078
- "#Evaluating model on test set\n",
15267
+ "# Evaluating model on test set\n",
15079
15268
"y_pred = grb.predict(X_test)\n",
15080
15269
"all_metrics.update(metrics_data(\"Gradient Boosting Classifier\", y_test, y_pred))\n",
15081
15270
"\n",
15082
- "\n",
15083
- "#Evaluating model on train set\n",
15271
+ "# Evaluating model on train set\n",
15084
15272
"y_pred = grb.predict(X_train)\n",
15085
15273
"accuracygrb2 = accuracy_score(y_train, y_pred)\n",
15086
15274
"print('Accuracy on train set: {}'.format(accuracygrb2))\n"
21615
21803
"name": "python",
21616
21804
"nbconvert_exporter": "python",
21617
21805
"pygments_lexer": "ipython3",
21618
- "version": "3.11.5 "
21806
+ "version": "3.11.3 "
21619
21807
}
21620
21808
},
21621
21809
"nbformat": 4,
0 commit comments