|
5 | 5 | "execution_count": 1,
|
6 | 6 | "metadata": {
|
7 | 7 | "ExecuteTime": {
|
8 |
| - "end_time": "2021-12-27T11:58:43.079438Z", |
9 |
| - "start_time": "2021-12-27T11:58:41.821923Z" |
| 8 | + "end_time": "2021-12-27T12:22:23.721251Z", |
| 9 | + "start_time": "2021-12-27T12:22:22.456359Z" |
10 | 10 | }
|
11 | 11 | },
|
12 | 12 | "outputs": [],
|
|
29 | 29 | },
|
30 | 30 | {
|
31 | 31 | "cell_type": "code",
|
32 |
| - "execution_count": 9, |
| 32 | + "execution_count": 2, |
33 | 33 | "metadata": {
|
34 | 34 | "ExecuteTime": {
|
35 |
| - "end_time": "2021-12-27T12:07:47.437564Z", |
36 |
| - "start_time": "2021-12-27T12:07:47.428224Z" |
| 35 | + "end_time": "2021-12-27T12:22:23.745780Z", |
| 36 | + "start_time": "2021-12-27T12:22:23.722805Z" |
37 | 37 | }
|
38 | 38 | },
|
39 | 39 | "outputs": [],
|
|
128 | 128 | },
|
129 | 129 | {
|
130 | 130 | "cell_type": "code",
|
131 |
| - "execution_count": 16, |
| 131 | + "execution_count": 3, |
132 | 132 | "metadata": {
|
133 | 133 | "ExecuteTime": {
|
134 |
| - "end_time": "2021-12-27T12:10:02.283742Z", |
135 |
| - "start_time": "2021-12-27T12:10:02.097688Z" |
| 134 | + "end_time": "2021-12-27T12:22:28.522606Z", |
| 135 | + "start_time": "2021-12-27T12:22:28.338619Z" |
136 | 136 | }
|
137 | 137 | },
|
138 | 138 | "outputs": [],
|
|
209 | 209 | }
|
210 | 210 | ],
|
211 | 211 | "source": [
|
| 212 | + "# Here we use LogisticRegression from sklearn.\n", |
| 213 | + "# reg_lambda corresponds to its parameter \"C\", which is the inverse of the regularization strength (smaller values mean stronger regularization).\n", |
212 | 214 | "clf = GLMTreeClassifier(max_depth=3, min_samples_leaf=50, reg_lambda=np.logspace(-5, 5, 10).tolist(),\n",
|
213 | 215 | " n_split_grid=20, n_screen_grid=5, n_feature_search=10)\n",
|
214 | 216 | "clf.fit(train_x, train_y)\n",
|
|
217 | 219 | "roc_auc_score(train_y, pred_train.ravel()), roc_auc_score(test_y, pred_test.ravel())"
|
218 | 220 | ]
|
219 | 221 | },
|
220 |
| - { |
221 |
| - "cell_type": "code", |
222 |
| - "execution_count": 18, |
223 |
| - "metadata": { |
224 |
| - "ExecuteTime": { |
225 |
| - "end_time": "2021-12-27T12:11:03.435992Z", |
226 |
| - "start_time": "2021-12-27T12:10:29.328703Z" |
227 |
| - } |
228 |
| - }, |
229 |
| - "outputs": [ |
230 |
| - { |
231 |
| - "ename": "TypeError", |
232 |
| - "evalue": "__init__() got an unexpected keyword argument 'alpha'", |
233 |
| - "output_type": "error", |
234 |
| - "traceback": [ |
235 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
236 |
| - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", |
237 |
| - "\u001b[0;32m/tmp/ipykernel_37837/3864539870.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m clf = GLMTreeClassifier(max_depth=1, min_samples_leaf=50, reg_lambda=[0],\n\u001b[1;32m 2\u001b[0m n_split_grid=20, n_screen_grid=5, n_feature_search=10)\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_y\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mpred_train\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_x\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mpred_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_x\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
238 |
| - "\u001b[0;32m~/anaconda3/envs/py37/lib/python3.7/site-packages/simtree/mobtree.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_leaf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m node_id = self.add_node(parent_id, is_left, is_leaf, depth,\n\u001b[0;32m--> 295\u001b[0;31m None, None, impurity, sample_indice)\n\u001b[0m\u001b[1;32m 296\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m node_id = self.add_node(parent_id, is_left, is_leaf, depth,\n", |
239 |
| - "\u001b[0;32m~/anaconda3/envs/py37/lib/python3.7/site-packages/simtree/mobtree.py\u001b[0m in \u001b[0;36madd_node\u001b[0;34m(self, parent_id, is_left, is_leaf, depth, feature, threshold, impurity, sample_indice)\u001b[0m\n\u001b[1;32m 231\u001b[0m \u001b[0mn_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_indice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_leaf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 233\u001b[0;31m \u001b[0mpredict_func\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbest_impurity\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_leaf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_indice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 234\u001b[0m node = {\"node_id\": node_id, \"parent_id\": parent_id, \"depth\": depth, \"feature\": feature, \"impurity\": best_impurity,\n\u001b[1;32m 235\u001b[0m \u001b[0;34m\"n_samples\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mn_samples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"is_left\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mis_left\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"is_leaf\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mis_leaf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"value\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msample_indice\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
240 |
| - "\u001b[0;32m~/anaconda3/envs/py37/lib/python3.7/site-packages/simtree/glmtree.py\u001b[0m in \u001b[0;36mbuild_leaf\u001b[0;34m(self, sample_indice)\u001b[0m\n\u001b[1;32m 96\u001b[0m cv=5, random_state=self.random_state)\n\u001b[1;32m 97\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 98\u001b[0;31m \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLogisticRegression\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreg_lambda\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecompute\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0mmx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msample_indice\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
241 |
| - "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'alpha'" |
242 |
| - ] |
243 |
| - } |
244 |
| - ], |
245 |
| - "source": [ |
246 |
| - "clf = GLMTreeClassifier(max_depth=1, min_samples_leaf=50, reg_lambda=[0],\n", |
247 |
| - " n_split_grid=20, n_screen_grid=5, n_feature_search=10)\n", |
248 |
| - "clf.fit(train_x, train_y)\n", |
249 |
| - "pred_train = clf.predict_proba(train_x)[:, 1]\n", |
250 |
| - "pred_test = clf.predict_proba(test_x)[:, 1]\n", |
251 |
| - "roc_auc_score(train_y, pred_train.ravel()), roc_auc_score(test_y, pred_test.ravel())" |
252 |
| - ] |
253 |
| - }, |
254 | 222 | {
|
255 | 223 | "cell_type": "code",
|
256 | 224 | "execution_count": null,
|
257 | 225 | "metadata": {
|
258 | 226 | "ExecuteTime": {
|
259 |
| - "end_time": "2021-12-27T12:11:03.437503Z", |
260 |
| - "start_time": "2021-12-27T12:11:03.437482Z" |
| 227 | + "start_time": "2021-12-27T12:27:37.691Z" |
261 | 228 | }
|
262 | 229 | },
|
263 | 230 | "outputs": [],
|
264 | 231 | "source": [
|
265 |
| - "clf = SIMTreeClassifier(max_depth=1, min_samples_leaf=50, knot_num=30,\n", |
266 |
| - " n_split_grid=20, n_screen_grid=5, n_feature_search=10,\n", |
267 |
| - " reg_lambda=[0],\n", |
268 |
| - " reg_gamma=[1e-3, 1e-5, 1e-7])\n", |
| 232 | + "clf = GLMTreeClassifier(max_depth=1, min_samples_leaf=50, reg_lambda=[1e4],\n", |
| 233 | + " n_split_grid=20, n_screen_grid=5, n_feature_search=10)\n", |
269 | 234 | "clf.fit(train_x, train_y)\n",
|
270 |
| - "clf.plot_tree()\n", |
271 | 235 | "pred_train = clf.predict_proba(train_x)[:, 1]\n",
|
272 | 236 | "pred_test = clf.predict_proba(test_x)[:, 1]\n",
|
273 | 237 | "roc_auc_score(train_y, pred_train.ravel()), roc_auc_score(test_y, pred_test.ravel())"
|
|
0 commit comments