Skip to content

Commit 2e6aaac

Browse files
committed
Note for size analysis
1 parent ecf1cba commit 2e6aaac

File tree

2 files changed

+125
-0
lines changed

2 files changed

+125
-0
lines changed

option_columns.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

size_analysis.ipynb

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import pandas as pd\n",
10+
"import json\n",
11+
"with open(\"option_columns.json\", \"r\") as columns_file:\n",
12+
"    option_columns = json.load(columns_file)\n",
13+
"\n",
14+
"# The dataset can be downloaded from http://37.187.140.181/tuxml_dataset/\n",
15+
"df = pd.read_csv(\"../tuxml-datasets/dataset_encoded_size.csv\", dtype=dict.fromkeys(option_columns, \"int8\"))"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 2,
21+
"metadata": {},
22+
"outputs": [
23+
{
24+
"data": {
25+
"text/plain": [
26+
"(92473, 12638)"
27+
]
28+
},
29+
"execution_count": 2,
30+
"metadata": {},
31+
"output_type": "execute_result"
32+
}
33+
],
34+
"source": [
35+
"# Keep only rows with cid >= 30000 and a non-negative kernel_size\n",
36+
"df = df.query(\"cid >= 30000 and kernel_size >= 0\")\n",
37+
"\n",
38+
"# Replace any missing values with -1\n",
39+
"df = df.fillna(-1)\n",
40+
"\n",
41+
"df.shape"
42+
]
43+
},
44+
{
45+
"cell_type": "code",
46+
"execution_count": 3,
47+
"metadata": {},
48+
"outputs": [],
49+
"source": [
50+
"from sklearn.model_selection import train_test_split\n",
51+
"from sklearn import tree"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": 4,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"# Fixed seeds make the split and the fitted tree reproducible. NOTE(review): test_size=0.9 trains on only 10% of the rows - confirm this is intended.\n",
61+
"X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=[\"cid\", \"kernel_size\"]), df[\"kernel_size\"], test_size=0.9, random_state=0)\n",
62+
"reg = tree.DecisionTreeRegressor(random_state=0).fit(X_train, y_train)\n",
63+
"y_pred = reg.predict(X_test)"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": 5,
69+
"metadata": {},
70+
"outputs": [
71+
{
72+
"data": {
73+
"text/plain": [
74+
"count 83226.000000\n",
75+
"mean 20.660668\n",
76+
"std 26.782288\n",
77+
"min 0.000392\n",
78+
"25% 6.686377\n",
79+
"50% 14.923435\n",
80+
"75% 26.849332\n",
81+
"max 1678.388727\n",
82+
"Name: % error, dtype: float64"
83+
]
84+
},
85+
"execution_count": 5,
86+
"metadata": {},
87+
"output_type": "execute_result"
88+
}
89+
],
90+
"source": [
91+
"dfErrors = pd.DataFrame({\"Actual\": y_test, \"Predicted\": y_pred, \"error\": abs(y_pred - y_test), \"% error\": abs((y_pred - y_test) / y_test) * 100})\n",
92+
"dfErrors[\"% error\"].describe()"
93+
]
94+
},
95+
{
96+
"cell_type": "code",
97+
"execution_count": null,
98+
"metadata": {},
99+
"outputs": [],
100+
"source": []
101+
}
102+
],
103+
"metadata": {
104+
"kernelspec": {
105+
"display_name": "Python 3",
106+
"language": "python",
107+
"name": "python3"
108+
},
109+
"language_info": {
110+
"codemirror_mode": {
111+
"name": "ipython",
112+
"version": 3
113+
},
114+
"file_extension": ".py",
115+
"mimetype": "text/x-python",
116+
"name": "python",
117+
"nbconvert_exporter": "python",
118+
"pygments_lexer": "ipython3",
119+
"version": "3.6.8"
120+
}
121+
},
122+
"nbformat": 4,
123+
"nbformat_minor": 2
124+
}

0 commit comments

Comments
 (0)