Skip to content

Commit 7868232

Browse files
Performance config (#114)
* move & refactor xpu
* disable kd_tree gpu
* empty line
* fix ridge
* pca float32 enable
* kd_tree + higgs for train_test_split
* fix higgs
* fix loading file
* rem extra higgs
* config fix
* disable airline for gpu
* skip zero cases
* disable gpu airline
* enable float64 for knn
* remove extra dtype specification
1 parent ee41cba commit 7868232

23 files changed

+1336
-229
lines changed

Diff for: configs/xpu/dbscan.json renamed to configs/sklearn/performance/dbscan.json

+33-1
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,42 @@
44
"algorithm": "dbscan",
55
"data-format": "pandas",
66
"data-order": "F",
7-
"dtype": ["float64"],
7+
"dtype": ["float32", "float64"],
88
"device": ["host", "cpu", "gpu", "none"]
99
},
1010
"cases": [
11+
{
12+
"dataset": [
13+
{
14+
"source": "synthetic",
15+
"type": "blobs",
16+
"n_clusters": 50,
17+
"n_features": 3,
18+
"training": {
19+
"n_samples": 500000
20+
}
21+
},
22+
{
23+
"source": "synthetic",
24+
"type": "blobs",
25+
"n_clusters": 50,
26+
"n_features": 10,
27+
"training": {
28+
"n_samples": 500000
29+
}
30+
},
31+
{
32+
"source": "synthetic",
33+
"type": "blobs",
34+
"n_clusters": 100,
35+
"n_features": 50,
36+
"training": {
37+
"n_samples": 500000
38+
}
39+
}
40+
],
41+
"workload-size": "medium"
42+
},
1143
{
1244
"dataset": [
1345
{

Diff for: configs/xpu/df_clsf.json renamed to configs/sklearn/performance/df_clsf.json

+47
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,53 @@
99
"device": ["host", "cpu", "gpu", "none"]
1010
},
1111
"cases": [
12+
{
13+
"dataset": [
14+
{
15+
"source": "npy",
16+
"name": "higgs1m",
17+
"training":
18+
{
19+
"x": "data/higgs1m_x_train.npy",
20+
"y": "data/higgs1m_y_train.npy"
21+
},
22+
"testing":
23+
{
24+
"x": "data/higgs1m_x_test.npy",
25+
"y": "data/higgs1m_y_test.npy"
26+
}
27+
}
28+
],
29+
"workload-size": "medium",
30+
"num-trees": 50,
31+
"max-depth": 16,
32+
"max-leaf-nodes": 131072,
33+
"max-features": 0.2
34+
},
35+
{
36+
"device": "none",
37+
"dataset": [
38+
{
39+
"source": "npy",
40+
"name": "airline-ohe",
41+
"training":
42+
{
43+
"x": "data/airline-ohe_x_train.npy",
44+
"y": "data/airline-ohe_y_train.npy"
45+
},
46+
"testing":
47+
{
48+
"x": "data/airline-ohe_x_test.npy",
49+
"y": "data/airline-ohe_y_test.npy"
50+
}
51+
}
52+
],
53+
"workload-size": "medium",
54+
"num-trees": 50,
55+
"max-depth": 16,
56+
"max-leaf-nodes": 131072,
57+
"max-features": 0.2
58+
},
1259
{
1360
"dataset": [
1461
{

Diff for: configs/xpu/df_regr.json renamed to configs/sklearn/performance/df_regr.json

+51-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,52 @@
55
"data-format": "pandas",
66
"data-order": "F",
77
"dtype": ["float32", "float64"],
8-
"max-features": 0.33,
98
"device": ["host", "cpu", "gpu", "none"]
109
},
1110
"cases": [
11+
{
12+
"device": "none",
13+
"dataset": [
14+
{
15+
"source": "npy",
16+
"name": "airline_regression",
17+
"training":
18+
{
19+
"x": "data/airline_regression_x_train.npy",
20+
"y": "data/airline_regression_y_train.npy"
21+
},
22+
"testing":
23+
{
24+
"x": "data/airline_regression_x_test.npy",
25+
"y": "data/airline_regression_y_test.npy"
26+
}
27+
}
28+
],
29+
"workload-size": "large",
30+
"num-trees": 100
31+
},
1232
{
33+
"dataset": [
34+
{
35+
"source": "npy",
36+
"name": "year_prediction_msd",
37+
"training":
38+
{
39+
"x": "data/year_prediction_msd_x_train.npy",
40+
"y": "data/year_prediction_msd_y_train.npy"
41+
},
42+
"testing":
43+
{
44+
"x": "data/year_prediction_msd_x_test.npy",
45+
"y": "data/year_prediction_msd_y_test.npy"
46+
}
47+
}
48+
],
49+
"workload-size": "large",
50+
"num-trees": 100
51+
},
52+
{
53+
"max-features": 0.33,
1354
"dataset": [
1455
{
1556
"source": "npy",
@@ -31,6 +72,7 @@
3172
"max-depth": 5
3273
},
3374
{
75+
"max-features": 0.33,
3476
"dataset": [
3577
{
3678
"source": "npy",
@@ -48,10 +90,11 @@
4890
}
4991
],
5092
"workload-size": "large",
51-
"num-trees": 10,
93+
"num-trees": 100,
5294
"max-depth": 5
5395
},
5496
{
97+
"max-features": 0.33,
5598
"dataset": [
5699
{
57100
"source": "npy",
@@ -73,6 +116,7 @@
73116
"max-depth": 8
74117
},
75118
{
119+
"max-features": 0.33,
76120
"dataset": [
77121
{
78122
"source": "npy",
@@ -94,6 +138,7 @@
94138
"max-depth": 8
95139
},
96140
{
141+
"max-features": 0.33,
97142
"dataset": [
98143
{
99144
"source": "npy",
@@ -115,6 +160,7 @@
115160
"max-depth": 16
116161
},
117162
{
163+
"max-features": 0.33,
118164
"dataset": [
119165
{
120166
"source": "npy",
@@ -136,6 +182,7 @@
136182
"max-depth": 8
137183
},
138184
{
185+
"max-features": 0.33,
139186
"dataset": [
140187
{
141188
"source": "npy",
@@ -157,6 +204,7 @@
157204
"max-depth": 8
158205
},
159206
{
207+
"max-features": 0.33,
160208
"dataset": [
161209
{
162210
"source": "npy",
@@ -178,6 +226,7 @@
178226
"max-depth": 8
179227
},
180228
{
229+
"max-features": 0.33,
181230
"dataset": [
182231
{
183232
"source": "npy",

Diff for: configs/sklearn/performance/elasticnet.json

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"common": {
3+
"lib": "sklearn",
4+
"algorithm": "elasticnet",
5+
"data-format": "pandas",
6+
"data-order": "F",
7+
"dtype": ["float32", "float64"],
8+
"device": "none"
9+
},
10+
"cases": [
11+
{
12+
"dataset": [
13+
{
14+
"source": "npy",
15+
"name": "year_prediction_msd",
16+
"training":
17+
{
18+
"x": "data/year_prediction_msd_x_train.npy",
19+
"y": "data/year_prediction_msd_y_train.npy"
20+
},
21+
"testing":
22+
{
23+
"x": "data/year_prediction_msd_x_test.npy",
24+
"y": "data/year_prediction_msd_y_test.npy"
25+
}
26+
}
27+
],
28+
"workload-size": "medium",
29+
"alpha": 2.0,
30+
"l1_ratio": 0.5,
31+
"tol": 1e-4
32+
}
33+
]
34+
}

Diff for: configs/xpu/kmeans.json renamed to configs/sklearn/performance/kmeans.json

+60-1
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,70 @@
33
"lib": "sklearn",
44
"algorithm": "kmeans",
55
"data-format": "pandas",
6-
"data-order": "C",
6+
"data-order": "F",
77
"dtype": ["float32", "float64"],
88
"device": ["host", "cpu", "gpu", "none"]
99
},
1010
"cases": [
11+
{
12+
"dataset": [
13+
{
14+
"source": "synthetic",
15+
"type": "blobs",
16+
"n_clusters": 1000,
17+
"n_features": 20,
18+
"training": {
19+
"n_samples": 1000000
20+
}
21+
}
22+
],
23+
"workload-size": "medium",
24+
"time-method": "box_filter",
25+
"time-limit": 50,
26+
"n-clusters": 1000,
27+
"maxiter": 50,
28+
"tol": 0.0
29+
},
30+
{
31+
"device": "none",
32+
"dataset": [
33+
{
34+
"source": "synthetic",
35+
"type": "blobs",
36+
"n_clusters": 5,
37+
"n_features": 50,
38+
"training": {
39+
"n_samples": 10000000
40+
}
41+
}
42+
],
43+
"workload-size": "medium",
44+
"time-method": "box_filter",
45+
"time-limit": 50,
46+
"n-clusters": 5,
47+
"maxiter": 50,
48+
"init": "k-means++",
49+
"tol": 0.0
50+
},
51+
{
52+
"dataset": [
53+
{
54+
"source": "synthetic",
55+
"type": "blobs",
56+
"n_clusters": 20,
57+
"n_features": 50,
58+
"training": {
59+
"n_samples": 3000000
60+
}
61+
}
62+
],
63+
"workload-size": "medium",
64+
"time-method": "box_filter",
65+
"time-limit": 50,
66+
"n-clusters": 20,
67+
"maxiter": 50,
68+
"tol": 0.0
69+
},
1170
{
1271
"dataset": [
1372
{

0 commit comments

Comments
 (0)