Skip to content

Commit 30b0b80

Browse files
authored
initial alignment of configs to final results (#176)
* initial alignment of configs to final results
* logic updates
* fix large scale CI
* black
* minimize incremental
* black
* align custom function with skl estimator
* Update sklbench/utils/measurement.py
* Update sklbench/utils/measurement.py
* Update sklbench/utils/measurement.py
* Update sklbench/utils/measurement.py
1 parent 72d65c1 commit 30b0b80

32 files changed

+273
-490
lines changed

configs/incremental.json

-100
This file was deleted.

configs/spmd/large_scale/basic_stats.json

+8-7
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "BasicStatistics",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
9-
"data": {
10-
"split_kwargs": { "test_size": 0.0001 }
11-
}
10+
"data": {
11+
"split_kwargs": { "test_size": 0.0001 }
12+
}
1213
},
1314
"synthetic data": {
1415
"data": [
15-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
17+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
1718
]
1819
}
1920
},
@@ -22,7 +23,7 @@
2223
"SETS": [
2324
"sklearnex spmd implementation",
2425
"large scale 2k parameters",
25-
"synthetic data",
26+
"synthetic data",
2627
"spmd basicstats parameters"
2728
]
2829
}

configs/spmd/large_scale/basic_stats_strong.json

+7-6
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "BasicStatistics",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
9-
"data": {
10-
"split_kwargs": { "test_size": 0.0001 }
11-
}
10+
"data": {
11+
"split_kwargs": { "test_size": 0.0001 }
12+
}
1213
},
1314
"synthetic data": {
1415
"data": [
@@ -20,8 +21,8 @@
2021
"basicstats": {
2122
"SETS": [
2223
"sklearnex spmd implementation",
23-
"large scale strong 2k parameters",
24-
"synthetic data",
24+
"large scale strong <=64 parameters",
25+
"synthetic data",
2526
"spmd basicstats parameters"
2627
]
2728
}

configs/spmd/large_scale/covariance.json

+5-4
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "EmpiricalCovariance",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
910
"data": {
1011
"split_kwargs": { "test_size": 0.0001 }
1112
}
1213
},
1314
"synthetic data": {
1415
"data": [
15-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
17+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
1718
]
1819
}
1920
},
@@ -22,7 +23,7 @@
2223
"SETS": [
2324
"sklearnex spmd implementation",
2425
"large scale 2k parameters",
25-
"synthetic data",
26+
"synthetic data",
2627
"spmd basicstats parameters"
2728
]
2829
}

configs/spmd/large_scale/covariance_strong.json

+4-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "EmpiricalCovariance",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
910
"data": {
1011
"split_kwargs": { "test_size": 0.0001 }
@@ -20,8 +21,8 @@
2021
"covariance": {
2122
"SETS": [
2223
"sklearnex spmd implementation",
23-
"large scale strong 2k parameters",
24-
"synthetic data",
24+
"large scale strong <=64 parameters",
25+
"synthetic data",
2526
"spmd basicstats parameters"
2627
]
2728
}

configs/spmd/large_scale/dbscan.json

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@
99
},
1010
"estimator_params" : {
1111
"eps": 10, "min_samples": 5
12-
}
12+
},
13+
"sklearnex_context": { "use_raw_input": true }
1314
},
1415
"data": {
1516
"dtype": "float64"
1617
}
1718
},
1819
"synthetic dataset": {
1920
"data": [
20-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 100, "centers": 10 } }
21+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 40000, "n_features": 100, "centers": 10 } }
2122
]
2223
}
2324
},

configs/spmd/large_scale/dbscan_strong.json

+8-7
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,22 @@
33
"PARAMETERS_SETS": {
44
"spmd dbscan parameters": {
55
"algorithm": {
6-
"estimator": "DBSCAN",
7-
"estimator_methods": {
8-
"training": "fit"
6+
"estimator": "DBSCAN",
7+
"estimator_methods": {
8+
"training": "fit"
99
},
1010
"estimator_params" : {
11-
"eps": 10, "min_samples": 5
12-
}
11+
"eps": 15, "min_samples": 50
12+
},
13+
"sklearnex_context": { "use_raw_input": true }
1314
},
1415
"data": {
1516
"dtype": "float64"
1617
}
1718
},
1819
"synthetic dataset": {
1920
"data": [
20-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 100, "centers": 10 } }
21+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 4000000, "n_features": 100, "centers": 10 } }
2122
]
2223
}
2324
},
@@ -27,7 +28,7 @@
2728
"common dbscan parameters",
2829
"synthetic dataset",
2930
"sklearnex spmd implementation",
30-
"large scale strong <=64 parameters",
31+
"large scale strong <=64 parameters",
3132
"spmd dbscan parameters"
3233
]
3334
}

configs/spmd/large_scale/forest.json

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd forest classification parameters": {
5+
"algorithm": {
6+
"estimator": "RandomForestClassifier",
7+
"estimator_methods": { "training": "fit" },
8+
"estimator_params": { "n_estimators": 20, "max_depth": 10 },
9+
"sklearnex_context": { "use_raw_input": true }
10+
}
11+
},
12+
"synthetic data": {
13+
"data": [
14+
{ "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2 } }
15+
]
16+
}
17+
},
18+
"TEMPLATES": {
19+
"forestCls": {
20+
"SETS": [
21+
"sklearnex spmd implementation",
22+
"large scale 32 parameters",
23+
"synthetic data",
24+
"spmd forest classification parameters"
25+
]
26+
}
27+
}
28+
}

configs/spmd/large_scale/forest.json → configs/spmd/large_scale/forest_no_max_samples.json

+4-5
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@
44
"spmd forest classification parameters": {
55
"algorithm": {
66
"estimator": "RandomForestClassifier",
7-
"estimator_methods": { "training": "fit" },
8-
"estimator_params": { "n_estimators": 20, "max_depth": 4 }
7+
"estimator_params": { "n_estimators": 100, "max_depth": 7 },
8+
"sklearnex_context": { "use_raw_input": true }
99
}
1010
},
1111
"synthetic data": {
1212
"data": [
13-
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 } },
14-
{ "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "n_classes": 2 } }
13+
{ "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
1514
]
1615
}
1716
},
@@ -20,7 +19,7 @@
2019
"SETS": [
2120
"sklearnex spmd implementation",
2221
"large scale 2k parameters",
23-
"synthetic data",
22+
"synthetic data",
2423
"spmd forest classification parameters"
2524
]
2625
}

configs/spmd/large_scale/forest_strong.json

+6-5
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,23 @@
44
"spmd forest classification parameters": {
55
"algorithm": {
66
"estimator": "RandomForestClassifier",
7-
"estimator_methods": { "training": "fit" },
8-
"estimator_params": { "n_estimators": 20, "max_depth": 4 }
7+
"estimator_methods": { "training": "fit" },
8+
"estimator_params": { "n_estimators": 100, "max_depth": 8 },
9+
"sklearnex_context": {"use_raw_input": true}
910
}
1011
},
1112
"synthetic data": {
1213
"data": [
13-
{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 } }
14+
{ "source": "make_classification", "split_kwargs": { "train_size": 20000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 21000, "n_features": 200, "n_classes": 2 } }
1415
]
1516
}
1617
},
1718
"TEMPLATES": {
1819
"forestCls": {
1920
"SETS": [
2021
"sklearnex spmd implementation",
21-
"large scale strong 2k parameters",
22-
"synthetic data",
22+
"large scale strong <=64 parameters",
23+
"synthetic data",
2324
"spmd forest classification parameters"
2425
]
2526
}

0 commit comments

Comments
 (0)