Skip to content

Commit fc29011

Browse files
committed
initial alignment of configs to final results
1 parent 72d65c1 commit fc29011

18 files changed

+164
-88
lines changed

configs/spmd/large_scale/basic_stats.json

+8-7
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "BasicStatistics",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
9-
"data": {
10-
"split_kwargs": { "test_size": 0.0001 }
11-
}
10+
"data": {
11+
"split_kwargs": { "test_size": 0.0001 }
12+
}
1213
},
1314
"synthetic data": {
1415
"data": [
15-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
17+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
1718
]
1819
}
1920
},
@@ -22,7 +23,7 @@
2223
"SETS": [
2324
"sklearnex spmd implementation",
2425
"large scale 2k parameters",
25-
"synthetic data",
26+
"synthetic data",
2627
"spmd basicstats parameters"
2728
]
2829
}

configs/spmd/large_scale/basic_stats_strong.json

+7-6
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "BasicStatistics",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
9-
"data": {
10-
"split_kwargs": { "test_size": 0.0001 }
11-
}
10+
"data": {
11+
"split_kwargs": { "test_size": 0.0001 }
12+
}
1213
},
1314
"synthetic data": {
1415
"data": [
@@ -20,8 +21,8 @@
2021
"basicstats": {
2122
"SETS": [
2223
"sklearnex spmd implementation",
23-
"large scale strong 2k parameters",
24-
"synthetic data",
24+
"large scale strong <=64 parameters",
25+
"synthetic data",
2526
"spmd basicstats parameters"
2627
]
2728
}

configs/spmd/large_scale/covariance.json

+5-4
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "EmpiricalCovariance",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
910
"data": {
1011
"split_kwargs": { "test_size": 0.0001 }
1112
}
1213
},
1314
"synthetic data": {
1415
"data": [
15-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
17+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
1718
]
1819
}
1920
},
@@ -22,7 +23,7 @@
2223
"SETS": [
2324
"sklearnex spmd implementation",
2425
"large scale 2k parameters",
25-
"synthetic data",
26+
"synthetic data",
2627
"spmd basicstats parameters"
2728
]
2829
}

configs/spmd/large_scale/covariance_strong.json

+4-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"spmd basicstats parameters": {
55
"algorithm": {
66
"estimator": "EmpiricalCovariance",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
},
910
"data": {
1011
"split_kwargs": { "test_size": 0.0001 }
@@ -20,8 +21,8 @@
2021
"covariance": {
2122
"SETS": [
2223
"sklearnex spmd implementation",
23-
"large scale strong 2k parameters",
24-
"synthetic data",
24+
"large scale strong <=64 parameters",
25+
"synthetic data",
2526
"spmd basicstats parameters"
2627
]
2728
}

configs/spmd/large_scale/dbscan.json

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@
99
},
1010
"estimator_params" : {
1111
"eps": 10, "min_samples": 5
12-
}
12+
},
13+
"sklearnex_context": { "use_raw_input": true }
1314
},
1415
"data": {
1516
"dtype": "float64"
1617
}
1718
},
1819
"synthetic dataset": {
1920
"data": [
20-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 100, "centers": 10 } }
21+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 40000, "n_features": 100, "centers": 10 } }
2122
]
2223
}
2324
},

configs/spmd/large_scale/dbscan_strong.json

+8-7
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,22 @@
33
"PARAMETERS_SETS": {
44
"spmd dbscan parameters": {
55
"algorithm": {
6-
"estimator": "DBSCAN",
7-
"estimator_methods": {
8-
"training": "fit"
6+
"estimator": "DBSCAN",
7+
"estimator_methods": {
8+
"training": "fit"
99
},
1010
"estimator_params" : {
11-
"eps": 10, "min_samples": 5
12-
}
11+
"eps": 15, "min_samples": 50
12+
},
13+
"sklearnex_context": { "use_raw_input": true }
1314
},
1415
"data": {
1516
"dtype": "float64"
1617
}
1718
},
1819
"synthetic dataset": {
1920
"data": [
20-
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 100, "centers": 10 } }
21+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 4000000, "n_features": 100, "centers": 10 } }
2122
]
2223
}
2324
},
@@ -27,7 +28,7 @@
2728
"common dbscan parameters",
2829
"synthetic dataset",
2930
"sklearnex spmd implementation",
30-
"large scale strong <=64 parameters",
31+
"large scale strong <=64 parameters",
3132
"spmd dbscan parameters"
3233
]
3334
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd forest classification parameters": {
5+
"algorithm": {
6+
"estimator": "RandomForestClassifier",
7+
"estimator_methods": { "training": "fit" },
8+
"estimator_params": { "n_estimators": 20, "max_depth": 10 },
9+
"sklearnex_context": { "use_raw_input": true }
10+
}
11+
},
12+
"synthetic data": {
13+
"data": [
14+
{ "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2 } }
15+
]
16+
}
17+
},
18+
"TEMPLATES": {
19+
"forestCls": {
20+
"SETS": [
21+
"sklearnex spmd implementation",
22+
"large scale 32 parameters",
23+
"synthetic data",
24+
"spmd forest classification parameters"
25+
]
26+
}
27+
}
28+
}

configs/spmd/large_scale/forest.json renamed to configs/spmd/large_scale/forest_no_max_samples.json

+4-5
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@
44
"spmd forest classification parameters": {
55
"algorithm": {
66
"estimator": "RandomForestClassifier",
7-
"estimator_methods": { "training": "fit" },
8-
"estimator_params": { "n_estimators": 20, "max_depth": 4 }
7+
"estimator_params": { "n_estimators": 100, "max_depth": 7 },
8+
"sklearnex_context": { "use_raw_input": true }
99
}
1010
},
1111
"synthetic data": {
1212
"data": [
13-
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 } },
14-
{ "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "n_classes": 2 } }
13+
{ "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2 }, "n_informative": "[SPECIAL_VALUE]0.5" }
1514
]
1615
}
1716
},
@@ -20,7 +19,7 @@
2019
"SETS": [
2120
"sklearnex spmd implementation",
2221
"large scale 2k parameters",
23-
"synthetic data",
22+
"synthetic data",
2423
"spmd forest classification parameters"
2524
]
2625
}

configs/spmd/large_scale/forest_strong.json

+6-5
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,23 @@
44
"spmd forest classification parameters": {
55
"algorithm": {
66
"estimator": "RandomForestClassifier",
7-
"estimator_methods": { "training": "fit" },
8-
"estimator_params": { "n_estimators": 20, "max_depth": 4 }
7+
"estimator_methods": { "training": "fit" },
8+
"estimator_params": { "n_estimators": 100, "max_depth": 8 },
9+
"sklearnex_context": {"use_raw_input": true}
910
}
1011
},
1112
"synthetic data": {
1213
"data": [
13-
{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 } }
14+
{ "source": "make_classification", "split_kwargs": { "train_size": 20000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 21000, "n_features": 200, "n_classes": 2 } }
1415
]
1516
}
1617
},
1718
"TEMPLATES": {
1819
"forestCls": {
1920
"SETS": [
2021
"sklearnex spmd implementation",
21-
"large scale strong 2k parameters",
22-
"synthetic data",
22+
"large scale strong <=64 parameters",
23+
"synthetic data",
2324
"spmd forest classification parameters"
2425
]
2526
}

configs/spmd/large_scale/knn_strong.json

+9-8
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,24 @@
33
"PARAMETERS_SETS": {
44
"spmd knn cls parameters": {
55
"algorithm": {
6-
"estimator": "KNeighborsClassifier",
6+
"estimator": "KNeighborsClassifier",
77
"estimator_params": {
88
"algorithm": "brute",
99
"metric": "minkowski",
1010
"p": 2,
1111
"weights": "uniform",
12-
"n_neighbors": 5
12+
"n_neighbors": 100
1313
},
14-
"estimator_methods": {
15-
"training": "fit",
16-
"inference": "predict"
17-
}
14+
"estimator_methods": {
15+
"training": "fit",
16+
"inference": "predict"
17+
},
18+
"sklearnex_context": { "use_raw_input": true }
1819
}
1920
},
2021
"synthetic classification data": {
2122
"data": [
22-
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000000 }, "generation_kwargs": { "n_samples": 1500000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
23+
{ "source": "make_classification", "split_kwargs": { "train_size": 3000000, "test_size": 2000000 }, "generation_kwargs": { "n_samples": 5000000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
2324
]
2425
}
2526
},
@@ -28,7 +29,7 @@
2829
"SETS": [
2930
"synthetic classification data",
3031
"sklearnex spmd implementation",
31-
"large scale strong <=64 parameters",
32+
"large scale strong <=64 parameters",
3233
"spmd knn cls parameters"
3334
]
3435
}
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd knn cls parameters": {
5+
"algorithm": {
6+
"estimator": "KNeighborsClassifier",
7+
"estimator_params": {
8+
"algorithm": "brute",
9+
"metric": "minkowski",
10+
"p": 2,
11+
"weights": "uniform"
12+
},
13+
"estimator_methods": {
14+
"training": "fit",
15+
"inference": "predict"
16+
},
17+
"sklearnex_context": { "use_raw_input": true }
18+
}
19+
},
20+
"synthetic classification data": [
21+
{ "data": { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 100000}, "generation_kwargs": { "n_samples": 2000000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, "algorithm": { "estimator_params": { "n_neighbors": 5 } } },
22+
{ "data": { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 10000}, "generation_kwargs": { "n_samples": 2000000, "n_features": 1000, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, "algorithm": { "estimator_params": { "n_neighbors": 100 } } }
23+
]
24+
},
25+
"TEMPLATES": {
26+
"knn classifier": {
27+
"SETS": [
28+
"synthetic classification data",
29+
"sklearnex spmd implementation",
30+
"large scale 32 parameters",
31+
"spmd knn cls parameters"
32+
]
33+
}
34+
}
35+
}

configs/spmd/large_scale/knn.json renamed to configs/spmd/large_scale/knn_tier2.json

+10-9
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,24 @@
33
"PARAMETERS_SETS": {
44
"spmd knn cls parameters": {
55
"algorithm": {
6-
"estimator": "KNeighborsClassifier",
6+
"estimator": "KNeighborsClassifier",
77
"estimator_params": {
88
"algorithm": "brute",
99
"metric": "minkowski",
1010
"p": 2,
1111
"weights": "uniform",
12-
"n_neighbors": 5
12+
"n_neighbors": 5
1313
},
14-
"estimator_methods": {
15-
"training": "fit",
16-
"inference": "predict"
17-
}
18-
}
14+
"estimator_methods": {
15+
"training": "fit",
16+
"inference": "predict"
17+
},
18+
"sklearnex_context": { "use_raw_input": true }
19+
}
1920
},
2021
"synthetic classification data": {
2122
"data": [
22-
{ "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 5000 }, "generation_kwargs": { "n_samples": 5005000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
23+
{ "source": "make_classification", "split_kwargs": { "train_size": 100, "test_size": 100}, "generation_kwargs": { "n_samples": 200, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
2324
]
2425
}
2526
},
@@ -28,7 +29,7 @@
2829
"SETS": [
2930
"synthetic classification data",
3031
"sklearnex spmd implementation",
31-
"large scale 2k parameters",
32+
"large scale 2k parameters",
3233
"spmd knn cls parameters"
3334
]
3435
}

configs/spmd/large_scale/linreg.json

+5-4
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
"spmd linear parameters": {
55
"algorithm": {
66
"estimator": "LinearRegression",
7-
"estimator_methods": { "training": "fit" }
7+
"estimator_methods": { "training": "fit" },
8+
"sklearnex_context": { "use_raw_input": true }
89
}
910
},
1011
"synthetic data": {
1112
"data": [
12-
{ "source": "make_regression", "generation_kwargs": { "n_samples": 30005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 30000000, "test_size": 5000 } },
13-
{ "source": "make_regression", "generation_kwargs": { "n_samples": 305000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }
13+
{ "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 100000000, "test_size": 5000 } },
14+
{ "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } }
1415
]
1516
}
1617
},
@@ -19,7 +20,7 @@
1920
"SETS": [
2021
"sklearnex spmd implementation",
2122
"large scale 2k parameters",
22-
"synthetic data",
23+
"synthetic data",
2324
"spmd linear parameters"
2425
]
2526
}

0 commit comments

Comments
 (0)