diff --git a/configs/README.md b/configs/README.md index 8d3c5ac2..e1cf8390 100644 --- a/configs/README.md +++ b/configs/README.md @@ -104,7 +104,7 @@ Configs have the three highest parameter keys: | `data`:`format` | `pandas` | `pandas`, `numpy`, `cudf` | Data format to use in benchmark. | | `data`:`order` | `F` | `C`, `F` | Data order to use in benchmark: contiguous(C) or Fortran. | | `data`:`dtype` | `float64` | | Data type to use in benchmark. | -| `data`:`distributed_split` | None | None, `rank_based` | Split type used to distribute data between machines in distributed algorithm. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. | +| `data`:`distributed_split` | None | None, `rank_based`, `sample_shift` | Split type used to distribute data between machines in a distributed algorithm. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. `sample_shift` type gives every rank the full dataset but scales each data point by a factor of `(sqrt(rank_id) * 0.003) + 1`. | |
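For illustration, the `sample_shift` option amounts to the per-rank scaling sketched below. This is a minimal sketch mirroring the logic this patch adds in `sklbench/datasets/transformer.py`; the standalone helper name is ours:

```python
import math

def apply_sample_shift(x, rank):
    # Each rank multiplies its local copy of the data by
    # (sqrt(rank) * 0.003) + 1, shifting the per-rank distribution
    # instead of benchmarking identical copies of the generated data.
    return x * ((math.sqrt(rank) * 0.003) + 1)

# rank 0 keeps its data unchanged (factor 1.0), while rank 24576
# scales its data by sqrt(24576) * 0.003 + 1, or about 1.47
```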

Algorithm parameters

|||| | `algorithm`:`library` | None | | Python module containing measured entity (class or function). | | `algorithm`:`device` | `default` | `default`, `cpu`, `gpu` | Device selected for computation. | diff --git a/configs/common/sklearn.json b/configs/common/sklearn.json index d7b13188..43051093 100644 --- a/configs/common/sklearn.json +++ b/configs/common/sklearn.json @@ -12,6 +12,11 @@ { "library": "sklearnex", "device": ["cpu", "gpu"] } ] }, + "sklearn-ex[gpu] implementations": { + "algorithm": [ + { "library": "sklearnex", "device": ["gpu"] } + ] + }, "sklearn-ex[preview] implementations": { "algorithm": [ { "library": "sklearn", "device": "cpu" }, diff --git a/configs/regular/batch_for_online.json b/configs/regular/batch_for_online.json new file mode 100644 index 00000000..973c4ed4 --- /dev/null +++ b/configs/regular/batch_for_online.json @@ -0,0 +1,85 @@ +{ + "INCLUDE": ["../common/sklearn.json"], + "PARAMETERS_SETS": { + "common": {"bench": {"n_runs": 10}}, + "basic_statistics data": { + "data": { + "source": "make_blobs", + "generation_kwargs": { + "centers": 1, + "n_samples": 12000000, + "n_features": [10, 100] + }, + "split_kwargs": {"ignore": true} + } + }, + "linear_regression data": { + "data": { + "source": "make_regression", + "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, + "generation_kwargs": { + "n_samples": 12000000, + "n_features": [10, 100], + "n_informative": 5, + "noise": 2.0 + } + } + }, + "pca data": { + "data": { + "source": "make_blobs", + "generation_kwargs": { + "centers": 1, + "n_samples": 12000000, + "n_features": [10, 100] + }, + "split_kwargs": {"ignore": true} + } + }, + "basic_statistics": { + "algorithm": [ + { + "estimator": "BasicStatistics", + "library": "sklearnex.basic_statistics", + "estimator_methods": {"training": "fit"} + } + ] + }, + "covariance": { + "algorithm": [ + { + "estimator": "EmpiricalCovariance", + "library": "sklearnex.preview.covariance", + "estimator_methods": {"training": "fit"} + } + ] + }, + "linear_regression": { + "algorithm": [ + { + "estimator": "LinearRegression", + "library": "sklearnex.linear_model", + "estimator_methods": {"training": "fit"} + } + ] + }, + "pca": { + "algorithm": [ + { + "estimator": "PCA", + "library": "sklearnex.decomposition", + "estimator_methods": {"training": "fit"} + } + ] + } + }, + "TEMPLATES": { + "basic_statistics": {"SETS": ["common", "basic_statistics", "basic_statistics data", "sklearn-ex[gpu] implementations"]}, + "covariance": {"SETS": ["common", "basic_statistics data", "sklearn-ex[gpu] implementations", "covariance"]}, + "linear_regression": { + "SETS": ["common", "linear_regression", "linear_regression data", "sklearn-ex[gpu] implementations"] + }, + "pca": {"SETS": ["common", "pca", "pca data", "sklearn-ex[gpu] implementations"]} + } +} + diff --git a/configs/regular/bf16/basic_statistics.json b/configs/regular/bf16/basic_statistics.json new file mode 100644 index 00000000..671521ab --- /dev/null +++ b/configs/regular/bf16/basic_statistics.json @@ -0,0 +1,27 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "basic stats parameters": { + "algorithm": { + "estimator": "BasicStatistics" + }, + "data": { + "dtype": ["float32"] + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "basic_statistics": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "basic stats parameters", + "synthetic data" + ] + } + } +} diff --git 
a/configs/regular/bf16/covariance.json b/configs/regular/bf16/covariance.json new file mode 100644 index 00000000..1cd6ef4a --- /dev/null +++ b/configs/regular/bf16/covariance.json @@ -0,0 +1,28 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "covariance parameters": { + "algorithm": { + "estimator": "EmpiricalCovariance", + "library": "sklearnex.preview.covariance" + }, + "data": { + "dtype": ["float32"] + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "covariance": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "covariance parameters", + "synthetic data" + ] + } + } +} diff --git a/configs/regular/bf16/dbscan.json b/configs/regular/bf16/dbscan.json new file mode 100644 index 00000000..b91120e8 --- /dev/null +++ b/configs/regular/bf16/dbscan.json @@ -0,0 +1,41 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "common dbscan parameters": { + "algorithm": { + "estimator": "DBSCAN", + "estimator_params": { + "eps": "[SPECIAL_VALUE]distances_quantile:0.01", + "min_samples": 5, + "metric": "euclidean" + } + }, + "data": { + "dtype": ["float32"] + } + }, + "sklearn dbscan parameters": { + "algorithm": { + "estimator_params": { + "algorithm": "brute", + "n_jobs": "[SPECIAL_VALUE]physical_cpus" + } + } + }, + "synthetic dataset": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 10, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } } + ] + } + }, + "TEMPLATES": { + "sklearn dbscan": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common dbscan parameters", + "sklearn dbscan parameters", + "synthetic dataset" + ] + } + } +} diff --git a/configs/regular/bf16/forest.json b/configs/regular/bf16/forest.json new file mode 100644 index 00000000..845b73a2 --- /dev/null +++ b/configs/regular/bf16/forest.json @@ -0,0 +1,34 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "common forest params": { + "data": { + "dtype": ["float32"] + } + }, + "forest classifier params": { + "algorithm": {"estimator": "RandomForestClassifier"}, + "data": { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } } + }, + "forest regression params": { + "algorithm": {"estimator": "RandomForestRegressor"}, + "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 501000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }} + } + }, + "TEMPLATES": { + "forest cls": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common forest params", + "forest classifier params" + ] + }, + "forest reg": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common forest params", + "forest regression params" + ] + } + } +} diff --git a/configs/regular/bf16/kmeans.json b/configs/regular/bf16/kmeans.json new file mode 100644 index 00000000..8a5323c5 --- /dev/null +++ b/configs/regular/bf16/kmeans.json @@ -0,0 +1,40 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "common kmeans parameters": { + "algorithm": { + "estimator": "KMeans", + "estimator_params": { + "n_clusters": "[SPECIAL_VALUE]auto", + "n_init": 
1, + "max_iter": 30, + "tol": 1e-3, + "random_state": 42 + }, + "estimator_methods": { "inference": "predict" } + }, + "data": { + "dtype": ["float32"], + "preprocessing_kwargs": { "normalize": true } + } + }, + "sklearn kmeans parameters": { + "algorithm": { "estimator_params": { "init": "k-means++", "algorithm": "lloyd" } } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 100, "centers": 100 }, "algorithm": { "n_clusters": 100, "max_iter": 100 } } + ] + } + }, + "TEMPLATES": { + "sklearn kmeans": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common kmeans parameters", + "sklearn kmeans parameters", + "synthetic data" + ] + } + } +} diff --git a/configs/regular/bf16/knn.json b/configs/regular/bf16/knn.json new file mode 100644 index 00000000..fabf6d6d --- /dev/null +++ b/configs/regular/bf16/knn.json @@ -0,0 +1,56 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "common knn parameters": { + "algorithm": { + "estimator_params": { + "n_neighbors": [10, 100], + "weights": "uniform" + } + }, + "data": { + "dtype": ["float32"], + "preprocessing_kwargs": { "normalize": true } + } + }, + "sklearn knn parameters": { + "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } } + }, + "synthetic classification data": { + "algorithm": { + "estimator": "KNeighborsClassifier", + "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] } + }, + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + ] + }, + "synthetic regression data": { + "algorithm": { + "estimator": "KNeighborsRegressor", + "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] } + }, + "data": [ + { "source": "make_regression", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "noise":1.5 } } + ] + } + }, + "TEMPLATES": { + "sklearn brute knn clsf": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common knn parameters", + "sklearn knn parameters", + "synthetic classification data" + ] + }, + "sklearn brute knn reg": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common knn parameters", + "sklearn knn parameters", + "synthetic regression data" + ] + } + } +} diff --git a/configs/regular/bf16/linear_model.json b/configs/regular/bf16/linear_model.json new file mode 100644 index 00000000..23aa49c0 --- /dev/null +++ b/configs/regular/bf16/linear_model.json @@ -0,0 +1,33 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "synthetic data": { + "data": [ + { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } } + ] + }, + "common linear parameters": { + "algorithm": { + "estimator": "LinearRegression", + "estimator_params": { "fit_intercept": true, "copy_X": true } + }, + "data": { + "dtype": ["float32"], + "order": "C" + } + }, + "sklearn linear parameters": { + "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } } + } + }, + "TEMPLATES": { + "sklearn linear": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common linear parameters", + "sklearn linear parameters", + "synthetic data" + ] + } + } +} diff --git a/configs/regular/bf16/logreg.json
b/configs/regular/bf16/logreg.json new file mode 100644 index 00000000..863d67f9 --- /dev/null +++ b/configs/regular/bf16/logreg.json @@ -0,0 +1,45 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "common logreg parameters": { + "algorithm": { + "estimator": "LogisticRegression", + "estimator_methods": { "inference": "predict" }, + "estimator_params": { + "penalty": "l2", + "tol": 1e-4, + "C": 1.0, + "l1_ratio": null, + "max_iter": 20 + } + }, + "data": { + "dtype": ["float32"] + } + }, + "sklearn logreg parameters": { + "algorithm": { + "estimator_params": { + "solver": "newton-cg", + "n_jobs": "[SPECIAL_VALUE]physical_cpus", + "random_state": 42 + } + } + }, + "synthetic data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } } + ] + } + }, + "TEMPLATES": { + "sklearn logreg": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "common logreg parameters", + "sklearn logreg parameters", + "synthetic data" + ] + } + } +} diff --git a/configs/regular/bf16/pca.json b/configs/regular/bf16/pca.json new file mode 100644 index 00000000..e5113261 --- /dev/null +++ b/configs/regular/bf16/pca.json @@ -0,0 +1,36 @@ +{ + "INCLUDE": ["../../common/sklearn.json"], + "PARAMETERS_SETS": { + "pca parameters": { + "algorithm": { + "estimator": "PCA", + "estimator_params": { + "n_components": 3, + "copy": true, + "whiten": false, + "svd_solver": "covariance_eigh", + "tol": 0.0, + "iterated_power": 15, + "random_state": 42 + } + }, + "data": { + "dtype": ["float32"] + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "sklearn pca": { + "SETS": [ + "sklearn-ex[gpu] implementations", + "pca parameters", + "synthetic data" + ] + } + } +} diff --git a/configs/regular/dbscan.json b/configs/regular/dbscan.json index 71dcdc9b..711c15cd 100644 --- a/configs/regular/dbscan.json +++ b/configs/regular/dbscan.json @@ -58,19 +58,11 @@ "TEMPLATES": { "sklearn dbscan": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common dbscan parameters", "sklearn dbscan parameters", "dbscan datasets" ] - }, - "cuml dbscan": { - "SETS": [ - "cuml implementation", - "common dbscan parameters", - "cuml dbscan parameters", - "dbscan datasets" - ] } } } diff --git a/configs/regular/ensemble.json b/configs/regular/ensemble.json index 56e37e77..f01c1383 100644 --- a/configs/regular/ensemble.json +++ b/configs/regular/ensemble.json @@ -90,7 +90,7 @@ "TEMPLATES": { "sklearn ensemble classification": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common ensemble params", "sklearn ensemble classifier params", "ensemble classification data" @@ -98,27 +98,11 @@ }, "sklearn ensemble regression": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common ensemble params", "sklearn ensemble regressor params", "ensemble regression data" ] - }, - "cuml ensemble classification": { - "SETS": [ - "cuml implementation", - "common ensemble params", - "cuml ensemble classifier params", - "ensemble classification data" - ] - }, - "cuml ensemble regression": { - "SETS": [ - "cuml implementation", - "common ensemble params", - "cuml ensemble regressor params", - "ensemble regression data" - ] } } } diff --git a/configs/regular/kmeans.json 
b/configs/regular/kmeans.json index bcb7026f..756e2bab 100644 --- a/configs/regular/kmeans.json +++ b/configs/regular/kmeans.json @@ -70,19 +70,11 @@ "TEMPLATES": { "sklearn kmeans": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common kmeans parameters", "sklearn kmeans parameters", "kmeans datasets" ] - }, - "cuml kmeans": { - "SETS": [ - "cuml implementation", - "common kmeans parameters", - "cuml kmeans parameters", - "kmeans datasets" - ] } } } diff --git a/configs/regular/knn.json b/configs/regular/knn.json index e1cd8a75..a69c6864 100644 --- a/configs/regular/knn.json +++ b/configs/regular/knn.json @@ -74,47 +74,17 @@ "TEMPLATES": { "sklearn brute knn clsf": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common knn parameters", "sklearn knn parameters", "brute knn algorithm - classification data" ] }, - "sklearn kd_tree knn clsf": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common knn parameters", - "sklearn knn parameters", - "kd_tree knn algorithm - classification data" - ] - }, "sklearn brute knn regr": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", - "common knn parameters", - "sklearn knn parameters", - "brute knn algorithm - regression data" - ] - }, - "sklearn kd_tree knn regr": { - "SETS": [ - "sklearn-ex[cpu] implementations", + "sklearn-ex[gpu] implementations", "common knn parameters", "sklearn knn parameters", - "kd_tree knn algorithm - regression data" - ] - }, - "cuml brute knn clsf": { - "SETS": [ - "cuml implementation", - "common knn parameters", - "brute knn algorithm - classification data" - ] - }, - "cuml brute knn regr": { - "SETS": [ - "cuml implementation", - "common knn parameters", "brute knn algorithm - regression data" ] } diff --git a/configs/regular/linear_model.json b/configs/regular/linear_model.json index eb1b79ba..3040c82d 100644 --- a/configs/regular/linear_model.json +++ b/configs/regular/linear_model.json @@ -85,34 +85,12 @@ "TEMPLATES": { "sklearn linear": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common linear parameters", "sklearn linear parameters", "regression datasets" ] }, - "sklearn ridge": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common ridge parameters", - "sklearn ridge parameters", - "regression datasets" - ] - }, - "sklearn lasso": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common lasso parameters", - "regression datasets" - ] - }, - "sklearn elasticnet": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common elasticnet parameters", - "regression datasets" - ] - }, "cuml linear": { "SETS": [ "cuml implementation", @@ -120,30 +98,6 @@ "cuml L2 parameters", "regression datasets" ] - }, - "cuml ridge": { - "SETS": [ - "cuml implementation", - "common ridge parameters", - "cuml L2 parameters", - "regression datasets" - ] - }, - "cuml lasso": { - "SETS": [ - "cuml implementation", - "common lasso parameters", - "cuml L1 parameters", - "regression datasets" - ] - }, - "cuml elasticnet": { - "SETS": [ - "cuml implementation", - "common elasticnet parameters", - "cuml L1 parameters", - "regression datasets" - ] } } } diff --git a/configs/regular/logreg.json b/configs/regular/logreg.json index a94a7fcf..a8323b02 100644 --- a/configs/regular/logreg.json +++ b/configs/regular/logreg.json @@ -54,19 +54,11 @@ "TEMPLATES": { "sklearn logreg": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", + "sklearn-ex[gpu] implementations", "common logreg parameters", "sklearn 
logreg parameters", "logreg datasets" ] - }, - "cuml logreg": { - "SETS": [ - "cuml implementation", - "common logreg parameters", - "cuml logreg parameters", - "logreg datasets" - ] } } } diff --git a/configs/regular/pca.json b/configs/regular/pca.json index 582acc9e..e26d3f44 100644 --- a/configs/regular/pca.json +++ b/configs/regular/pca.json @@ -46,14 +46,7 @@ "TEMPLATES": { "sklearn pca": { "SETS": [ - "sklearn-ex[cpu,gpu] implementations", - "pca parameters", - "pca datasets" - ] - }, - "cuml pca": { - "SETS": [ - "cuml implementation", + "sklearn-ex[gpu] implementations", "pca parameters", "pca datasets" ] diff --git a/configs/spmd/large_scale/basic_stats.json b/configs/spmd/large_scale/basic_stats.json new file mode 100644 index 00000000..f8f44e4e --- /dev/null +++ b/configs/spmd/large_scale/basic_stats.json @@ -0,0 +1,31 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd basicstats parameters": { + "algorithm": { + "estimator": "BasicStatistics", + "estimator_methods": { "training": "fit" }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } }, + { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "basicstats": { + "SETS": [ + "sklearnex spmd implementation", + "large scale 2k parameters", + "synthetic data", + "spmd basicstats parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/basic_stats_strong.json b/configs/spmd/large_scale/basic_stats_strong.json new file mode 100644 index 00000000..0c7c671e --- /dev/null +++ b/configs/spmd/large_scale/basic_stats_strong.json @@ -0,0 +1,30 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd basicstats parameters": { + "algorithm": { + "estimator": "BasicStatistics", + "estimator_methods": { "training": "fit" }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "basicstats": { + "SETS": [ + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "synthetic data", + "spmd basicstats parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/covariance.json b/configs/spmd/large_scale/covariance.json new file mode 100644 index 00000000..7f4d6d7d --- /dev/null +++ b/configs/spmd/large_scale/covariance.json @@ -0,0 +1,31 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd basicstats parameters": { + "algorithm": { + "estimator": "EmpiricalCovariance", + "estimator_methods": { "training": "fit" }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } }, + { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "covariance": { + "SETS": [ + "sklearnex spmd implementation", + "large scale 2k parameters", + "synthetic data", + "spmd basicstats parameters" + 
] + } + } +} diff --git a/configs/spmd/large_scale/covariance_strong.json b/configs/spmd/large_scale/covariance_strong.json new file mode 100644 index 00000000..8e388801 --- /dev/null +++ b/configs/spmd/large_scale/covariance_strong.json @@ -0,0 +1,30 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd basicstats parameters": { + "algorithm": { + "estimator": "EmpiricalCovariance", + "estimator_methods": { "training": "fit" }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "covariance": { + "SETS": [ + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "synthetic data", + "spmd basicstats parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/dbscan.json b/configs/spmd/large_scale/dbscan.json new file mode 100644 index 00000000..bf60b7cc --- /dev/null +++ b/configs/spmd/large_scale/dbscan.json @@ -0,0 +1,36 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd dbscan parameters": { + "algorithm": { + "estimator": "DBSCAN", + "estimator_methods": { + "training": "fit" + }, + "estimator_params" : { + "eps": 10, "min_samples": 5 + }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "dtype": "float64" + } + }, + "synthetic dataset": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 40000, "n_features": 100, "centers": 10 } } + ] + } + }, + "TEMPLATES": { + "dbscan": { + "SETS": [ + "common dbscan parameters", + "synthetic dataset", + "sklearnex spmd implementation", + "large scale <=64 parameters", + "spmd dbscan parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json new file mode 100644 index 00000000..5e7ab322 --- /dev/null +++ b/configs/spmd/large_scale/dbscan_strong.json @@ -0,0 +1,36 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd dbscan parameters": { + "algorithm": { + "estimator": "DBSCAN", + "estimator_methods": { + "training": "fit" + }, + "estimator_params" : { + "eps": 15, "min_samples": 50 + }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "dtype": "float64" + } + }, + "synthetic dataset": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 4000000, "n_features": 100, "centers": 10 } } + ] + } + }, + "TEMPLATES": { + "dbscan": { + "SETS": [ + "common dbscan parameters", + "synthetic dataset", + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "spmd dbscan parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/forest_max_samples.json b/configs/spmd/large_scale/forest_max_samples.json new file mode 100644 index 00000000..95affb16 --- /dev/null +++ b/configs/spmd/large_scale/forest_max_samples.json @@ -0,0 +1,28 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd forest classification parameters": { + "algorithm": { + "estimator": "RandomForestClassifier", + "estimator_methods": { "training": "fit" }, + "estimator_params": { "n_estimators": 20, "max_depth": 10 }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + 
{ "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2 } } + ] + } + }, + "TEMPLATES": { + "forestCls": { + "SETS": [ + "sklearnex spmd implementation", + "large scale 32 parameters", + "synthetic data", + "spmd forest classification parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/forest_no_max_samples.json b/configs/spmd/large_scale/forest_no_max_samples.json new file mode 100644 index 00000000..c371371b --- /dev/null +++ b/configs/spmd/large_scale/forest_no_max_samples.json @@ -0,0 +1,27 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd forest classification parameters": { + "algorithm": { + "estimator": "RandomForestClassifier", + "estimator_params": { "n_estimators": 100, "max_depth": 7 }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + ] + } + }, + "TEMPLATES": { + "forestCls": { + "SETS": [ + "sklearnex spmd implementation", + "large scale 2k parameters", + "synthetic data", + "spmd forest classification parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/forest_strong.json b/configs/spmd/large_scale/forest_strong.json new file mode 100644 index 00000000..653c70dc --- /dev/null +++ b/configs/spmd/large_scale/forest_strong.json @@ -0,0 +1,28 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd forest classification parameters": { + "algorithm": { + "estimator": "RandomForestClassifier", + "estimator_methods": { "training": "fit" }, + "estimator_params": { "n_estimators": 100, "max_depth": 8 }, + "sklearnex_context": {"use_raw_input": true} + } + }, + "synthetic data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 20000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 21000, "n_features": 200, "n_classes": 2 } } + ] + } + }, + "TEMPLATES": { + "forestCls": { + "SETS": [ + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "synthetic data", + "spmd forest classification parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/incremental.json b/configs/spmd/large_scale/incremental.json new file mode 100644 index 00000000..195074ee --- /dev/null +++ b/configs/spmd/large_scale/incremental.json @@ -0,0 +1,77 @@ +{ "INCLUDE": [ ], + "PARAMETERS_SETS": { + "common incremental raw gpu params": { + "algorithm": { + "device": "gpu", + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "format":"dpctl", + "order": "C" + } + }, + "statistical batches and data": [ + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 50000000, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 16666667, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_blobs", "generation_kwargs": {
"n_samples": 8333333, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 1000, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 166667, "n_features": 1000, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 83333, "n_features": 1000, "centers": 1 } } } + ], + "regression batches and data": [ + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 100000000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 50000000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 16666667, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 8333333, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 1500000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 250000, "test_size": 5000 } } } + ], + "covariance": { + "algorithm": { + "estimator": "IncrementalEmpiricalCovariance", + "library": "sklearnex", + "estimator_methods": {"training": "partial_fit"} + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "basic_statistics": { + "algorithm": { + "estimator": "IncrementalBasicStatistics", + "library": "sklearnex", + "estimator_methods": {"training": "partial_fit"} + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "linear_regression": { + "algorithm": { + "estimator": "IncrementalLinearRegression", + "library": "sklearnex", + "estimator_methods": {"training": "partial_fit"} + } + }, + "pca": { + "algorithm": { + "estimator": "IncrementalPCA", + "library": "sklearnex.preview", + "estimator_methods": {"training": "partial_fit"} + }, + "data": { + 
"split_kwargs": { "test_size": 0.0001 } + } + } + }, + "TEMPLATES": { + "basic_statistics": { "SETS": ["common incremental raw gpu params", "basic_statistics", "statistical batches and data"] }, + "covariance": { "SETS": ["common incremental raw gpu params", "covariance", "statistical batches and data"] }, + "linear_regression": { "SETS": ["common incremental raw gpu params", "linear_regression", "regression batches and data"] }, + "pca": { "SETS": ["common incremental raw gpu params", "pca", "statistical batches and data"] } + } +} diff --git a/configs/spmd/large_scale/kmeans_narrow_weak.json b/configs/spmd/large_scale/kmeans_narrow_weak.json new file mode 100644 index 00000000..69f0b6ac --- /dev/null +++ b/configs/spmd/large_scale/kmeans_narrow_weak.json @@ -0,0 +1,33 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd kmeans parameters": { + "algorithm": { + "estimator": "KMeans", + "estimator_params": { + "algorithm": "lloyd", + "max_iter": 20, + "n_clusters": 10, + "random_state": 42 + }, + "estimator_methods": { "training": "fit", "inference": "predict" }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 2000000, "n_features": 100, "centers": 2000, "cluster_std": 3, "center_box": 100.0}} + ] + } + }, + "TEMPLATES": { + "kmeans": { + "SETS": [ + "synthetic data", + "sklearnex spmd implementation", + "large scale 2k parameters sample shift", + "spmd kmeans parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/kmeans_strong.json b/configs/spmd/large_scale/kmeans_strong.json new file mode 100644 index 00000000..90a1ea3f --- /dev/null +++ b/configs/spmd/large_scale/kmeans_strong.json @@ -0,0 +1,33 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd kmeans parameters": { + "algorithm": { + "estimator": "KMeans", + "estimator_params": { + "algorithm": "lloyd", + "max_iter": 20, + "n_clusters": 100 + }, + "estimator_methods": { "training": "fit", "inference": "predict" }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 100 }} + ] + } + }, + "TEMPLATES": { + "kmeans": { + "SETS": [ + "synthetic data", + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "spmd kmeans parameters" + ] + } + } +} + diff --git a/configs/spmd/large_scale/kmeans_wide_weak.json b/configs/spmd/large_scale/kmeans_wide_weak.json new file mode 100644 index 00000000..5520f10a --- /dev/null +++ b/configs/spmd/large_scale/kmeans_wide_weak.json @@ -0,0 +1,34 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd kmeans parameters": { + "algorithm": { + "estimator": "KMeans", + "estimator_params": { + "algorithm": "lloyd", + "max_iter": 20, + "n_clusters": 10, + "random_state": 42 + }, + "estimator_methods": { "training": "fit", "inference": "predict" }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 2000}} + ] + } + }, + "TEMPLATES": { + "kmeans": { + "SETS": [ + "synthetic data", + "sklearnex spmd implementation", + "large scale 2k parameters", + "spmd kmeans parameters" + ] + } + } +} + diff --git a/configs/spmd/large_scale/knn_strong.json 
b/configs/spmd/large_scale/knn_strong.json new file mode 100644 index 00000000..36daf3f1 --- /dev/null +++ b/configs/spmd/large_scale/knn_strong.json @@ -0,0 +1,37 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd knn cls parameters": { + "algorithm": { + "estimator": "KNeighborsClassifier", + "estimator_params": { + "algorithm": "brute", + "metric": "minkowski", + "p": 2, + "weights": "uniform", + "n_neighbors": 100 + }, + "estimator_methods": { + "training": "fit", + "inference": "predict" + }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic classification data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 3000000, "test_size": 2000000 }, "generation_kwargs": { "n_samples": 5000000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + ] + } + }, + "TEMPLATES": { + "knn classifier": { + "SETS": [ + "synthetic classification data", + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "spmd knn cls parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/knn_tier1.json b/configs/spmd/large_scale/knn_tier1.json new file mode 100644 index 00000000..c230cc4e --- /dev/null +++ b/configs/spmd/large_scale/knn_tier1.json @@ -0,0 +1,35 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd knn cls parameters": { + "algorithm": { + "estimator": "KNeighborsClassifier", + "estimator_params": { + "algorithm": "brute", + "metric": "minkowski", + "p": 2, + "weights": "uniform" + }, + "estimator_methods": { + "training": "fit", + "inference": "predict" + }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic classification data": [ + { "data": { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 100000}, "generation_kwargs": { "n_samples": 2000000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, "algorithm": { "estimator_params": { "n_neighbors": 5 } } }, + { "data": { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 10000}, "generation_kwargs": { "n_samples": 2000000, "n_features": 1000, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, "algorithm": { "estimator_params": { "n_neighbors": 100 } } } + ] + }, + "TEMPLATES": { + "knn classifier": { + "SETS": [ + "synthetic classification data", + "sklearnex spmd implementation", + "large scale 32 parameters", + "spmd knn cls parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/knn_tier2.json b/configs/spmd/large_scale/knn_tier2.json new file mode 100644 index 00000000..ff0032e2 --- /dev/null +++ b/configs/spmd/large_scale/knn_tier2.json @@ -0,0 +1,37 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd knn cls parameters": { + "algorithm": { + "estimator": "KNeighborsClassifier", + "estimator_params": { + "algorithm": "brute", + "metric": "minkowski", + "p": 2, + "weights": "uniform", + "n_neighbors": 5 + }, + "estimator_methods": { + "training": "fit", + "inference": "predict" + }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic classification data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 100, "test_size": 100}, "generation_kwargs": { "n_samples": 200, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + ] + } + }, + "TEMPLATES": { + "knn classifier": { + "SETS": [ + 
"synthetic classification data", + "sklearnex spmd implementation", + "large scale 2k parameters", + "spmd knn cls parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json new file mode 100644 index 00000000..28626dc9 --- /dev/null +++ b/configs/spmd/large_scale/large_scale.json @@ -0,0 +1,85 @@ +{ + "PARAMETERS_SETS": { + "large scale default parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale strong parameters": { + "data": { + "dtype": "float64", + "distributed_split": "rank_based" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale 2k parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale 2k parameters sample shift": { + "data": { + "dtype": "float64", + "distributed_split": "sample_shift" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale 32 parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale <=64 parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale strong 2k parameters": { + "data": { + "dtype": "float64", + "distributed_split": "rank_based" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale strong <=64 parameters": { + "data": { + "dtype": "float64", + "distributed_split": "rank_based" + 
}, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale impi parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [1,2,4,6,9,12], "ppn": 12} + } + } + } +} diff --git a/configs/spmd/large_scale/linreg.json b/configs/spmd/large_scale/linreg.json new file mode 100644 index 00000000..7c7fb035 --- /dev/null +++ b/configs/spmd/large_scale/linreg.json @@ -0,0 +1,28 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd linear parameters": { + "algorithm": { + "estimator": "LinearRegression", + "estimator_methods": { "training": "fit" }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 100000000, "test_size": 5000 } }, + { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } } + ] + } + }, + "TEMPLATES": { + "linreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale 2k parameters", + "synthetic data", + "spmd linear parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/linreg_strong.json b/configs/spmd/large_scale/linreg_strong.json new file mode 100644 index 00000000..ac5a6c7a --- /dev/null +++ b/configs/spmd/large_scale/linreg_strong.json @@ -0,0 +1,27 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd linear parameters": { + "algorithm": { + "estimator": "LinearRegression", + "estimator_methods": { "training": "fit" }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_regression", "generation_kwargs": { "n_samples": 25005000, "n_features": 100, "noise": 1.25 }, "split_kwargs": { "train_size": 25000000, "test_size": 5000 } } + ] + } + }, + "TEMPLATES": { + "linreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "synthetic data", + "spmd linear parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/logreg.json b/configs/spmd/large_scale/logreg.json new file mode 100644 index 00000000..b7b4b998 --- /dev/null +++ b/configs/spmd/large_scale/logreg.json @@ -0,0 +1,30 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd logreg2 parameters": { + "algorithm":{ + "estimator": "LogisticRegression", + "estimator_methods": { "inference": "predict" }, + "estimator_params": { "max_iter": 10 }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } }, + { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 1000, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } } + ] + } + }, + "TEMPLATES": { + "logreg": { + "SETS": [ +
"sklearnex spmd implementation", + "large scale 2k parameters", + "spmd logreg parameters", + "synthetic data", + "spmd logreg2 parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json new file mode 100644 index 00000000..219840ea --- /dev/null +++ b/configs/spmd/large_scale/logreg_strong.json @@ -0,0 +1,29 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd logreg2 parameters": { + "algorithm":{ + "estimator": "LogisticRegression", + "estimator_methods": { "inference": "predict" }, + "estimator_params": { "max_iter": 16 }, + "sklearnex_context": { "use_raw_input": true } + } + }, + "synthetic data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 12000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 12001000, "n_features": 200, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } } + ] + } + }, + "TEMPLATES": { + "logreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "spmd logreg parameters", + "synthetic data", + "spmd logreg2 parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/pca.json b/configs/spmd/large_scale/pca.json new file mode 100644 index 00000000..ce56bd8a --- /dev/null +++ b/configs/spmd/large_scale/pca.json @@ -0,0 +1,31 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd pca parameters": { + "algorithm": { + "estimator": "PCA", + "estimator_methods": { "training": "fit", "inference": "" }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } }, + { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "pca": { + "SETS": [ + "sklearnex spmd implementation", + "large scale 2k parameters", + "synthetic data", + "spmd pca parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/pca_strong.json b/configs/spmd/large_scale/pca_strong.json new file mode 100644 index 00000000..70461ba7 --- /dev/null +++ b/configs/spmd/large_scale/pca_strong.json @@ -0,0 +1,30 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd pca parameters": { + "algorithm": { + "estimator": "PCA", + "estimator_methods": { "training": "fit", "inference": "" }, + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "synthetic data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } } + ] + } + }, + "TEMPLATES": { + "pca": { + "SETS": [ + "sklearnex spmd implementation", + "large scale strong <=64 parameters", + "synthetic data", + "spmd pca parameters" + ] + } + } +} diff --git a/sklbench/benchmarks/custom_function.py b/sklbench/benchmarks/custom_function.py index 25abb900..34b223ed 100644 --- a/sklbench/benchmarks/custom_function.py +++ b/sklbench/benchmarks/custom_function.py @@ -64,9 +64,13 @@ def get_function_args(bench_case: BenchCase, x_train, y_train, x_test, y_test) - def measure_function_instance(bench_case, function_instance, args: Tuple, kwargs: Dict): metrics = dict() - metrics["time[ms]"], metrics["time
std[ms]"], _ = measure_case( - bench_case, function_instance, *args, **kwargs - ) + ( + metrics["time[ms]"], + metrics["time std[ms]"], + metrics["first iter[ms]"], + metrics["box filter mean[ms]"], + metrics["box filter std[ms]"], + ) = measure_case(bench_case, function_instance, *args, **kwargs) return metrics diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index f9c0a75e..819f5fb5 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -66,15 +66,15 @@ def get_estimator(library_name: str, estimator_name: str): f"Using first {classes_map[estimator_name][0]}." ) estimator = classes_map[estimator_name][0] - if not issubclass(estimator, BaseEstimator): - logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator") + # if not issubclass(estimator, BaseEstimator): + # logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator") return estimator def get_estimator_methods(bench_case: BenchCase) -> Dict[str, List[str]]: # default estimator methods estimator_methods = { - "training": ["fit"], + "training": ["partial_fit", "fit"], "inference": ["predict", "predict_proba", "transform"], } for stage in estimator_methods.keys(): @@ -134,6 +134,9 @@ def get_subset_metrics_of_estimator( and isinstance(iterations[0], Union[Numeric, NumpyNumeric].__args__) ): metrics.update({"iterations": int(iterations[0])}) + if hasattr(estimator_instance, "_n_inner_iter"): + inner_iters = estimator_instance._n_inner_iter + metrics.update({"inner_iters": int(inner_iters)}) if task == "classification": y_pred = convert_to_numpy(estimator_instance.predict(x)) metrics.update( @@ -142,7 +145,7 @@ def get_subset_metrics_of_estimator( "balanced accuracy": float(balanced_accuracy_score(y_compat, y_pred)), } ) - if hasattr(estimator_instance, "predict_proba") and not ( + """if hasattr(estimator_instance, "predict_proba") and not ( hasattr(estimator_instance, "probability") and getattr(estimator_instance, "probability") == False ): @@ -162,7 +165,7 @@ def get_subset_metrics_of_estimator( ), "logloss": float(log_loss(y_compat, y_pred_proba)), } - ) + )""" elif task == "regression": y_pred = convert_to_numpy(estimator_instance.predict(x)) metrics.update( @@ -188,19 +191,6 @@ def get_subset_metrics_of_estimator( } ) elif task == "clustering": - if hasattr(estimator_instance, "inertia_"): - # compute inertia manually using distances to cluster centers - # provided by KMeans.transform - metrics.update( - { - "inertia": float( - np.power( - convert_to_numpy(estimator_instance.transform(x)).min(axis=1), - 2, - ).sum() - ) - } - ) if hasattr(estimator_instance, "predict"): y_pred = convert_to_numpy(estimator_instance.predict(x)) metrics.update( @@ -334,34 +324,43 @@ def verify_patching(stream: io.StringIO, function_name) -> bool: return acceleration_lines > 0 and fallback_lines == 0 -def create_online_function(method_instance, data_args, batch_size): - n_batches = data_args[0].shape[0] // batch_size +def create_online_function( + estimator_instance, method_instance, data_args, num_batches, batch_size +): if "y" in list(inspect.signature(method_instance).parameters): def ndarray_function(x, y): - for i in range(n_batches): + for i in range(num_batches): method_instance( x[i * batch_size : (i + 1) * batch_size], y[i * batch_size : (i + 1) * batch_size], ) + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() def dataframe_function(x, y): - for i in 
range(n_batches): + for i in range(num_batches): method_instance( x.iloc[i * batch_size : (i + 1) * batch_size], y.iloc[i * batch_size : (i + 1) * batch_size], ) + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() else: def ndarray_function(x): - for i in range(n_batches): + for i in range(num_batches): method_instance(x[i * batch_size : (i + 1) * batch_size]) + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() def dataframe_function(x): - for i in range(n_batches): + for i in range(num_batches): method_instance(x.iloc[i * batch_size : (i + 1) * batch_size]) + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() if "ndarray" in str(type(data_args[0])): return ndarray_function @@ -414,12 +413,28 @@ def measure_sklearn_estimator( data_args = (x_train,) else: data_args = (x_test,) - batch_size = get_bench_case_value( - bench_case, f"algorithm:batch_size:{stage}" - ) - if batch_size is not None: + + if method == "partial_fit": + num_batches = get_bench_case_value(bench_case, "data:num_batches") + batch_size = get_bench_case_value(bench_case, "data:batch_size") + + if batch_size is None: + if num_batches is None: + num_batches = 5 + batch_size = ( + data_args[0].shape[0] + num_batches - 1 + ) // num_batches + if num_batches is None: + num_batches = ( + data_args[0].shape[0] + batch_size - 1 + ) // batch_size + method_instance = create_online_function( - method_instance, data_args, batch_size + estimator_instance, + method_instance, + data_args, + num_batches, + batch_size, ) # daal4py model builders enabling branch if enable_modelbuilders and stage == "inference": @@ -429,17 +444,14 @@ def measure_sklearn_estimator( estimator_instance.get_booster() ) method_instance = getattr(daal_model, method) - metrics[method] = dict() ( metrics[method]["time[ms]"], metrics[method]["time std[ms]"], - _, + metrics[method]["first iter[ms]"], + metrics[method]["box filter mean[ms]"], + metrics[method]["box filter std[ms]"], ) = measure_case(bench_case, method_instance, *data_args) - if batch_size is not None: - metrics[method]["throughput[samples/ms]"] = ( - (data_args[0].shape[0] // batch_size) * batch_size - ) / metrics[method]["time[ms]"] if ensure_sklearnex_patching: full_method_name = f"{estimator_class.__name__}.{method}" sklearnex_logging_stream.seek(0) @@ -490,7 +502,18 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): estimator_params = get_bench_case_value( bench_case, "algorithm:estimator_params", dict() ) + # logger.debug("estimator params: " + str(estimator_params)) + if ( + "DBSCAN" in str(estimator_name) + and get_bench_case_value(bench_case, "data:distributed_split", None) + != "rank_based" + ): + if "min_samples" in estimator_params: + from mpi4py import MPI + estimator_params["min_samples"] = ( + MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"] + ) # get estimator methods for measurement estimator_methods = get_estimator_methods(bench_case) @@ -521,12 +544,12 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): result_template = enrich_result(result_template, bench_case) if "assume_finite" in context_params: result_template["assume_finite"] = context_params["assume_finite"] - if hasattr(estimator_instance, "get_params"): - estimator_params = estimator_instance.get_params() + # if hasattr(estimator_instance, "get_params"): + # estimator_params = estimator_instance.get_params() # note: "handle" is not JSON-serializable 
if "handle" in estimator_params: del estimator_params["handle"] - logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}") + # logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}") result_template.update(estimator_params) data_descs = { diff --git a/sklbench/datasets/__init__.py b/sklbench/datasets/__init__.py index 093875c4..d4bddca1 100644 --- a/sklbench/datasets/__init__.py +++ b/sklbench/datasets/__init__.py @@ -67,6 +67,11 @@ def load_data(bench_case: BenchCase) -> Tuple[Dict, Dict]: generation_kwargs = get_bench_case_value( bench_case, "data:generation_kwargs", dict() ) + if "center_box" in generation_kwargs: + generation_kwargs["center_box"] = ( + -1 * generation_kwargs["center_box"], + generation_kwargs["center_box"], + ) return load_sklearn_synthetic_data( function_name=source, input_kwargs=generation_kwargs, diff --git a/sklbench/datasets/common.py b/sklbench/datasets/common.py index e7ed0160..28b62fe6 100644 --- a/sklbench/datasets/common.py +++ b/sklbench/datasets/common.py @@ -136,11 +136,11 @@ def cache_wrapper(**kwargs): data_name = kwargs["data_name"] data_cache = kwargs["data_cache"] if len(get_filenames_by_prefix(data_cache, data_name)) > 0: - logger.info(f'Loading "{data_name}" dataset from cache files') + # logger.info(f'Loading "{data_name}" dataset from cache files') data = load_data_from_cache(data_cache, data_name) data_desc = load_data_description(data_cache, data_name) else: - logger.info(f'Loading "{data_name}" dataset from scratch') + # logger.info(f'Loading "{data_name}" dataset from scratch') data, data_desc = function(**kwargs) save_data_to_cache(data, data_cache, data_name) save_data_description(data_desc, data_cache, data_name) diff --git a/sklbench/datasets/transformer.py b/sklbench/datasets/transformer.py index d2e63e9e..c63d3b20 100644 --- a/sklbench/datasets/transformer.py +++ b/sklbench/datasets/transformer.py @@ -14,6 +14,7 @@ # limitations under the License. # =============================================================================== +import math import os import numpy as np @@ -109,7 +110,23 @@ def split_and_transform_data(bench_case, data, data_description): y_train, y_test = None, None distributed_split = get_bench_case_value(bench_case, "data:distributed_split", None) - if distributed_split == "rank_based": + # knn_split_train = ( + # "KNeighbors" in get_bench_case_value(bench_case, "algorithm:estimator", "") + # and int(get_bench_case_value(bench_case, "bench:mpi_params:n", 1)) > 1 + # ) + if distributed_split == "sample_shift": + from mpi4py import MPI + + rank = MPI.COMM_WORLD.Get_rank() + # This approach was chosen to shift the distribution of synthetic data on each rank + # for KMeans weak scaling tests. When testing with a large number of tiles, this method avoids duplication of data on each rank. + # For example, if there are 24,576 tiles being used, each data point in the 24,576th tile would be multiplied by 1.47. + # The factor 0.003 was chosen arbitrarily and can be fine-tuned for other datasets and algorithms if needed. 
+        adjust_number = (math.sqrt(rank) * 0.003) + 1
+        x_test = x_test * adjust_number
+        x_train = x_train * adjust_number
+
+    elif distributed_split == "rank_based":
         from mpi4py import MPI
 
         comm = MPI.COMM_WORLD
@@ -129,10 +146,12 @@ def split_and_transform_data(bench_case, data, data_description):
                 x_train[train_start:train_end],
                 y_train[train_start:train_end],
             )
-            x_test, y_test = x_test[test_start:test_end], y_test[test_start:test_end]
+            if distributed_split == "rank_based":
+                x_test, y_test = x_test[test_start:test_end], y_test[test_start:test_end]
         else:
             x_train = x_train[train_start:train_end]
-            x_test = x_test[test_start:test_end]
+            if distributed_split == "rank_based":
+                x_test = x_test[test_start:test_end]
 
     device = get_bench_case_value(bench_case, "algorithm:device", None)
     common_data_format = get_bench_case_value(bench_case, "data:format", "pandas")
@@ -178,7 +197,7 @@
         "format": data_format,
         "order": data_order,
         "dtype": data_dtype,
-        "samples": converted_data.shape[0],
+        "samples (per rank)": converted_data.shape[0],
     }
     if len(converted_data.shape) == 2 and converted_data.shape[1] > 1:
         data_description[subset_name]["features"] = converted_data.shape[1]
diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py
index 28fa2bb0..2bc3a05e 100644
--- a/sklbench/report/implementation.py
+++ b/sklbench/report/implementation.py
@@ -16,7 +16,7 @@
 
 import argparse
 import json
-from typing import Dict, List
+from typing import Dict, Hashable, List
 
 import openpyxl as xl
 import pandas as pd
@@ -32,6 +32,9 @@
 METRICS = {
     "lower is better": [
         "time[ms]",
+        "first iter[ms]",
+        "box filter mean[ms]",
+        "box filter std[ms]",
         "iterations",
         # classification
         "logloss",
@@ -239,6 +242,7 @@ def get_result_tables_as_df(
     bench_cases = pd.DataFrame(
         [flatten_dict(bench_case) for bench_case in results["bench_cases"]]
     )
+    bench_cases = bench_cases.map(lambda x: str(x) if not isinstance(x, Hashable) else x)
 
     if compatibility_mode:
         bench_cases = transform_results_to_compatible(bench_cases)
@@ -248,7 +252,7 @@
             bench_cases.drop(columns=[column], inplace=True)
             diffby_columns.remove(column)
 
-    return split_df_by_columns(bench_cases, splitby_columns)
+    return split_df_by_columns(bench_cases, splitby_columns, False)
 
 
 def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame:
@@ -258,7 +262,11 @@
         # only relative improvements are included in summary currently
         if len(column) > 1 and column[1] == f"{metric_name} relative improvement":
             metric_columns.append(column)
-    summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T
+    if metric_columns:
+        summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T
+    else:
+        # keep one row so the index assignment below does not fail on empty input
+        summary = pd.DataFrame(index=[df_name])
     summary.index = pd.Index([df_name])
     return summary
diff --git a/sklbench/runner/commands_helper.py b/sklbench/runner/commands_helper.py
index 09e61369..aace5643 100644
--- a/sklbench/runner/commands_helper.py
+++ b/sklbench/runner/commands_helper.py
@@ -45,6 +45,10 @@ def generate_benchmark_command(
         mpi_prefix = "mpirun"
         for mpi_param_name, mpi_param_value in mpi_params.items():
             mpi_prefix += f" -{mpi_param_name} {mpi_param_value}"
+            if mpi_param_name == "-hostfile":
+                import os
+
+                mpi_prefix += " " + os.environ.get("PBS_NODEFILE", "")
         command_prefix = f"{mpi_prefix} {command_prefix}"
 
     # 3. Intel(R) VTune* profiling command prefix
     vtune_profiling = get_bench_case_value(bench_case, "bench:vtune_profiling")
diff --git a/sklbench/utils/logger.py b/sklbench/utils/logger.py
index 90940630..250c5fa6 100644
--- a/sklbench/utils/logger.py
+++ b/sklbench/utils/logger.py
@@ -19,7 +19,7 @@
 
 logger = logging.Logger("sklbench")
 logging_channel = logging.StreamHandler()
-logging_formatter = logging.Formatter("%(levelname)s:%(name)s: %(message)s")
+logging_formatter = logging.Formatter("%(asctime)s - %(levelname)s:%(name)s: %(message)s")
 logging_channel.setFormatter(logging_formatter)
 logger.addHandler(logging_channel)
diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py
index 989daefd..3677e760 100644
--- a/sklbench/utils/measurement.py
+++ b/sklbench/utils/measurement.py
@@ -40,6 +40,24 @@ def box_filter(timing, left=0.2, right=0.8):
     return np.mean(result) * 1000, np.std(result) * 1000
 
 
+def large_scale_measurements(timing):
+    # report the first (cold) run separately; mean/std cover the warmed-up runs
+    first_iter = timing[0] * 1000
+    mean = np.mean(timing[1:]) * 1000 if len(timing) > 1 else first_iter
+    stdev = np.std(timing[1:]) * 1000 if len(timing) > 1 else 0.0
+    # Tukey-fence (IQR-based) outlier filtering over all runs
+    timing_sorted = np.sort(timing)
+    Q1, Q3 = np.percentile(timing_sorted, [25, 75])
+    IQ = Q3 - Q1
+    lower, upper = Q1 - 1.5 * IQ, Q3 + 1.5 * IQ
+
+    filtered_times = timing_sorted[(timing_sorted >= lower) & (timing_sorted <= upper)]
+
+    box_filter_mean = np.mean(filtered_times) * 1000 if filtered_times.size > 0 else 0
+    box_filter_stdev = np.std(filtered_times) * 1000 if filtered_times.size > 0 else 0
+    return mean, stdev, first_iter, box_filter_mean, box_filter_stdev
+
+
 def measure_time(
     func,
     *args,
@@ -56,12 +74,17 @@
     )
     times = []
     func_return_value = None
+    inners, iters = [], []
     while len(times) < n_runs:
         if enable_itt and itt_is_available:
             itt.resume()
         t0 = timeit.default_timer()
         func_return_value = func(*args, **kwargs)
         t1 = timeit.default_timer()
+        # collect per-run iteration counts when the estimator exposes them
+        if hasattr(func, "__self__") and hasattr(func.__self__, "_n_inner_iter"):
+            inners.append(func.__self__._n_inner_iter)
+            iters.append(func.__self__.n_iter_)
         if enable_itt and itt_is_available:
             itt.pause()
         times.append(t1 - t0)
@@ -72,13 +95,27 @@
                 f"exceeded time limit ({time_limit} seconds)"
             )
             break
+
+    try:
+        from mpi4py import MPI
+
+        if MPI.COMM_WORLD.Get_rank() == 0:
+            logger.debug(
+                "iters across n runs: "
+                + str(iters)
+                + ", inner iters across n runs: "
+                + str(inners)
+            )
+            logger.debug(f"Runtime for all {n_runs} iterations: {times}")
+    except ModuleNotFoundError:
+        logger.debug(f"Runtime for all {n_runs} iterations: {times}")
+    # mean, std = box_filter(times)
+    # if std / mean > std_mean_ratio:
+    #     logger.warning(
+    #         f'Measured "std / mean" time ratio of "{str(func)}" function is higher '
+    #         f"than threshold ({round(std / mean, 3)} vs. {std_mean_ratio})"
+    #     )
+    return large_scale_measurements(times)
 
 
 # wrapper to get measurement params from benchmarking case
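
For reference, a minimal usage sketch of the new `large_scale_measurements` helper (illustrative only, not part of the patch; it assumes the patched `sklbench.utils.measurement` module is importable and uses invented timing values):

```python
from sklbench.utils.measurement import large_scale_measurements

# five fake wall-clock timings in seconds; the first run models warm-up cost
times = [0.50, 0.101, 0.099, 0.100, 0.102]

mean_ms, std_ms, first_ms, bf_mean_ms, bf_std_ms = large_scale_measurements(times)

# first_ms reports the cold run (~500 ms), mean_ms/std_ms cover runs 2..n,
# and the box-filter pair is computed after IQR outlier removal, which
# drops the 0.50 s warm-up run for this input
print(first_ms, mean_ms, std_ms, bf_mean_ms, bf_std_ms)
```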