IntelPython · ethanglaser · Aug 30, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 21, 2024
@@ -104,7 +104,7 @@ Configs have the three highest parameter keys:
 | `data`:`format` | `pandas` | `pandas`, `numpy`, `cudf` | Data format to use in benchmark. |
 | `data`:`order` | `F` | `C`, `F` | Data order to use in benchmark: contiguous(C) or Fortran. |
 | `data`:`dtype` | `float64` |  | Data type to use in benchmark. |
-| `data`:`distributed_split` | None | None, `rank_based` | Split type used to distribute data between machines in distributed algorithm. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. |
+| `data`:`distributed_split` | None | None, `rank_based`, `sample_shift` | Split type used to distribute data between machines in distributed algorithm. `sample_shift` type shifts each data point in each rank by (sqrt(rank id) * 0.003) + 1. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. |
 |<h3>Algorithm parameters</h3>||||
 | `algorithm`:`library` | None |  | Python module containing measured entity (class or function). |
 | `algorithm`:`device` | `default` | `default`, `cpu`, `gpu` | Device selected for computation. |

@@ -12,6 +12,11 @@
                 { "library": "sklearnex", "device": ["cpu", "gpu"] }
             ]
         },
+        "sklearn-ex[gpu] implementations": {
+            "algorithm": [
+                { "library": "sklearnex", "device": ["gpu"] }
+            ]
+        },
         "sklearn-ex[preview] implementations": {
             "algorithm": [
                 { "library": "sklearn", "device": "cpu" },

@@ -0,0 +1,85 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common": {"bench": {"n_runs": 10}},
+        "basic_statistics data": {
+            "data": {
+                "source": "make_blobs",
+                "generation_kwargs": {
+                    "centers": 1,
+                    "n_samples": 12000000,
+                    "n_features": [10, 100]
+                },
+                "split_kwargs": {"ignore": true}
+            }
+        },
+        "linear_regression data": {
+            "data": {
+                "source": "make_regression",
+                "split_kwargs": {"train_size": 0.2, "test_size": 0.8},
+                "generation_kwargs": {
+                    "n_samples": 12000000,
+                    "n_features": [10, 100],
+                    "n_informative": 5,
+                    "noise": 2.0
+                }
+            }
+        },
+        "pca data": {
+            "data": {
+                "source": "make_blobs",
+                "generation_kwargs": {
+                    "centers": 1,
+                    "n_samples": 12000000,
+                    "n_features": [10, 100]
+                },
+                "split_kwargs": {"ignore": true}
+            }
+        },
+        "basic_statistics": {
+            "algorithm": [
+                {
+                    "estimator": "BasicStatistics",
+                    "library": "sklearnex.basic_statistics",
+                    "estimator_methods": {"training": "fit"}
+                }
+            ]
+        },
+        "covariance": {
+            "algorithm": [
+                {
+                    "estimator": "EmpiricalCovariance",
+                    "library": "sklearnex.preview.covariance",
+                    "estimator_methods": {"training": "fit"}
+                }
+            ]
+        },
+        "linear_regression": {
+            "algorithm": [
+                {
+                    "estimator": "LinearRegression",
+                    "library": "sklearnex.linear_model",
+                    "estimator_methods": {"training": "fit"}
+                }
+            ]
+        },
+        "pca": {
+            "algorithm": [
+                {
+                    "estimator": "PCA",
+                    "library": "sklearnex.decomposition",
+                    "estimator_methods": {"training": "fit"}
+                }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "basic_statistics": {"SETS": ["common", "basic_statistics", "basic_statistics data", "sklearn-ex[gpu] implementations"]},
+        "covariance": {"SETS": ["common", "basic_statistics data", "sklearn-ex[gpu] implementations", "covariance"]},
+        "linear_regression": {
+            "SETS": ["common", "linear_regression", "linear_regression data", "sklearn-ex[gpu] implementations"]
+        },
+        "pca": {"SETS": ["common", "pca", "pca data", "sklearn-ex[gpu] implementations"]}
+    }
+}
+
@@ -0,0 +1,27 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "basic stats parameters": {
+            "algorithm": {
+                "estimator": "BasicStatistics"
+            },
+            "data": {
+                "dtype": ["float32"]
+            }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000,  "n_features": 10, "centers": 1 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "basic_statistics": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "basic stats parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
@@ -0,0 +1,28 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "covariance parameters": {
+            "algorithm": {
+                "estimator": "EmpiricalCovariance",
+                "library": "sklearnex.preview.covariance"
+            },
+            "data": {
+                "dtype": ["float32"]
+            }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000,  "n_features": 10, "centers": 1 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "covariance": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "covariance parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
@@ -0,0 +1,41 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common dbscan parameters": {
+            "algorithm": {
+                "estimator": "DBSCAN",
+                "estimator_params": {
+                    "eps": "[SPECIAL_VALUE]distances_quantile:0.01",
+                    "min_samples": 5,
+                    "metric": "euclidean"
+                }
+            },
+            "data": {
+                "dtype": ["float32"]
+            }
+        },
+        "sklearn dbscan parameters": {
+            "algorithm": {
+                "estimator_params": {
+                    "algorithm": "brute",
+                    "n_jobs": "[SPECIAL_VALUE]physical_cpus"
+                }
+            }
+        },
+        "synthetic dataset": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000,  "n_features": 10, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn dbscan": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common dbscan parameters",
+                "sklearn dbscan parameters",
+                "synthetic dataset"
+            ]
+        }
+    }
+}
@@ -0,0 +1,34 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common forest params": {
+            "data": {
+                "dtype": ["float32"]
+            }
+        },
+        "forest classifier params": {
+            "algorithm": {"estimator": "RandomForestClassifier"},
+            "data": { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
+        },
+        "forest regression params": {
+            "algorithm": {"estimator": "RandomForestRegressor"},
+            "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 501000,  "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }}
+        }
+    },
+    "TEMPLATES": {
+        "forest cls": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common forest params",
+                "forest classifier params"
+            ]
+        },
+        "forest reg": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common forest params",
+                "forest regression params"
+            ]
+        }
+    }
+}
@@ -0,0 +1,40 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common kmeans parameters": {
+            "algorithm": {
+                "estimator": "KMeans",
+                "estimator_params": {
+                    "n_clusters": "[SPECIAL_VALUE]auto",
+                    "n_init": 1,
+                    "max_iter": 30,
+                    "tol": 1e-3,
+                    "random_state": 42
+                },
+                "estimator_methods": { "inference": "predict" }
+            },
+            "data": {
+                "dtype": ["float32"],
+                "preprocessing_kwargs": { "normalize": true }
+            }
+        },
+        "sklearn kmeans parameters": {
+            "algorithm": { "estimator_params": { "init": "k-means++", "algorithm": "lloyd" } }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 100, "centers": 100 }, "algorithm": { "n_clusters": 100, "max_iter": 100 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn kmeans": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common kmeans parameters",
+                "sklearn kmeans parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
@@ -0,0 +1,56 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common knn parameters": {
+            "algorithm": {
+                "estimator_params": {
+                    "n_neighbors": [10, 100],
+                    "weights": "uniform"
+                }
+            },
+            "data": {
+                "dtype": ["float32"],
+                "preprocessing_kwargs": { "normalize": true }
+            }
+        },
+        "sklearn knn parameters": {
+            "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } }
+        },
+        "synthetic classification data": {
+            "algorithm": {
+                "estimator": "KNeighborsClassifier",
+                "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] }
+            },
+            "data": [
+                { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
+            ]
+        },
+        "synthetic regression data": {
+            "algorithm": {
+                "estimator": "KNeighborsRegressor",
+                "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] }
+            },
+            "data": [
+                { "source": "make_regression", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "noise": 1.5 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn brute knn clsf": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common knn parameters",
+                "sklearn knn parameters",
+                "synthetic classification data"
+            ]
+        },
+        "sklearn brute knn reg": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common knn parameters",
+                "sklearn knn parameters",
+                "synthetic regression data"
+            ]
+        }
+    }
+}
@@ -0,0 +1,33 @@
+{
+    "INCLUDE": ["../../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "synthetic data": {
+            "data": [
+                { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000,  "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } }
+            ]
+        },
+        "common linear parameters": {
+            "algorithm": {
+                "estimator": "LinearRegression",
+                "estimator_params": { "fit_intercept": true, "copy_X": true }
+            },
+            "data": {
+                "dtype": ["float32"],
+                "order": "C"
+            }
+        },
+        "sklearn linear parameters": {
+            "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } }
+        }
+    },
+    "TEMPLATES": {
+        "sklearn linear": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common linear parameters",
+                "sklearn linear parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}