diff --git a/configs/README.md b/configs/README.md
index 8d3c5ac2..e1cf8390 100644
--- a/configs/README.md
+++ b/configs/README.md
@@ -104,7 +104,7 @@ Configs have the three highest parameter keys:
| `data`:`format` | `pandas` | `pandas`, `numpy`, `cudf` | Data format to use in benchmark. |
| `data`:`order` | `F` | `C`, `F` | Data order to use in benchmark: contiguous(C) or Fortran. |
| `data`:`dtype` | `float64` | | Data type to use in benchmark. |
-| `data`:`distributed_split` | None | None, `rank_based` | Split type used to distribute data between machines in distributed algorithm. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. |
+| `data`:`distributed_split` | None | None, `rank_based`, `sample_shift` | Split type used to distribute data between machines in a distributed algorithm. `None` means all machines use the full dataset without splitting. `rank_based` splits the data equally between machines, with the split sequence based on the rank id from MPI. `sample_shift` shifts each data point on each rank by `sqrt(rank id) * 0.003 + 1` (see the sketch after this table). |
|**Algorithm parameters**||||
| `algorithm`:`library` | None | | Python module containing measured entity (class or function). |
| `algorithm`:`device` | `default` | `default`, `cpu`, `gpu` | Device selected for computation. |
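
A minimal sketch of the `sample_shift` rule documented above, assuming the shift is applied multiplicatively to every value held by a rank (the `shift_per_rank` helper below is illustrative, not part of the benchmark API):

```python
import math

import numpy as np


def shift_per_rank(x: np.ndarray, rank: int) -> np.ndarray:
    # Illustrative only: scale every sample on this MPI rank by
    # sqrt(rank id) * 0.003 + 1, so each rank works on a slightly
    # shifted copy of the same generated dataset instead of a split.
    return x * (math.sqrt(rank) * 0.003 + 1)


# Rank 0 keeps the data unchanged (factor 1.0); rank 4 scales it by 1.006.
local_x = shift_per_rank(np.ones((4, 2)), rank=4)
```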
diff --git a/configs/common/sklearn.json b/configs/common/sklearn.json
index d7b13188..43051093 100644
--- a/configs/common/sklearn.json
+++ b/configs/common/sklearn.json
@@ -12,6 +12,11 @@
{ "library": "sklearnex", "device": ["cpu", "gpu"] }
]
},
+ "sklearn-ex[gpu] implementations": {
+ "algorithm": [
+ { "library": "sklearnex", "device": ["gpu"] }
+ ]
+ },
"sklearn-ex[preview] implementations": {
"algorithm": [
{ "library": "sklearn", "device": "cpu" },
diff --git a/configs/regular/batch_for_online.json b/configs/regular/batch_for_online.json
new file mode 100644
index 00000000..973c4ed4
--- /dev/null
+++ b/configs/regular/batch_for_online.json
@@ -0,0 +1,85 @@
+{
+ "INCLUDE": ["../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "common": {"bench": {"n_runs": 10}},
+ "basic_statistics data": {
+ "data": {
+ "source": "make_blobs",
+ "generation_kwargs": {
+ "centers": 1,
+ "n_samples": 12000000,
+ "n_features": [10, 100]
+ },
+ "split_kwargs": {"ignore": true}
+ }
+ },
+ "linear_regression data": {
+ "data": {
+ "source": "make_regression",
+ "split_kwargs": {"train_size": 0.2, "test_size": 0.8},
+ "generation_kwargs": {
+ "n_samples": 12000000,
+ "n_features": [10, 100],
+ "n_informative": 5,
+ "noise": 2.0
+ }
+ }
+ },
+ "pca data": {
+ "data": {
+ "source": "make_blobs",
+ "generation_kwargs": {
+ "centers": 1,
+ "n_samples": 12000000,
+ "n_features": [10, 100]
+ },
+ "split_kwargs": {"ignore": true}
+ }
+ },
+ "basic_statistics": {
+ "algorithm": [
+ {
+ "estimator": "BasicStatistics",
+ "library": "sklearnex.basic_statistics",
+ "estimator_methods": {"training": "fit"}
+ }
+ ]
+ },
+ "covariance": {
+ "algorithm": [
+ {
+ "estimator": "EmpiricalCovariance",
+ "library": "sklearnex.preview.covariance",
+ "estimator_methods": {"training": "fit"}
+ }
+ ]
+ },
+ "linear_regression": {
+ "algorithm": [
+ {
+ "estimator": "LinearRegression",
+ "library": "sklearnex.linear_model",
+ "estimator_methods": {"training": "fit"}
+ }
+ ]
+ },
+ "pca": {
+ "algorithm": [
+ {
+ "estimator": "PCA",
+ "library": "sklearnex.decomposition",
+ "estimator_methods": {"training": "fit"}
+ }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "basic_statistics": {"SETS": ["common", "basic_statistics", "basic_statistics data", "sklearn-ex[gpu] implementations"]},
+ "covariance": {"SETS": ["common", "basic_statistics data", "sklearn-ex[gpu] implementations", "covariance"]},
+ "linear_regression": {
+ "SETS": ["common", "linear_regression", "linear_regression data", "sklearn-ex[gpu] implementations"]
+ },
+ "pca": {"SETS": ["common", "pca", "pca data", "sklearn-ex[gpu] implementations"]}
+ }
+}
+
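
For readers new to these configs: `batch_for_online.json` above builds each TEMPLATE by merging the named `PARAMETERS_SETS` entries into a single benchmark case. A rough sketch of that resolution, assuming a plain recursive dict merge (the real framework additionally expands list-valued parameters such as `"n_features": [10, 100]` into separate cases):

```python
from functools import reduce


def deep_merge(base: dict, extra: dict) -> dict:
    # Later sets in the SETS list override or extend earlier ones, key by key.
    merged = dict(base)
    for key, value in extra.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


parameters_sets = {
    "common": {"bench": {"n_runs": 10}},
    "pca": {"algorithm": {"estimator": "PCA", "library": "sklearnex.decomposition"}},
    "pca data": {"data": {"source": "make_blobs"}},
}
case = reduce(deep_merge, (parameters_sets[s] for s in ["common", "pca", "pca data"]))
# -> {"bench": {...}, "algorithm": {...}, "data": {...}}
```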
diff --git a/configs/regular/bf16/basic_statistics.json b/configs/regular/bf16/basic_statistics.json
new file mode 100644
index 00000000..671521ab
--- /dev/null
+++ b/configs/regular/bf16/basic_statistics.json
@@ -0,0 +1,27 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "basic stats parameters": {
+ "algorithm": {
+ "estimator": "BasicStatistics"
+ },
+ "data": {
+ "dtype": ["float32"]
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "basic_statistics": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "basic stats parameters",
+ "synthetic data"
+ ]
+ }
+ }
+}
diff --git a/configs/regular/bf16/covariance.json b/configs/regular/bf16/covariance.json
new file mode 100644
index 00000000..1cd6ef4a
--- /dev/null
+++ b/configs/regular/bf16/covariance.json
@@ -0,0 +1,28 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "covariance parameters": {
+ "algorithm": {
+ "estimator": "EmpiricalCovariance",
+ "library": "sklearnex.preview.covariance"
+ },
+ "data": {
+ "dtype": ["float32"]
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "covariance": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "covariance parameters",
+ "synthetic data"
+ ]
+ }
+ }
+}
diff --git a/configs/regular/bf16/dbscan.json b/configs/regular/bf16/dbscan.json
new file mode 100644
index 00000000..b91120e8
--- /dev/null
+++ b/configs/regular/bf16/dbscan.json
@@ -0,0 +1,41 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "common dbscan parameters": {
+ "algorithm": {
+ "estimator": "DBSCAN",
+ "estimator_params": {
+ "eps": "[SPECIAL_VALUE]distances_quantile:0.01",
+ "min_samples": 5,
+ "metric": "euclidean"
+ }
+ },
+ "data": {
+ "dtype": ["float32"]
+ }
+ },
+ "sklearn dbscan parameters": {
+ "algorithm": {
+ "estimator_params": {
+ "algorithm": "brute",
+ "n_jobs": "[SPECIAL_VALUE]physical_cpus"
+ }
+ }
+ },
+ "synthetic dataset": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 10, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "sklearn dbscan": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common dbscan parameters",
+ "sklearn dbscan parameters",
+ "synthetic dataset"
+ ]
+ }
+ }
+}
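
`"eps": "[SPECIAL_VALUE]distances_quantile:0.01"` asks the framework to derive `eps` from the data instead of hard-coding a radius. A hedged sketch of one way such a value can be computed, assuming `eps` is taken as the requested quantile of pairwise training distances on a subsample (the helper is illustrative, not the framework's implementation):

```python
import numpy as np
from sklearn.metrics import pairwise_distances


def eps_from_distances_quantile(x, quantile, subsample=1000, seed=42):
    # Illustrative: estimate the requested quantile of pairwise euclidean
    # distances on a random subsample and use it as the DBSCAN eps radius.
    rng = np.random.default_rng(seed)
    idx = rng.choice(x.shape[0], size=min(subsample, x.shape[0]), replace=False)
    dists = pairwise_distances(x[idx])
    upper = dists[np.triu_indices_from(dists, k=1)]  # unique pairs only
    return float(np.quantile(upper, quantile))
```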
diff --git a/configs/regular/bf16/forest.json b/configs/regular/bf16/forest.json
new file mode 100644
index 00000000..845b73a2
--- /dev/null
+++ b/configs/regular/bf16/forest.json
@@ -0,0 +1,34 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "common forest params": {
+ "data": {
+ "dtype": ["float32"]
+ }
+ },
+ "forest classifier params": {
+ "algorithm": {"estimator": "RandomForestClassifier"},
+ "data": { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
+ },
+ "forest regression params": {
+ "algorithm": {"estimator": "RandomForestRegressor"},
+ "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 501000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }}
+ }
+ },
+ "TEMPLATES": {
+ "forest cls": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common forest params",
+ "forest classifier params"
+ ]
+ },
+ "forest reg": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common forest params",
+ "forest regression params"
+ ]
+ }
+ }
+}
diff --git a/configs/regular/bf16/kmeans.json b/configs/regular/bf16/kmeans.json
new file mode 100644
index 00000000..8a5323c5
--- /dev/null
+++ b/configs/regular/bf16/kmeans.json
@@ -0,0 +1,40 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "common kmeans parameters": {
+ "algorithm": {
+ "estimator": "KMeans",
+ "estimator_params": {
+ "n_clusters": "[SPECIAL_VALUE]auto",
+ "n_init": 1,
+ "max_iter": 30,
+ "tol": 1e-3,
+ "random_state": 42
+ },
+ "estimator_methods": { "inference": "predict" }
+ },
+ "data": {
+ "dtype": ["float32"],
+ "preprocessing_kwargs": { "normalize": true }
+ }
+ },
+ "sklearn kmeans parameters": {
+ "algorithm": { "estimator_params": { "init": "k-means++", "algorithm": "lloyd" } }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 100, "centers": 100 }, "algorithm": { "n_clusters": 100, "max_iter": 100 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "sklearn kmeans": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common kmeans parameters",
+ "sklearn kmeans parameters",
+ "synthetic data"
+ ]
+ }
+ }
+}
diff --git a/configs/regular/bf16/knn.json b/configs/regular/bf16/knn.json
new file mode 100644
index 00000000..fabf6d6d
--- /dev/null
+++ b/configs/regular/bf16/knn.json
@@ -0,0 +1,56 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "common knn parameters": {
+ "algorithm": {
+ "estimator_params": {
+ "n_neighbors": [10, 100],
+ "weights": "uniform"
+ }
+ },
+ "data": {
+ "dtype": ["float32"],
+ "preprocessing_kwargs": { "normalize": true }
+ }
+ },
+ "sklearn knn parameters": {
+ "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } }
+ },
+ "synthetic classification data": {
+ "algorithm": {
+ "estimator": "KNeighborsClassifier",
+ "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] }
+ },
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
+ ]
+ },
+ "synthetic regression data": {
+ "algorithm": {
+ "estimator": "KNeighborsRegressor",
+ "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] }
+ },
+ "data": [
+ { "source": "make_regression", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "noise":1.5 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "sklearn brute knn clsf": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common knn parameters",
+ "sklearn knn parameters",
+ "synthetic classification data"
+ ]
+ },
+ "sklearn brute knn reg": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common knn parameters",
+ "sklearn knn parameters",
+ "synthetic regression data"
+ ]
+ }
+ }
+}
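
`"n_informative": "[SPECIAL_VALUE]0.5"` passes a fraction rather than an absolute count. A small sketch of how such a fractional special value can be resolved before calling `make_classification`, assuming the fraction is interpreted relative to `n_features`:

```python
from sklearn.datasets import make_classification

generation_kwargs = {"n_samples": 10_000, "n_features": 100, "n_classes": 2}

# Assumed resolution of "[SPECIAL_VALUE]0.5": half of the features are
# informative, i.e. n_informative = int(0.5 * n_features) = 50.
generation_kwargs["n_informative"] = int(0.5 * generation_kwargs["n_features"])

x, y = make_classification(**generation_kwargs, random_state=42)
```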
diff --git a/configs/regular/bf16/linear_model.json b/configs/regular/bf16/linear_model.json
new file mode 100644
index 00000000..23aa49c0
--- /dev/null
+++ b/configs/regular/bf16/linear_model.json
@@ -0,0 +1,33 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "synthetic data": {
+ "data": [
+ { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } }
+ ]
+ },
+ "common linear parameters": {
+ "algorithm": {
+ "estimator": "LinearRegression",
+ "estimator_params": { "fit_intercept": true, "copy_X": true }
+ },
+ "data": {
+ "dtype": ["float32"],
+ "order": "C"
+ }
+ },
+ "sklearn linear parameters": {
+ "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" }
+ }
+ },
+ "TEMPLATES": {
+ "sklearn linear": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common linear parameters",
+ "sklearn linear parameters",
+ "synthetic data"
+ ]
+ }
+ }
+}
diff --git a/configs/regular/bf16/logreg.json b/configs/regular/bf16/logreg.json
new file mode 100644
index 00000000..863d67f9
--- /dev/null
+++ b/configs/regular/bf16/logreg.json
@@ -0,0 +1,45 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "common logreg parameters": {
+ "algorithm": {
+ "estimator": "LogisticRegression",
+ "estimator_methods": { "inference": "predict" },
+ "estimator_params": {
+ "penalty": "l2",
+ "tol": 1e-4,
+ "C": 1.0,
+ "l1_ratio": null,
+ "max_iter": 20
+ }
+ },
+ "data": {
+ "dtype": ["float32"]
+ }
+ },
+ "sklearn logreg parameters": {
+ "algorithm": {
+ "estimator_params": {
+ "solver": "newton-cg",
+ "n_jobs": "[SPECIAL_VALUE]physical_cpus",
+ "random_state": 42
+ }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "sklearn logreg": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "common logreg parameters",
+ "sklearn logreg parameters",
+ "synthetic data"
+ ]
+ }
+ }
+}
diff --git a/configs/regular/bf16/pca.json b/configs/regular/bf16/pca.json
new file mode 100644
index 00000000..e5113261
--- /dev/null
+++ b/configs/regular/bf16/pca.json
@@ -0,0 +1,36 @@
+{
+ "INCLUDE": ["../../common/sklearn.json"],
+ "PARAMETERS_SETS": {
+ "pca parameters": {
+ "algorithm": {
+ "estimator": "PCA",
+ "estimator_params": {
+ "n_components": 3,
+ "copy": true,
+ "whiten": false,
+ "svd_solver": "covariance_eigh",
+ "tol": 0.0,
+ "iterated_power": 15,
+ "random_state": 42
+ }
+ },
+ "data": {
+ "dtype": ["float32"]
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "sklearn pca": {
+ "SETS": [
+ "sklearn-ex[gpu] implementations",
+ "pca parameters",
+ "synthetic data"
+ ]
+ }
+ }
+}
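
`"svd_solver": "covariance_eigh"` (available in newer scikit-learn releases) fits PCA through an eigendecomposition of the feature covariance matrix rather than an SVD of the centered data, which pays off when `n_samples >> n_features` as in the 10M x 10 dataset above. A condensed sketch of the idea, not the library implementation:

```python
import numpy as np

rng = np.random.default_rng(42)
x = rng.normal(size=(10_000, 10))

# Covariance-based PCA: eigendecompose the small (n_features x n_features)
# covariance matrix instead of running an SVD over all 10k rows.
x_centered = x - x.mean(axis=0)
cov = x_centered.T @ x_centered / (x.shape[0] - 1)
eigvals, eigvecs = np.linalg.eigh(cov)

# eigh returns eigenvalues in ascending order; keep the top 3 components.
components = eigvecs[:, ::-1][:, :3].T
explained_variance = eigvals[::-1][:3]
```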
diff --git a/configs/regular/dbscan.json b/configs/regular/dbscan.json
index 71dcdc9b..711c15cd 100644
--- a/configs/regular/dbscan.json
+++ b/configs/regular/dbscan.json
@@ -58,19 +58,11 @@
"TEMPLATES": {
"sklearn dbscan": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common dbscan parameters",
"sklearn dbscan parameters",
"dbscan datasets"
]
- },
- "cuml dbscan": {
- "SETS": [
- "cuml implementation",
- "common dbscan parameters",
- "cuml dbscan parameters",
- "dbscan datasets"
- ]
}
}
}
diff --git a/configs/regular/ensemble.json b/configs/regular/ensemble.json
index 56e37e77..f01c1383 100644
--- a/configs/regular/ensemble.json
+++ b/configs/regular/ensemble.json
@@ -90,7 +90,7 @@
"TEMPLATES": {
"sklearn ensemble classification": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common ensemble params",
"sklearn ensemble classifier params",
"ensemble classification data"
@@ -98,27 +98,11 @@
},
"sklearn ensemble regression": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common ensemble params",
"sklearn ensemble regressor params",
"ensemble regression data"
]
- },
- "cuml ensemble classification": {
- "SETS": [
- "cuml implementation",
- "common ensemble params",
- "cuml ensemble classifier params",
- "ensemble classification data"
- ]
- },
- "cuml ensemble regression": {
- "SETS": [
- "cuml implementation",
- "common ensemble params",
- "cuml ensemble regressor params",
- "ensemble regression data"
- ]
}
}
}
diff --git a/configs/regular/kmeans.json b/configs/regular/kmeans.json
index bcb7026f..756e2bab 100644
--- a/configs/regular/kmeans.json
+++ b/configs/regular/kmeans.json
@@ -70,19 +70,11 @@
"TEMPLATES": {
"sklearn kmeans": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common kmeans parameters",
"sklearn kmeans parameters",
"kmeans datasets"
]
- },
- "cuml kmeans": {
- "SETS": [
- "cuml implementation",
- "common kmeans parameters",
- "cuml kmeans parameters",
- "kmeans datasets"
- ]
}
}
}
diff --git a/configs/regular/knn.json b/configs/regular/knn.json
index e1cd8a75..a69c6864 100644
--- a/configs/regular/knn.json
+++ b/configs/regular/knn.json
@@ -74,47 +74,17 @@
"TEMPLATES": {
"sklearn brute knn clsf": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common knn parameters",
"sklearn knn parameters",
"brute knn algorithm - classification data"
]
},
- "sklearn kd_tree knn clsf": {
- "SETS": [
- "sklearn-ex[cpu] implementations",
- "common knn parameters",
- "sklearn knn parameters",
- "kd_tree knn algorithm - classification data"
- ]
- },
"sklearn brute knn regr": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
- "common knn parameters",
- "sklearn knn parameters",
- "brute knn algorithm - regression data"
- ]
- },
- "sklearn kd_tree knn regr": {
- "SETS": [
- "sklearn-ex[cpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common knn parameters",
"sklearn knn parameters",
- "kd_tree knn algorithm - regression data"
- ]
- },
- "cuml brute knn clsf": {
- "SETS": [
- "cuml implementation",
- "common knn parameters",
- "brute knn algorithm - classification data"
- ]
- },
- "cuml brute knn regr": {
- "SETS": [
- "cuml implementation",
- "common knn parameters",
"brute knn algorithm - regression data"
]
}
diff --git a/configs/regular/linear_model.json b/configs/regular/linear_model.json
index eb1b79ba..3040c82d 100644
--- a/configs/regular/linear_model.json
+++ b/configs/regular/linear_model.json
@@ -85,34 +85,12 @@
"TEMPLATES": {
"sklearn linear": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common linear parameters",
"sklearn linear parameters",
"regression datasets"
]
},
- "sklearn ridge": {
- "SETS": [
- "sklearn-ex[cpu] implementations",
- "common ridge parameters",
- "sklearn ridge parameters",
- "regression datasets"
- ]
- },
- "sklearn lasso": {
- "SETS": [
- "sklearn-ex[cpu] implementations",
- "common lasso parameters",
- "regression datasets"
- ]
- },
- "sklearn elasticnet": {
- "SETS": [
- "sklearn-ex[cpu] implementations",
- "common elasticnet parameters",
- "regression datasets"
- ]
- },
"cuml linear": {
"SETS": [
"cuml implementation",
@@ -120,30 +98,6 @@
"cuml L2 parameters",
"regression datasets"
]
- },
- "cuml ridge": {
- "SETS": [
- "cuml implementation",
- "common ridge parameters",
- "cuml L2 parameters",
- "regression datasets"
- ]
- },
- "cuml lasso": {
- "SETS": [
- "cuml implementation",
- "common lasso parameters",
- "cuml L1 parameters",
- "regression datasets"
- ]
- },
- "cuml elasticnet": {
- "SETS": [
- "cuml implementation",
- "common elasticnet parameters",
- "cuml L1 parameters",
- "regression datasets"
- ]
}
}
}
diff --git a/configs/regular/logreg.json b/configs/regular/logreg.json
index a94a7fcf..a8323b02 100644
--- a/configs/regular/logreg.json
+++ b/configs/regular/logreg.json
@@ -54,19 +54,11 @@
"TEMPLATES": {
"sklearn logreg": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
+ "sklearn-ex[gpu] implementations",
"common logreg parameters",
"sklearn logreg parameters",
"logreg datasets"
]
- },
- "cuml logreg": {
- "SETS": [
- "cuml implementation",
- "common logreg parameters",
- "cuml logreg parameters",
- "logreg datasets"
- ]
}
}
}
diff --git a/configs/regular/pca.json b/configs/regular/pca.json
index 582acc9e..e26d3f44 100644
--- a/configs/regular/pca.json
+++ b/configs/regular/pca.json
@@ -46,14 +46,7 @@
"TEMPLATES": {
"sklearn pca": {
"SETS": [
- "sklearn-ex[cpu,gpu] implementations",
- "pca parameters",
- "pca datasets"
- ]
- },
- "cuml pca": {
- "SETS": [
- "cuml implementation",
+ "sklearn-ex[gpu] implementations",
"pca parameters",
"pca datasets"
]
diff --git a/configs/spmd/large_scale/basic_stats.json b/configs/spmd/large_scale/basic_stats.json
new file mode 100644
index 00000000..f8f44e4e
--- /dev/null
+++ b/configs/spmd/large_scale/basic_stats.json
@@ -0,0 +1,31 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd basicstats parameters": {
+ "algorithm": {
+ "estimator": "BasicStatistics",
+ "estimator_methods": { "training": "fit" },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "basicstats": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "synthetic data",
+ "spmd basicstats parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/basic_stats_strong.json b/configs/spmd/large_scale/basic_stats_strong.json
new file mode 100644
index 00000000..0c7c671e
--- /dev/null
+++ b/configs/spmd/large_scale/basic_stats_strong.json
@@ -0,0 +1,30 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd basicstats parameters": {
+ "algorithm": {
+ "estimator": "BasicStatistics",
+ "estimator_methods": { "training": "fit" },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "basicstats": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "synthetic data",
+ "spmd basicstats parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/covariance.json b/configs/spmd/large_scale/covariance.json
new file mode 100644
index 00000000..7f4d6d7d
--- /dev/null
+++ b/configs/spmd/large_scale/covariance.json
@@ -0,0 +1,31 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd basicstats parameters": {
+ "algorithm": {
+ "estimator": "EmpiricalCovariance",
+ "estimator_methods": { "training": "fit" },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "covariance": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "synthetic data",
+ "spmd basicstats parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/covariance_strong.json b/configs/spmd/large_scale/covariance_strong.json
new file mode 100644
index 00000000..8e388801
--- /dev/null
+++ b/configs/spmd/large_scale/covariance_strong.json
@@ -0,0 +1,30 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd basicstats parameters": {
+ "algorithm": {
+ "estimator": "EmpiricalCovariance",
+ "estimator_methods": { "training": "fit" },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "covariance": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "synthetic data",
+ "spmd basicstats parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/dbscan.json b/configs/spmd/large_scale/dbscan.json
new file mode 100644
index 00000000..bf60b7cc
--- /dev/null
+++ b/configs/spmd/large_scale/dbscan.json
@@ -0,0 +1,36 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd dbscan parameters": {
+ "algorithm": {
+ "estimator": "DBSCAN",
+ "estimator_methods": {
+ "training": "fit"
+ },
+ "estimator_params" : {
+ "eps": 10, "min_samples": 5
+ },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "dtype": "float64"
+ }
+ },
+ "synthetic dataset": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 40000, "n_features": 100, "centers": 10 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "dbscan": {
+ "SETS": [
+ "common dbscan parameters",
+ "synthetic dataset",
+ "sklearnex spmd implementation",
+ "large scale <=64 parameters",
+ "spmd dbscan parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json
new file mode 100644
index 00000000..5e7ab322
--- /dev/null
+++ b/configs/spmd/large_scale/dbscan_strong.json
@@ -0,0 +1,36 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd dbscan parameters": {
+ "algorithm": {
+ "estimator": "DBSCAN",
+ "estimator_methods": {
+ "training": "fit"
+ },
+ "estimator_params" : {
+ "eps": 15, "min_samples": 50
+ },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "dtype": "float64"
+ }
+ },
+ "synthetic dataset": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 4000000, "n_features": 100, "centers": 10 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "dbscan": {
+ "SETS": [
+ "common dbscan parameters",
+ "synthetic dataset",
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "spmd dbscan parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/forest_max_samples.json b/configs/spmd/large_scale/forest_max_samples.json
new file mode 100644
index 00000000..95affb16
--- /dev/null
+++ b/configs/spmd/large_scale/forest_max_samples.json
@@ -0,0 +1,28 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd forest classification parameters": {
+ "algorithm": {
+ "estimator": "RandomForestClassifier",
+ "estimator_methods": { "training": "fit" },
+ "estimator_params": { "n_estimators": 20, "max_depth": 10 },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "forestCls": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 32 parameters",
+ "synthetic data",
+ "spmd forest classification parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/forest_no_max_samples.json b/configs/spmd/large_scale/forest_no_max_samples.json
new file mode 100644
index 00000000..c371371b
--- /dev/null
+++ b/configs/spmd/large_scale/forest_no_max_samples.json
@@ -0,0 +1,27 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd forest classification parameters": {
+ "algorithm": {
+ "estimator": "RandomForestClassifier",
+ "estimator_params": { "n_estimators": 100, "max_depth": 7 },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 1000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 1001000, "n_features": 100, "n_classes": 2 }, "n_informative": "[SPECIAL_VALUE]0.5" }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "forestCls": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "synthetic data",
+ "spmd forest classification parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/forest_strong.json b/configs/spmd/large_scale/forest_strong.json
new file mode 100644
index 00000000..653c70dc
--- /dev/null
+++ b/configs/spmd/large_scale/forest_strong.json
@@ -0,0 +1,28 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd forest classification parameters": {
+ "algorithm": {
+ "estimator": "RandomForestClassifier",
+ "estimator_methods": { "training": "fit" },
+ "estimator_params": { "n_estimators": 100, "max_depth": 8 },
+ "sklearnex_context": {"use_raw_input": true}
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 20000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 21000, "n_features": 200, "n_classes": 2 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "forestCls": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "synthetic data",
+ "spmd forest classification parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/incremental.json b/configs/spmd/large_scale/incremental.json
new file mode 100644
index 00000000..195074ee
--- /dev/null
+++ b/configs/spmd/large_scale/incremental.json
@@ -0,0 +1,77 @@
+{ "INCLUDE": [ ],
+ "PARAMETERS_SETS": {
+ "common incremental raw gpu params": {
+ "algorithm": {
+ "device": "gpu",
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "format":"dpctl",
+ "order": "C"
+ }
+ },
+ "statistical batches and data": [
+ { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 50000000, "n_features": 10, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 16666667, "n_features": 10, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 8333333, "n_features": 10, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 1000, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 166667, "n_features": 1000, "centers": 1 } } },
+ { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 83333, "n_features": 1000, "centers": 1 } } }
+ ],
+ "regression batches and data": [
+ { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 100000000, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 50000000, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 16666667, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 8333333, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 1500000, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 5000 } } },
+ { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 250000, "test_size": 5000 } } }
+ ],
+ "covariance": {
+ "algorithm": {
+ "estimator": "IncrementalEmpiricalCovariance",
+ "library": "sklearnex",
+ "estimator_methods": {"training": "partial_fit"}
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "basic_statistics": {
+ "algorithm": {
+ "estimator": "IncrementalBasicStatistics",
+ "library": "sklearnex",
+ "estimator_methods": {"training": "partial_fit"}
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "linear_regression": {
+ "algorithm": {
+ "estimator": "IncrementalLinearRegression",
+ "library": "sklearnex",
+ "estimator_methods": {"training": "partial_fit"}
+ }
+ },
+ "pca": {
+ "algorithm": {
+ "estimator": "IncrementalPCA",
+ "library": "sklearnex.preview",
+ "estimator_methods": {"training": "partial_fit"}
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ }
+ },
+ "TEMPLATES": {
+ "basic_statistics": { "SETS": ["common incremental raw gpu params", "basic_statistics", "statistical batches and data"] },
+ "covariance": { "SETS": ["common incremental raw gpu params", "covariance", "statistical batches and data"] },
+ "linear_regression": { "SETS": ["common incremental raw gpu params", "linear_regression", "regression batches and data"] },
+ "pca": { "SETS": ["common incremental raw gpu params", "pca", "statistical batches and data"] }
+ }
+}
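
Reading the batch pairings above: the first entry of each series sweeps every batch count over a fixed dataset, while the companion entries shrink the dataset as the batch count grows, so `num_batches * n_samples` stays near the same total (about 1e8 rows for the 10-feature statistical series). A quick arithmetic check:

```python
# (num_batches, n_samples) pairs from the 10-feature statistical series:
series = [(2, 50_000_000), (6, 16_666_667), (12, 8_333_333)]
for num_batches, n_samples in series:
    rows_per_batch = n_samples // num_batches  # rows fed per partial_fit call
    print(num_batches, n_samples, num_batches * n_samples, rows_per_batch)
# num_batches * n_samples stays at ~1e8 in every pairing
```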
diff --git a/configs/spmd/large_scale/kmeans_narrow_weak.json b/configs/spmd/large_scale/kmeans_narrow_weak.json
new file mode 100644
index 00000000..69f0b6ac
--- /dev/null
+++ b/configs/spmd/large_scale/kmeans_narrow_weak.json
@@ -0,0 +1,33 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd kmeans parameters": {
+ "algorithm": {
+ "estimator": "KMeans",
+ "estimator_params": {
+ "algorithm": "lloyd",
+ "max_iter": 20,
+ "n_clusters": 10,
+ "random_state": 42
+ },
+ "estimator_methods": { "training": "fit", "inference": "predict" },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 2000000, "n_features": 100, "centers": 2000, "cluster_std": 3, "center_box": 100.0}}
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "kmeans": {
+ "SETS": [
+ "synthetic data",
+ "sklearnex spmd implementation",
+ "large scale 2k parameters sample shift",
+ "spmd kmeans parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/kmeans_strong.json b/configs/spmd/large_scale/kmeans_strong.json
new file mode 100644
index 00000000..90a1ea3f
--- /dev/null
+++ b/configs/spmd/large_scale/kmeans_strong.json
@@ -0,0 +1,33 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd kmeans parameters": {
+ "algorithm": {
+ "estimator": "KMeans",
+ "estimator_params": {
+ "algorithm": "lloyd",
+ "max_iter": 20,
+ "n_clusters": 100
+ },
+ "estimator_methods": { "training": "fit", "inference": "predict" },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 100 }}
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "kmeans": {
+ "SETS": [
+ "synthetic data",
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "spmd kmeans parameters"
+ ]
+ }
+ }
+}
+
diff --git a/configs/spmd/large_scale/kmeans_wide_weak.json b/configs/spmd/large_scale/kmeans_wide_weak.json
new file mode 100644
index 00000000..5520f10a
--- /dev/null
+++ b/configs/spmd/large_scale/kmeans_wide_weak.json
@@ -0,0 +1,34 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd kmeans parameters": {
+ "algorithm": {
+ "estimator": "KMeans",
+ "estimator_params": {
+ "algorithm": "lloyd",
+ "max_iter": 20,
+ "n_clusters": 10,
+ "random_state": 42
+ },
+ "estimator_methods": { "training": "fit", "inference": "predict" },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 2000}}
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "kmeans": {
+ "SETS": [
+ "synthetic data",
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "spmd kmeans parameters"
+ ]
+ }
+ }
+}
+
diff --git a/configs/spmd/large_scale/knn_strong.json b/configs/spmd/large_scale/knn_strong.json
new file mode 100644
index 00000000..36daf3f1
--- /dev/null
+++ b/configs/spmd/large_scale/knn_strong.json
@@ -0,0 +1,37 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd knn cls parameters": {
+ "algorithm": {
+ "estimator": "KNeighborsClassifier",
+ "estimator_params": {
+ "algorithm": "brute",
+ "metric": "minkowski",
+ "p": 2,
+ "weights": "uniform",
+ "n_neighbors": 100
+ },
+ "estimator_methods": {
+ "training": "fit",
+ "inference": "predict"
+ },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic classification data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 3000000, "test_size": 2000000 }, "generation_kwargs": { "n_samples": 5000000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "knn classifier": {
+ "SETS": [
+ "synthetic classification data",
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "spmd knn cls parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/knn_tier1.json b/configs/spmd/large_scale/knn_tier1.json
new file mode 100644
index 00000000..c230cc4e
--- /dev/null
+++ b/configs/spmd/large_scale/knn_tier1.json
@@ -0,0 +1,35 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd knn cls parameters": {
+ "algorithm": {
+ "estimator": "KNeighborsClassifier",
+ "estimator_params": {
+ "algorithm": "brute",
+ "metric": "minkowski",
+ "p": 2,
+ "weights": "uniform"
+ },
+ "estimator_methods": {
+ "training": "fit",
+ "inference": "predict"
+ },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic classification data": [
+ { "data": { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 100000}, "generation_kwargs": { "n_samples": 2000000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, "algorithm": { "estimator_params": { "n_neighbors": 5 } } },
+ { "data": { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 10000}, "generation_kwargs": { "n_samples": 2000000, "n_features": 1000, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, "algorithm": { "estimator_params": { "n_neighbors": 100 } } }
+ ]
+ },
+ "TEMPLATES": {
+ "knn classifier": {
+ "SETS": [
+ "synthetic classification data",
+ "sklearnex spmd implementation",
+ "large scale 32 parameters",
+ "spmd knn cls parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/knn_tier2.json b/configs/spmd/large_scale/knn_tier2.json
new file mode 100644
index 00000000..ff0032e2
--- /dev/null
+++ b/configs/spmd/large_scale/knn_tier2.json
@@ -0,0 +1,37 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd knn cls parameters": {
+ "algorithm": {
+ "estimator": "KNeighborsClassifier",
+ "estimator_params": {
+ "algorithm": "brute",
+ "metric": "minkowski",
+ "p": 2,
+ "weights": "uniform",
+ "n_neighbors": 5
+ },
+ "estimator_methods": {
+ "training": "fit",
+ "inference": "predict"
+ },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic classification data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 100, "test_size": 100}, "generation_kwargs": { "n_samples": 200, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "knn classifier": {
+ "SETS": [
+ "synthetic classification data",
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "spmd knn cls parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json
new file mode 100644
index 00000000..28626dc9
--- /dev/null
+++ b/configs/spmd/large_scale/large_scale.json
@@ -0,0 +1,85 @@
+{
+ "PARAMETERS_SETS": {
+ "large scale default parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "None"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale strong parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "rank_based"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale 2k parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "None"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale 2k parameters sample shift": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "sample_shift"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale 32 parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "None"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale <=64 parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "None"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale strong 2k parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "rank_based"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale strong <=64 parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "rank_based"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+ }
+ },
+ "large scale impi parameters": {
+ "data": {
+ "dtype": "float64",
+ "distributed_split": "no"
+ },
+ "bench": {
+ "mpi_params": {"n": [1,2,4,6,9,12], "ppn": 12}
+ }
+ }
+ }
+}
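
The difference between the weak- and strong-scaling sets above is the `distributed_split` value: per the README table, `rank_based` divides one fixed dataset across ranks (strong scaling), while `None` leaves every rank with the full generated dataset (weak scaling, constant work per rank as ranks grow). A sketch of the resulting per-rank row counts; the helper name is illustrative:

```python
def rows_per_rank(total_rows, n_ranks, distributed_split):
    # "rank_based": one dataset is split evenly across ranks -> strong scaling.
    # "None": every rank keeps the full dataset -> weak scaling.
    if distributed_split == "rank_based":
        return total_rows // n_ranks
    return total_rows


assert rows_per_rank(25_000_000, 64, "rank_based") == 390_625
assert rows_per_rank(25_000_000, 64, "None") == 25_000_000
```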
diff --git a/configs/spmd/large_scale/linreg.json b/configs/spmd/large_scale/linreg.json
new file mode 100644
index 00000000..7c7fb035
--- /dev/null
+++ b/configs/spmd/large_scale/linreg.json
@@ -0,0 +1,28 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd linear parameters": {
+ "algorithm": {
+ "estimator": "LinearRegression",
+ "estimator_methods": { "training": "fit" },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 100000000, "test_size": 5000 } },
+ { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "linreg": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "synthetic data",
+ "spmd linear parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/linreg_strong.json b/configs/spmd/large_scale/linreg_strong.json
new file mode 100644
index 00000000..ac5a6c7a
--- /dev/null
+++ b/configs/spmd/large_scale/linreg_strong.json
@@ -0,0 +1,27 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd linear parameters": {
+ "algorithm": {
+ "estimator": "LinearRegression",
+ "estimator_methods": { "training": "fit" },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_regression", "generation_kwargs": { "n_samples": 25005000, "n_features": 100, "noise": 1.25 }, "split_kwargs": { "train_size": 25000000, "test_size": 5000 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "linreg": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "synthetic data",
+ "spmd linear parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/logreg.json b/configs/spmd/large_scale/logreg.json
new file mode 100644
index 00000000..b7b4b998
--- /dev/null
+++ b/configs/spmd/large_scale/logreg.json
@@ -0,0 +1,30 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd logreg2 parameters": {
+ "algorithm":{
+ "estimator": "LogisticRegression",
+ "estimator_methods": { "inference": "predict" },
+ "estimator_params": { "max_iter": 10 },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } },
+ { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 1000, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "logreg": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "spmd logreg parameters",
+ "synthetic data",
+ "spmd logreg2 parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json
new file mode 100644
index 00000000..219840ea
--- /dev/null
+++ b/configs/spmd/large_scale/logreg_strong.json
@@ -0,0 +1,29 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd logreg2 parameters": {
+ "algorithm":{
+ "estimator": "LogisticRegression",
+ "estimator_methods": { "inference": "predict" },
+ "estimator_params": { "max_iter": 16 },
+ "sklearnex_context": { "use_raw_input": true }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_classification", "split_kwargs": { "train_size": 12000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 12001000, "n_features": 200, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "logreg": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale strong 64 parameters",
+ "spmd logreg parameters",
+ "synthetic data",
+ "spmd logreg2 parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/pca.json b/configs/spmd/large_scale/pca.json
new file mode 100644
index 00000000..ce56bd8a
--- /dev/null
+++ b/configs/spmd/large_scale/pca.json
@@ -0,0 +1,31 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd pca parameters": {
+ "algorithm": {
+ "estimator": "PCA",
+ "estimator_methods": { "training": "fit", "inference": "" },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } },
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "linreg": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale 2k parameters",
+ "synthetic data",
+ "spmd pca parameters"
+ ]
+ }
+ }
+}
diff --git a/configs/spmd/large_scale/pca_strong.json b/configs/spmd/large_scale/pca_strong.json
new file mode 100644
index 00000000..70461ba7
--- /dev/null
+++ b/configs/spmd/large_scale/pca_strong.json
@@ -0,0 +1,30 @@
+{
+ "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
+ "PARAMETERS_SETS": {
+ "spmd pca parameters": {
+ "algorithm": {
+ "estimator": "PCA",
+ "estimator_methods": { "training": "fit", "inference": "" },
+ "sklearnex_context": { "use_raw_input": true }
+ },
+ "data": {
+ "split_kwargs": { "test_size": 0.0001 }
+ }
+ },
+ "synthetic data": {
+ "data": [
+ { "source": "make_blobs", "generation_kwargs": { "n_samples": 25000000, "n_features": 100, "centers": 1 } }
+ ]
+ }
+ },
+ "TEMPLATES": {
+ "linreg": {
+ "SETS": [
+ "sklearnex spmd implementation",
+ "large scale strong <=64 parameters",
+ "synthetic data",
+ "spmd pca parameters"
+ ]
+ }
+ }
+}
diff --git a/sklbench/benchmarks/custom_function.py b/sklbench/benchmarks/custom_function.py
index 25abb900..34b223ed 100644
--- a/sklbench/benchmarks/custom_function.py
+++ b/sklbench/benchmarks/custom_function.py
@@ -64,9 +64,13 @@ def get_function_args(bench_case: BenchCase, x_train, y_train, x_test, y_test) -
def measure_function_instance(bench_case, function_instance, args: Tuple, kwargs: Dict):
metrics = dict()
- metrics["time[ms]"], metrics["time std[ms]"], _ = measure_case(
- bench_case, function_instance, *args, **kwargs
- )
+ (
+ metrics["time[ms]"],
+ metrics["time std[ms]"],
+ metrics["first iter[ms]"],
+ metrics["box filter mean[ms]"],
+ metrics["box filter std[ms]"],
+ ) = measure_case(bench_case, function_instance, *args, **kwargs)
return metrics
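
`measure_case` now returns five values per method: mean time, its std, the first-iteration time, and a box-filtered mean/std. The filter itself lives in the measurement utilities, which are not part of this diff; a hedged sketch of what an IQR-based box filter over the timing samples could look like (the 1.5*IQR whiskers are an assumption, not the confirmed implementation):

```python
import numpy as np


def box_filter_stats(times_ms):
    # Assumed Tukey-style box filter: keep samples inside
    # [Q1 - 1.5*IQR, Q3 + 1.5*IQR], then report mean/std of the survivors
    # so a few warm-up or interference outliers don't skew the result.
    times_ms = np.asarray(times_ms, dtype=float)
    q1, q3 = np.percentile(times_ms, [25, 75])
    iqr = q3 - q1
    kept = times_ms[(times_ms >= q1 - 1.5 * iqr) & (times_ms <= q3 + 1.5 * iqr)]
    return float(kept.mean()), float(kept.std())
```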
diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py
index f9c0a75e..819f5fb5 100644
--- a/sklbench/benchmarks/sklearn_estimator.py
+++ b/sklbench/benchmarks/sklearn_estimator.py
@@ -66,15 +66,15 @@ def get_estimator(library_name: str, estimator_name: str):
f"Using first {classes_map[estimator_name][0]}."
)
estimator = classes_map[estimator_name][0]
- if not issubclass(estimator, BaseEstimator):
- logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator")
+ # if not issubclass(estimator, BaseEstimator):
+ # logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator")
return estimator
def get_estimator_methods(bench_case: BenchCase) -> Dict[str, List[str]]:
# default estimator methods
estimator_methods = {
- "training": ["fit"],
+ "training": ["partial_fit", "fit"],
"inference": ["predict", "predict_proba", "transform"],
}
for stage in estimator_methods.keys():
@@ -134,6 +134,9 @@ def get_subset_metrics_of_estimator(
and isinstance(iterations[0], Union[Numeric, NumpyNumeric].__args__)
):
metrics.update({"iterations": int(iterations[0])})
+ if hasattr(estimator_instance, "_n_inner_iter"):
+ inner_iters = estimator_instance._n_inner_iter
+ metrics.update({"inner_iters": int(inner_iters)})
if task == "classification":
y_pred = convert_to_numpy(estimator_instance.predict(x))
metrics.update(
@@ -142,7 +145,7 @@ def get_subset_metrics_of_estimator(
"balanced accuracy": float(balanced_accuracy_score(y_compat, y_pred)),
}
)
- if hasattr(estimator_instance, "predict_proba") and not (
+ """if hasattr(estimator_instance, "predict_proba") and not (
hasattr(estimator_instance, "probability")
and getattr(estimator_instance, "probability") == False
):
@@ -162,7 +165,7 @@ def get_subset_metrics_of_estimator(
),
"logloss": float(log_loss(y_compat, y_pred_proba)),
}
- )
+ )"""
elif task == "regression":
y_pred = convert_to_numpy(estimator_instance.predict(x))
metrics.update(
@@ -188,19 +191,6 @@ def get_subset_metrics_of_estimator(
}
)
elif task == "clustering":
- if hasattr(estimator_instance, "inertia_"):
- # compute inertia manually using distances to cluster centers
- # provided by KMeans.transform
- metrics.update(
- {
- "inertia": float(
- np.power(
- convert_to_numpy(estimator_instance.transform(x)).min(axis=1),
- 2,
- ).sum()
- )
- }
- )
if hasattr(estimator_instance, "predict"):
y_pred = convert_to_numpy(estimator_instance.predict(x))
metrics.update(
@@ -334,34 +324,43 @@ def verify_patching(stream: io.StringIO, function_name) -> bool:
return acceleration_lines > 0 and fallback_lines == 0
-def create_online_function(method_instance, data_args, batch_size):
- n_batches = data_args[0].shape[0] // batch_size
+def create_online_function(
+ estimator_instance, method_instance, data_args, num_batches, batch_size
+):
if "y" in list(inspect.signature(method_instance).parameters):
def ndarray_function(x, y):
- for i in range(n_batches):
+ for i in range(num_batches):
method_instance(
x[i * batch_size : (i + 1) * batch_size],
y[i * batch_size : (i + 1) * batch_size],
)
+ if hasattr(estimator_instance, "_onedal_finalize_fit"):
+ estimator_instance._onedal_finalize_fit()
def dataframe_function(x, y):
- for i in range(n_batches):
+ for i in range(num_batches):
method_instance(
x.iloc[i * batch_size : (i + 1) * batch_size],
y.iloc[i * batch_size : (i + 1) * batch_size],
)
+ if hasattr(estimator_instance, "_onedal_finalize_fit"):
+ estimator_instance._onedal_finalize_fit()
else:
def ndarray_function(x):
- for i in range(n_batches):
+ for i in range(num_batches):
method_instance(x[i * batch_size : (i + 1) * batch_size])
+ if hasattr(estimator_instance, "_onedal_finalize_fit"):
+ estimator_instance._onedal_finalize_fit()
def dataframe_function(x):
- for i in range(n_batches):
+ for i in range(num_batches):
method_instance(x.iloc[i * batch_size : (i + 1) * batch_size])
+ if hasattr(estimator_instance, "_onedal_finalize_fit"):
+ estimator_instance._onedal_finalize_fit()
if "ndarray" in str(type(data_args[0])):
return ndarray_function
@@ -414,12 +413,28 @@ def measure_sklearn_estimator(
data_args = (x_train,)
else:
data_args = (x_test,)
- batch_size = get_bench_case_value(
- bench_case, f"algorithm:batch_size:{stage}"
- )
- if batch_size is not None:
+
+ if method == "partial_fit":
+ num_batches = get_bench_case_value(bench_case, "data:num_batches")
+ batch_size = get_bench_case_value(bench_case, "data:batch_size")
+
+ if batch_size is None:
+ if num_batches is None:
+ num_batches = 5
+ batch_size = (
+ data_args[0].shape[0] + num_batches - 1
+ ) // num_batches
+ if num_batches is None:
+ num_batches = (
+ data_args[0].shape[0] + batch_size - 1
+ ) // batch_size
+
method_instance = create_online_function(
- method_instance, data_args, batch_size
+ estimator_instance,
+ method_instance,
+ data_args,
+ num_batches,
+ batch_size,
)
# daal4py model builders enabling branch
if enable_modelbuilders and stage == "inference":
@@ -429,17 +444,14 @@ def measure_sklearn_estimator(
estimator_instance.get_booster()
)
method_instance = getattr(daal_model, method)
-
metrics[method] = dict()
(
metrics[method]["time[ms]"],
metrics[method]["time std[ms]"],
- _,
+ metrics[method]["first iter[ms]"],
+ metrics[method]["box filter mean[ms]"],
+ metrics[method]["box filter std[ms]"],
) = measure_case(bench_case, method_instance, *data_args)
- if batch_size is not None:
- metrics[method]["throughput[samples/ms]"] = (
- (data_args[0].shape[0] // batch_size) * batch_size
- ) / metrics[method]["time[ms]"]
if ensure_sklearnex_patching:
full_method_name = f"{estimator_class.__name__}.{method}"
sklearnex_logging_stream.seek(0)
@@ -490,7 +502,18 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
estimator_params = get_bench_case_value(
bench_case, "algorithm:estimator_params", dict()
)
+ # logger.debug("estimator params: " + str(estimator_params))
+ if (
+ "DBSCAN" in str(estimator_name)
+ and get_bench_case_value(bench_case, "data:distributed_split", None)
+ != "rank_based"
+ ):
+ if "min_samples" in estimator_params:
+ from mpi4py import MPI
+ estimator_params["min_samples"] = (
+ MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"]
+ )
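+    # e.g. min_samples = 5 with 16 MPI ranks becomes 80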
# get estimator methods for measurement
estimator_methods = get_estimator_methods(bench_case)
@@ -521,12 +544,12 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
result_template = enrich_result(result_template, bench_case)
if "assume_finite" in context_params:
result_template["assume_finite"] = context_params["assume_finite"]
- if hasattr(estimator_instance, "get_params"):
- estimator_params = estimator_instance.get_params()
# note: "handle" is not JSON-serializable
if "handle" in estimator_params:
del estimator_params["handle"]
- logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}")
+ # logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}")
result_template.update(estimator_params)
data_descs = {
diff --git a/sklbench/datasets/__init__.py b/sklbench/datasets/__init__.py
index 093875c4..d4bddca1 100644
--- a/sklbench/datasets/__init__.py
+++ b/sklbench/datasets/__init__.py
@@ -67,6 +67,11 @@ def load_data(bench_case: BenchCase) -> Tuple[Dict, Dict]:
generation_kwargs = get_bench_case_value(
bench_case, "data:generation_kwargs", dict()
)
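+        # configs pass `center_box` as a single scalar bound; expand it into
+        # the symmetric (low, high) tuple expected by sklearn's make_blobs,
+        # e.g. "center_box": 100.0 -> center_box=(-100.0, 100.0)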
+ if "center_box" in generation_kwargs:
+ generation_kwargs["center_box"] = (
+ -1 * generation_kwargs["center_box"],
+ generation_kwargs["center_box"],
+ )
return load_sklearn_synthetic_data(
function_name=source,
input_kwargs=generation_kwargs,
diff --git a/sklbench/datasets/common.py b/sklbench/datasets/common.py
index e7ed0160..28b62fe6 100644
--- a/sklbench/datasets/common.py
+++ b/sklbench/datasets/common.py
@@ -136,11 +136,11 @@ def cache_wrapper(**kwargs):
data_name = kwargs["data_name"]
data_cache = kwargs["data_cache"]
if len(get_filenames_by_prefix(data_cache, data_name)) > 0:
- logger.info(f'Loading "{data_name}" dataset from cache files')
+ # logger.info(f'Loading "{data_name}" dataset from cache files')
data = load_data_from_cache(data_cache, data_name)
data_desc = load_data_description(data_cache, data_name)
else:
- logger.info(f'Loading "{data_name}" dataset from scratch')
+ # logger.info(f'Loading "{data_name}" dataset from scratch')
data, data_desc = function(**kwargs)
save_data_to_cache(data, data_cache, data_name)
save_data_description(data_desc, data_cache, data_name)
diff --git a/sklbench/datasets/transformer.py b/sklbench/datasets/transformer.py
index d2e63e9e..c63d3b20 100644
--- a/sklbench/datasets/transformer.py
+++ b/sklbench/datasets/transformer.py
@@ -14,6 +14,7 @@
# limitations under the License.
# ===============================================================================
+import math
import os
import numpy as np
@@ -109,7 +110,23 @@ def split_and_transform_data(bench_case, data, data_description):
y_train, y_test = None, None
distributed_split = get_bench_case_value(bench_case, "data:distributed_split", None)
- if distributed_split == "rank_based":
+ if distributed_split == "sample_shift":
+ from mpi4py import MPI
+
+ rank = MPI.COMM_WORLD.Get_rank()
+        # This shifts the distribution of the synthetic data on each rank for
+        # KMeans weak-scaling tests; unlike plain replication, it avoids
+        # duplicating identical data on every rank when testing with a large
+        # number of tiles. For example, with 24,576 tiles each data point in
+        # the 24,576th tile is multiplied by roughly 1.47. The factor 0.003
+        # was chosen arbitrarily and can be tuned for other datasets and
+        # algorithms if needed.
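+        # e.g. rank 0 leaves its data unchanged (factor 1.0), while rank 100
+        # scales it by sqrt(100) * 0.003 + 1 = 1.03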
+ adjust_number = (math.sqrt(rank) * 0.003) + 1
+ x_test = x_test * adjust_number
+ x_train = x_train * adjust_number
+
+ elif distributed_split == "rank_based":
from mpi4py import MPI
comm = MPI.COMM_WORLD
@@ -129,10 +146,12 @@ def split_and_transform_data(bench_case, data, data_description):
x_train[train_start:train_end],
y_train[train_start:train_end],
)
- x_test, y_test = x_test[test_start:test_end], y_test[test_start:test_end]
+ if distributed_split == "rank_based":
+ x_test, y_test = x_test[test_start:test_end], y_test[test_start:test_end]
else:
x_train = x_train[train_start:train_end]
- x_test = x_test[test_start:test_end]
+ if distributed_split == "rank_based":
+ x_test = x_test[test_start:test_end]
device = get_bench_case_value(bench_case, "algorithm:device", None)
common_data_format = get_bench_case_value(bench_case, "data:format", "pandas")
@@ -178,7 +197,7 @@ def split_and_transform_data(bench_case, data, data_description):
"format": data_format,
"order": data_order,
"dtype": data_dtype,
- "samples": converted_data.shape[0],
+ "samples (per rank)": converted_data.shape[0],
}
if len(converted_data.shape) == 2 and converted_data.shape[1] > 1:
data_description[subset_name]["features"] = converted_data.shape[1]
diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py
index 28fa2bb0..2bc3a05e 100644
--- a/sklbench/report/implementation.py
+++ b/sklbench/report/implementation.py
@@ -16,7 +16,7 @@
import argparse
import json
-from typing import Dict, List
+from typing import Dict, Hashable, List
import openpyxl as xl
import pandas as pd
@@ -32,6 +32,9 @@
METRICS = {
"lower is better": [
"time[ms]",
+ "first iter[ms]",
+ "box filter mean[ms]",
+ "box filter std[ms]",
"iterations",
# classification
"logloss",
@@ -239,6 +242,7 @@ def get_result_tables_as_df(
bench_cases = pd.DataFrame(
[flatten_dict(bench_case) for bench_case in results["bench_cases"]]
)
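+    # flattened bench cases may contain unhashable cells (lists/dicts) that
+    # break pandas grouping, so stringify them, e.g. [10, 100] -> "[10, 100]"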
+ bench_cases = bench_cases.map(lambda x: str(x) if not isinstance(x, Hashable) else x)
if compatibility_mode:
bench_cases = transform_results_to_compatible(bench_cases)
@@ -248,7 +252,7 @@ def get_result_tables_as_df(
bench_cases.drop(columns=[column], inplace=True)
diffby_columns.remove(column)
- return split_df_by_columns(bench_cases, splitby_columns)
+ return split_df_by_columns(bench_cases, splitby_columns, False)
def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame:
@@ -258,7 +262,10 @@ def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame:
# only relative improvements are included in summary currently
if len(column) > 1 and column[1] == f"{metric_name} relative improvement":
metric_columns.append(column)
- summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T
+ if metric_columns:
+ summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T
+ else:
+        # keep one row so that the index assignment below does not fail
+        # with a length mismatch on the empty frame
+        summary = pd.DataFrame(index=[df_name])
summary.index = pd.Index([df_name])
return summary
diff --git a/sklbench/runner/commands_helper.py b/sklbench/runner/commands_helper.py
index 09e61369..aace5643 100644
--- a/sklbench/runner/commands_helper.py
+++ b/sklbench/runner/commands_helper.py
@@ -45,6 +45,10 @@ def generate_benchmark_command(
mpi_prefix = "mpirun"
for mpi_param_name, mpi_param_value in mpi_params.items():
mpi_prefix += f" -{mpi_param_name} {mpi_param_value}"
+ if mpi_param_name == "-hostfile":
+ import os
+
+ mpi_prefix += os.environ.get("PBS_NODEFILE")
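+        # e.g. mpi_params = {"n": 8, "-hostfile": ""} under PBS expands to
+        #     "mpirun -n 8 --hostfile $PBS_NODEFILE <benchmark command>"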
command_prefix = f"{mpi_prefix} {command_prefix}"
# 3. Intel(R) VTune* profiling command prefix
vtune_profiling = get_bench_case_value(bench_case, "bench:vtune_profiling")
diff --git a/sklbench/utils/logger.py b/sklbench/utils/logger.py
index 90940630..250c5fa6 100644
--- a/sklbench/utils/logger.py
+++ b/sklbench/utils/logger.py
@@ -19,7 +19,7 @@
logger = logging.Logger("sklbench")
logging_channel = logging.StreamHandler()
-logging_formatter = logging.Formatter("%(levelname)s:%(name)s: %(message)s")
+logging_formatter = logging.Formatter("%(asctime)s - %(levelname)s:%(name)s: %(message)s")
logging_channel.setFormatter(logging_formatter)
logger.addHandler(logging_channel)
diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py
index 989daefd..3677e760 100644
--- a/sklbench/utils/measurement.py
+++ b/sklbench/utils/measurement.py
@@ -40,6 +40,22 @@ def box_filter(timing, left=0.2, right=0.8):
return np.mean(result) * 1000, np.std(result) * 1000
+def large_scale_measurements(timing):
+    # ndarray conversion lets the boolean masking below work on plain lists
+    timing = np.asarray(timing)
+    # the first (warm-up) run is reported separately and excluded from the
+    # mean/std over the remaining runs
+    first_iter = timing[0] * 1000
+    mean = np.mean(timing[1:]) * 1000
+    stdev = np.std(timing[1:]) * 1000
+    # box filter: drop outliers outside Tukey's fences (1.5 * IQR)
+    Q1, Q3 = np.percentile(timing, [25, 75])
+    IQR = Q3 - Q1
+    lower, upper = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
+
+    filtered_times = timing[(timing >= lower) & (timing <= upper)]
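+    # e.g. timings [1.0, 1.1, 1.2, 5.0] s give Q1 = 1.075 and Q3 = 2.15; the
+    # upper fence is 2.15 + 1.5 * 1.075 = 3.76 s, so the 5.0 s outlier is dropped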
+
+ box_filter_mean = np.mean(filtered_times) * 1000 if filtered_times.size > 0 else 0
+ box_filter_stdev = np.std(filtered_times) * 1000 if filtered_times.size > 0 else 0
+ return mean, stdev, first_iter, box_filter_mean, box_filter_stdev
+
+
def measure_time(
func,
*args,
@@ -56,12 +72,16 @@ def measure_time(
)
times = []
func_return_value = None
+ inners, iters = [], []
while len(times) < n_runs:
if enable_itt and itt_is_available:
itt.resume()
t0 = timeit.default_timer()
func_return_value = func(*args, **kwargs)
t1 = timeit.default_timer()
+ if hasattr(func, "__self__") and hasattr(func.__self__, "_n_inner_iter"):
+ inners.append(func.__self__._n_inner_iter)
+ iters.append(func.__self__.n_iter_)
if enable_itt and itt_is_available:
itt.pause()
times.append(t1 - t0)
@@ -72,13 +92,27 @@ def measure_time(
f"exceeded time limit ({time_limit} seconds)"
)
break
- mean, std = box_filter(times)
- if std / mean > std_mean_ratio:
- logger.warning(
- f'Measured "std / mean" time ratio of "{str(func)}" function is higher '
- f"than threshold ({round(std / mean, 3)} vs. {std_mean_ratio})"
- )
- return mean, std, func_return_value
+
+    try:
+        from mpi4py import MPI
+
+        # under MPI, log measurement details from the root rank only
+        log_details = MPI.COMM_WORLD.Get_rank() == 0
+    except ModuleNotFoundError:
+        log_details = True
+    if log_details:
+        logger.debug(
+            "iters across n runs: "
+            + str(iters)
+            + ", inner iters across n runs: "
+            + str(inners)
+        )
+        logger.debug(f"Runtime of all {len(times)} measured runs: {times}")
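+    # note: unlike the old box_filter path, the wrapped function's return value
+    # is no longer propagated; callers now unpack five timing metrics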
+ return large_scale_measurements(times)
# wrapper to get measurement params from benchmarking case