Skip to content

Commit 535c1e4

Browse files
committed
Add incremental algorithms support
1 parent 328fcaf commit 535c1e4

File tree

5 files changed

+144
-10
lines changed

5 files changed

+144
-10
lines changed

configs/incremental.json

+99
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
{
2+
"PARAMETERS_SETS": {
3+
"common": {"bench": {"n_runs": 10, "time_limit": 60}},
4+
"covariance data": {
5+
"data": [
6+
{
7+
"source": "make_blobs",
8+
"generation_kwargs": {
9+
"centers": 1,
10+
"n_samples": 1000,
11+
"n_features": [16, 64]
12+
},
13+
"split_kwargs": {"ignore": true}
14+
}
15+
]
16+
},
17+
"basic_statistics data": {
18+
"data": {
19+
"source": "make_blobs",
20+
"generation_kwargs": {
21+
"centers": 1,
22+
"n_samples": 10000,
23+
"n_features": [16, 64]
24+
},
25+
"split_kwargs": {"ignore": true}
26+
}
27+
},
28+
"linear_regression data": {
29+
"data": {
30+
"source": "make_regression",
31+
"split_kwargs": {"train_size": 0.2, "test_size": 0.8},
32+
"generation_kwargs": {
33+
"n_samples": 5000,
34+
"n_features": [40, 100],
35+
"n_informative": 5,
36+
"noise": 2.0
37+
}
38+
}
39+
},
40+
"pca data": {
41+
"data": {
42+
"source": "make_blobs",
43+
"generation_kwargs": {
44+
"centers": 1,
45+
"n_samples": 1000,
46+
"n_features": [16, 64]
47+
},
48+
"split_kwargs": {"ignore": true}
49+
}
50+
},
51+
"covariance": {
52+
"algorithm": [
53+
{
54+
"estimator": "IncrementalEmpiricalCovariance",
55+
"library": "sklearnex.covariance",
56+
"estimator_methods": {"training": "partial_fit"},
57+
"num_batches": {"training": 2}
58+
}
59+
]
60+
},
61+
"basic_statistics": {
62+
"algorithm": [
63+
{
64+
"estimator": "IncrementalBasicStatistics",
65+
"library": "sklearnex.basic_statistics",
66+
"num_batches": {"training": 2}
67+
}
68+
]
69+
},
70+
"linear_regression": {
71+
"algorithm": [
72+
{
73+
"estimator": "IncrementalLinearRegression",
74+
"library": "sklearnex.linear_model",
75+
"num_batches": {"training": 2}
76+
}
77+
]
78+
},
79+
"pca": {
80+
"algorithm": [
81+
{
82+
"estimator": "IncrementalPCA",
83+
"library": "sklearnex.preview.decomposition",
84+
"num_batches": {"training": 2}
85+
}
86+
]
87+
}
88+
},
89+
"TEMPLATES": {
90+
"covariance": {"SETS": ["common", "covariance", "covariance data"]},
91+
"basic_statistics": {
92+
"SETS": ["common", "basic_statistics", "basic_statistics data"]
93+
},
94+
"linear_regression": {
95+
"SETS": ["common", "linear_regression", "linear_regression data"]
96+
},
97+
"pca": {"SETS": ["common", "pca", "pca data"]}
98+
}
99+
}

sklbench/benchmarks/sklearn_estimator.py

+29-7
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ def get_estimator(library_name: str, estimator_name: str):
7474
def get_estimator_methods(bench_case: BenchCase) -> Dict[str, List[str]]:
7575
# default estimator methods
7676
estimator_methods = {
77-
"training": ["fit"],
77+
"training": ["partial_fit", "fit"],
7878
"inference": ["predict", "predict_proba", "transform"],
7979
}
8080
for stage in estimator_methods.keys():
@@ -334,7 +334,9 @@ def verify_patching(stream: io.StringIO, function_name) -> bool:
334334
return acceleration_lines > 0 and fallback_lines == 0
335335

336336

337-
def create_online_function(method_instance, data_args, batch_size):
337+
def create_online_function(
338+
estimator_instance, method_instance, data_args, num_batches, batch_size
339+
):
338340
n_batches = data_args[0].shape[0] // batch_size
339341

340342
if "y" in list(inspect.signature(method_instance).parameters):
@@ -345,23 +347,27 @@ def ndarray_function(x, y):
345347
x[i * batch_size : (i + 1) * batch_size],
346348
y[i * batch_size : (i + 1) * batch_size],
347349
)
350+
estimator_instance._onedal_finalize_fit()
348351

349352
def dataframe_function(x, y):
350353
for i in range(n_batches):
351354
method_instance(
352355
x.iloc[i * batch_size : (i + 1) * batch_size],
353356
y.iloc[i * batch_size : (i + 1) * batch_size],
354357
)
358+
estimator_instance._onedal_finalize_fit()
355359

356360
else:
357361

358362
def ndarray_function(x):
359363
for i in range(n_batches):
360364
method_instance(x[i * batch_size : (i + 1) * batch_size])
365+
estimator_instance._onedal_finalize_fit()
361366

362367
def dataframe_function(x):
363368
for i in range(n_batches):
364369
method_instance(x.iloc[i * batch_size : (i + 1) * batch_size])
370+
estimator_instance._onedal_finalize_fit()
365371

366372
if "ndarray" in str(type(data_args[0])):
367373
return ndarray_function
@@ -414,12 +420,28 @@ def measure_sklearn_estimator(
414420
data_args = (x_train,)
415421
else:
416422
data_args = (x_test,)
417-
batch_size = get_bench_case_value(
418-
bench_case, f"algorithm:batch_size:{stage}"
419-
)
420-
if batch_size is not None:
423+
424+
if method == "partial_fit":
425+
num_batches = get_bench_case_value(bench_case, "data:num_batches")
426+
batch_size = get_bench_case_value(bench_case, "data:batch_size")
427+
428+
if batch_size is None:
429+
if num_batches is None:
430+
num_batches = 5
431+
batch_size = (
432+
data_args[0].shape[0] + num_batches - 1
433+
) // num_batches
434+
if num_batches is None:
435+
num_batches = (
436+
data_args[0].shape[0] + batch_size - 1
437+
) // batch_size
438+
421439
method_instance = create_online_function(
422-
method_instance, data_args, batch_size
440+
estimator_instance,
441+
method_instance,
442+
data_args,
443+
num_batches,
444+
batch_size,
423445
)
424446
# daal4py model builders enabling branch
425447
if enable_modelbuilders and stage == "inference":

sklbench/report/implementation.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
import argparse
1818
import json
19-
from typing import Dict, List
19+
from typing import Dict, Hashable, List
2020

2121
import openpyxl as xl
2222
import pandas as pd
@@ -239,6 +239,7 @@ def get_result_tables_as_df(
239239
bench_cases = pd.DataFrame(
240240
[flatten_dict(bench_case) for bench_case in results["bench_cases"]]
241241
)
242+
bench_cases = bench_cases.map(lambda x: str(x) if not isinstance(x, Hashable) else x)
242243

243244
if compatibility_mode:
244245
bench_cases = transform_results_to_compatible(bench_cases)
@@ -248,7 +249,7 @@ def get_result_tables_as_df(
248249
bench_cases.drop(columns=[column], inplace=True)
249250
diffby_columns.remove(column)
250251

251-
return split_df_by_columns(bench_cases, splitby_columns)
252+
return split_df_by_columns(bench_cases, splitby_columns, False)
252253

253254

254255
def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame:
@@ -258,7 +259,10 @@ def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame:
258259
# only relative improvements are included in summary currently
259260
if len(column) > 1 and column[1] == f"{metric_name} relative improvement":
260261
metric_columns.append(column)
261-
summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T
262+
if metric_columns:
263+
summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T
264+
else:
265+
summary = pd.DataFrame()
262266
summary.index = pd.Index([df_name])
263267
return summary
264268

test-configuration-linux.yml

+5
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,11 @@ steps:
4545
conda activate bench-env
4646
python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json
4747
displayName: Sklearn example run
48+
- script: |
49+
source /usr/share/miniconda/etc/profile.d/conda.sh
50+
conda activate bench-env
51+
python -m sklbench --report -l DEBUG --report -c configs/incremental.json
52+
displayName: Incremental algorithms example run
4853
- script: |
4954
source /usr/share/miniconda/etc/profile.d/conda.sh
5055
conda activate bench-env

test-configuration-win.yml

+4
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,10 @@ steps:
4343
call activate bench-env
4444
python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json
4545
displayName: Sklearn example run
46+
- script: |
47+
call activate bench-env
48+
python -m sklbench --report -l DEBUG --report -c configs/incremental.json
49+
displayName: Incremental algorithms example run
4650
- script: |
4751
call activate bench-env
4852
python -m sklbench --report -l DEBUG --report -c configs/xgboost_example.json

0 commit comments

Comments
 (0)