Skip to content

Commit 1d48f3a

Browse files
committed
remove batch_size logic from incremental benchmarking for num_batches
1 parent 66d977d commit 1d48f3a

File tree

3 files changed

+26
-33
lines changed

3 files changed

+26
-33
lines changed

configs/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ Configs have the three highest parameter keys:
117117
|:---------------|:--------------|:--------|:------------|
118118
| `algorithm`:`estimator` | None | | Name of measured estimator. |
119119
| `algorithm`:`estimator_params` | Empty `dict` | | Parameters for estimator constructor. |
120+
| `algorithm`:`num_batches`:`training` | 5 | | Number of `partial_fit` calls to benchmark. Each call receives the full number of samples specified (the data is not divided into `num_batches` parts). For incremental estimators only. |
120121
| `algorithm`:`online_inference_mode` | False | | Enables online mode for inference methods of estimator (separate call for each sample). |
121122
| `algorithm`:`sklearn_context` | None | | Parameters for sklearn `config_context` used over estimator. |
122123
| `algorithm`:`sklearnex_context` | None | | Parameters for sklearnex `config_context` used over estimator. Updated by `sklearn_context` if set. |

sklbench/benchmarks/sklearn_estimator.py

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -334,41 +334,33 @@ def verify_patching(stream: io.StringIO, function_name) -> bool:
334334
return acceleration_lines > 0 and fallback_lines == 0
335335

336336

337-
def create_online_function(
338-
estimator_instance, method_instance, data_args, num_batches, batch_size
339-
):
337+
def create_online_function(estimator_instance, method_instance, data_args, num_batches):
340338

341339
if "y" in list(inspect.signature(method_instance).parameters):
342340

343341
def ndarray_function(x, y):
344342
for i in range(num_batches):
345-
method_instance(
346-
x[i * batch_size : (i + 1) * batch_size],
347-
y[i * batch_size : (i + 1) * batch_size],
348-
)
343+
method_instance(x, y)
349344
if hasattr(estimator_instance, "_onedal_finalize_fit"):
350345
estimator_instance._onedal_finalize_fit()
351346

352347
def dataframe_function(x, y):
353348
for i in range(num_batches):
354-
method_instance(
355-
x.iloc[i * batch_size : (i + 1) * batch_size],
356-
y.iloc[i * batch_size : (i + 1) * batch_size],
357-
)
349+
method_instance(x, y)
358350
if hasattr(estimator_instance, "_onedal_finalize_fit"):
359351
estimator_instance._onedal_finalize_fit()
360352

361353
else:
362354

363355
def ndarray_function(x):
364356
for i in range(num_batches):
365-
method_instance(x[i * batch_size : (i + 1) * batch_size])
357+
method_instance(x)
366358
if hasattr(estimator_instance, "_onedal_finalize_fit"):
367359
estimator_instance._onedal_finalize_fit()
368360

369361
def dataframe_function(x):
370362
for i in range(num_batches):
371-
method_instance(x.iloc[i * batch_size : (i + 1) * batch_size])
363+
method_instance(x)
372364
if hasattr(estimator_instance, "_onedal_finalize_fit"):
373365
estimator_instance._onedal_finalize_fit()
374366

@@ -423,32 +415,20 @@ def measure_sklearn_estimator(
423415
data_args = (x_train,)
424416
else:
425417
data_args = (x_test,)
418+
batch_size = get_bench_case_value(
419+
bench_case, f"algorithm:batch_size:{stage}"
420+
)
426421

427422
if method == "partial_fit":
428423
num_batches = get_bench_case_value(
429-
bench_case, f"algorithm:num_batches:{stage}"
430-
)
431-
batch_size = get_bench_case_value(
432-
bench_case, f"algorithm:batch_size:{stage}"
424+
bench_case, f"algorithm:num_batches:{stage}", 5
433425
)
434426

435-
if batch_size is None:
436-
if num_batches is None:
437-
num_batches = 5
438-
batch_size = (
439-
data_args[0].shape[0] + num_batches - 1
440-
) // num_batches
441-
if num_batches is None:
442-
num_batches = (
443-
data_args[0].shape[0] + batch_size - 1
444-
) // batch_size
445-
446427
method_instance = create_online_function(
447428
estimator_instance,
448429
method_instance,
449430
data_args,
450-
num_batches,
451-
batch_size,
431+
num_batches
452432
)
453433
# daal4py model builders enabling branch
454434
if enable_modelbuilders and stage == "inference":
@@ -465,6 +445,10 @@ def measure_sklearn_estimator(
465445
metrics[method]["time std[ms]"],
466446
_,
467447
) = measure_case(bench_case, method_instance, *data_args)
448+
if batch_size is not None:
449+
metrics[method]["throughput[samples/ms]"] = (
450+
(data_args[0].shape[0] // batch_size) * batch_size
451+
) / metrics[method]["time[ms]"]
468452
if ensure_sklearnex_patching:
469453
full_method_name = f"{estimator_class.__name__}.{method}"
470454
sklearnex_logging_stream.seek(0)
@@ -561,9 +545,16 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
561545
for stage in estimator_methods.keys():
562546
data_descs[stage].update(
563547
{
564-
"batch_size": get_bench_case_value(
565-
bench_case, f"algorithm:batch_size:{stage}"
566-
)
548+
key: val
549+
for key, val in {
550+
"batch_size": get_bench_case_value(
551+
bench_case, f"algorithm:batch_size:{stage}"
552+
),
553+
"num_batches": get_bench_case_value(
554+
bench_case, f"algorithm:num_batches:{stage}"
555+
)
556+
}.items()
557+
if val is not None
567558
}
568559
)
569560
if "n_classes" in data_description:

sklbench/report/implementation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
"order",
9595
"n_classes",
9696
"n_clusters",
97+
"num_batches",
9798
"batch_size",
9899
]
99100

0 commit comments

Comments
 (0)