diff --git a/configs/incremental.json b/configs/incremental.json deleted file mode 100644 index e1f589a4..00000000 --- a/configs/incremental.json +++ /dev/null @@ -1,100 +0,0 @@ -{ "INCLUDE": ["./common/sklearn.json"], - "PARAMETERS_SETS": { - "common": {"bench": {"n_runs": 10, "time_limit": 60}}, - "covariance data": { - "data": [ - { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 12000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - ] - }, - "basic_statistics data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 12000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - }, - "linear_regression data": { - "data": { - "source": "make_regression", - "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, - "generation_kwargs": { - "n_samples": 12000000, - "n_features": [10, 100], - "n_informative": 5, - "noise": 2.0 - } - } - }, - "pca data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 12000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - }, - "covariance": { - "algorithm": [ - { - "estimator": "IncrementalEmpiricalCovariance", - "library": "sklearnex.covariance", - "estimator_methods": {"training": "partial_fit"}, - "num_batches": {"training": 12} - } - ] - }, - "basic_statistics": { - "algorithm": [ - { - "estimator": "IncrementalBasicStatistics", - "library": "sklearnex.basic_statistics", - "estimator_methods": {"training": "partial_fit"}, - "num_batches": {"training": 12} - } - ] - }, - "linear_regression": { - "algorithm": [ - { - "estimator": "IncrementalLinearRegression", - "library": "sklearnex.linear_model", - "estimator_methods": {"training": "partial_fit"}, - "num_batches": {"training": 12} - } - ] - }, - "pca": { - "algorithm": [ - { - "estimator": "IncrementalPCA", - "library": "sklearnex.preview.decomposition", - "estimator_methods": {"training": "partial_fit"}, - "num_batches": {"training": 12} - } - ] - } - }, - "TEMPLATES": { - "basic_statistics": {"SETS": ["common", "basic_statistics", "basic_statistics data", "sklearn-ex[gpu] implementations"]}, - "covariance": {"SETS": ["common", "covariance", "covariance data", "sklearn-ex[gpu] implementations"]}, - "linear_regression": { - "SETS": ["common", "linear_regression", "linear_regression data", "sklearn-ex[gpu] implementations"] - }, - "pca": {"SETS": ["common", "pca", "pca data", "sklearn-ex[gpu] implementations"]} - } -} diff --git a/configs/spmd/large_scale/incremental.json b/configs/spmd/large_scale/incremental.json new file mode 100644 index 00000000..195074ee --- /dev/null +++ b/configs/spmd/large_scale/incremental.json @@ -0,0 +1,77 @@ +{ "INCLUDE": [ ], + "PARAMETERS_SETS": { + "common incremental raw gpu params": { + "algorithm": { + "device": "gpu", + "sklearnex_context": { "use_raw_input": true } + }, + "data": { + "format":"dpctl", + "order": "C" + } + }, + "statistical batches and data": [ + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000000, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 50000000, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 16666667, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 8333333, "n_features": 10, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 1000, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 1000, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 166667, "n_features": 1000, "centers": 1 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_blobs", "generation_kwargs": { "n_samples": 83333, "n_features": 1000, "centers": 1 } } } + ], + "regression batches and data": [ + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 100000000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 50000000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 16666667, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 100005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 8333333, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": [1,2,6,12]} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 2} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 1500000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 6} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 5000 } } }, + { "algorithm": { "num_batches": { "training": 12} }, "data": { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 250000, "test_size": 5000 } } } + ], + "covariance": { + "algorithm": { + "estimator": "IncrementalEmpiricalCovariance", + "library": "sklearnex", + "estimator_methods": {"training": "partial_fit"} + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "basic_statistics": { + "algorithm": { + "estimator": "IncrementalBasicStatistics", + "library": "sklearnex", + "estimator_methods": {"training": "partial_fit"} + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + }, + "linear_regression": { + "algorithm": { + "estimator": "IncrementalLinearRegression", + "library": "sklearnex", + "estimator_methods": {"training": "partial_fit"} + } + }, + "pca": { + "algorithm": { + "estimator": "IncrementalPCA", + "library": "sklearnex.preview", + "estimator_methods": {"training": "partial_fit"} + }, + "data": { + "split_kwargs": { "test_size": 0.0001 } + } + } + }, + "TEMPLATES": { + "basic_statistics": { "SETS": ["common incremental raw gpu params", "basic_statistics", "statistical batches and data"] }, + "covariance": { "SETS": ["common incremental raw gpu params", "covariance", "statistical batches and data"] }, + "linear_regression": { "SETS": ["common incremental raw gpu params", "linear_regression", "regression batches and data"] }, + "pca": { "SETS": ["common incremental raw gpu params", "pca", "statistical batches and data"] } + } +} diff --git a/configs/spmd/large_scale/incremental/basic_stats.json b/configs/spmd/large_scale/incremental/basic_stats.json deleted file mode 100644 index ca9e3eb9..00000000 --- a/configs/spmd/large_scale/incremental/basic_stats.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "INCLUDE": ["../../../common/sklearn.json", "../../../spmd/stats_covariance.json", "../large_scale.json"], - "PARAMETERS_SETS": { - "spmd basicstats parameters": { - "algorithm": { - "estimator": "IncrementalBasicStatistics", - "estimator_methods": { "training": "fit" }, - "num_batches": {"training": 10} - }, - "data": { - "split_kwargs": { "test_size": 0.0001 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } } - ] - } - }, - "TEMPLATES": { - "basicstats": { - "SETS": [ - "sklearnex spmd implementation", - "large scale 32 parameters", - "synthetic data", - "spmd basicstats parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/incremental/covariance.json b/configs/spmd/large_scale/incremental/covariance.json deleted file mode 100644 index 04fcd76b..00000000 --- a/configs/spmd/large_scale/incremental/covariance.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "INCLUDE": ["../../../common/sklearn.json", "../../../spmd/stats_covariance.json", "../large_scale.json"], - "PARAMETERS_SETS": { - "spmd covariance parameters": { - "algorithm": { - "estimator": "IncrementalEmpiricalCovariance", - "estimator_methods": { "training": "fit" }, - "num_batches": {"training": 10} - }, - "data": { - "split_kwargs": { "test_size": 0.0001 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } } - ] - } - }, - "TEMPLATES": { - "covariance": { - "SETS": [ - "sklearnex spmd implementation", - "large scale 32 parameters", - "synthetic data", - "spmd covariance parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/incremental/linear_model.json b/configs/spmd/large_scale/incremental/linear_model.json deleted file mode 100644 index a483f613..00000000 --- a/configs/spmd/large_scale/incremental/linear_model.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "INCLUDE": ["../../../common/sklearn.json", "../../../regular/linear_model.json", "../large_scale.json"], - "PARAMETERS_SETS": { - "spmd linear parameters": { - "algorithm": { - "estimator": "IncrementalLinearRegression", - "estimator_methods": { "training": "fit" }, - "num_batches": {"training": 10} - } - }, - "synthetic data": { - "data": [ - { "source": "make_regression", "generation_kwargs": { "n_samples": 305000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } } - ] - } - }, - "TEMPLATES": { - "linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale 32 parameters", - "synthetic data", - "spmd linear parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/incremental/pca.json b/configs/spmd/large_scale/incremental/pca.json deleted file mode 100644 index 11fa5125..00000000 --- a/configs/spmd/large_scale/incremental/pca.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "INCLUDE": ["../../../common/sklearn.json", "../../../regular/pca.json", "../large_scale.json"], - "PARAMETERS_SETS": { - "spmd pca parameters": { - "algorithm": { - "estimator": "IncrementalPCA", - "estimator_methods": { "training": "fit", "inference": "" }, - "num_batches": {"training": 10} - }, - "data": { - "split_kwargs": { "test_size": 0.0001 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } } - ] - } - }, - "TEMPLATES": { - "linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale 32 parameters", - "synthetic data", - "spmd pca parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/spmd_for_online.json b/configs/spmd/large_scale/spmd_for_online.json deleted file mode 100644 index 2ef60f5b..00000000 --- a/configs/spmd/large_scale/spmd_for_online.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], - "PARAMETERS_SETS": { - "covariance data": { - "data": [ - { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 1000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - ] - }, - "basic_statistics data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 1000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - }, - "linear_regression data": { - "data": { - "source": "make_regression", - "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, - "generation_kwargs": { - "n_samples": 1000000, - "n_features": [10, 100], - "n_informative": 5, - "noise": 2.0 - } - } - }, - "pca data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 1000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - }, - "basic_statistics": { - "algorithm": [ - { - "estimator": "BasicStatistics", - "library": "sklearnex.spmd", - "estimator_methods": {"training": "fit"} - } - ] - }, - "covariance": { - "algorithm": [ - { - "estimator": "EmpiricalCovariance", - "library": "sklearnex.spmd", - "estimator_methods": {"training": "fit"} - } - ] - }, - "linear_regression": { - "algorithm": [ - { - "estimator": "LinearRegression", - "library": "sklearnex.spmd", - "estimator_methods": {"training": "fit"} - } - ] - }, - "pca": { - "algorithm": [ - { - "estimator": "PCA", - "library": "sklearnex.spmd", - "estimator_methods": {"training": "fit", "inference": ""} - } - ] - } - }, - "TEMPLATES": { - "basic_statistics": {"SETS": ["basic_statistics", "basic_statistics data", "sklearnex spmd implementation", "large scale full one node parameters"]}, - "covariance": {"SETS": ["covariance", "covariance data", "sklearnex spmd implementation", "large scale full one node parameters"]}, - "linear_regression": { - "SETS": ["linear_regression", "linear_regression data", "sklearnex spmd implementation", "large scale full one node parameters"] - }, - "pca": {"SETS": ["pca", "pca data", "sklearnex spmd implementation", "large scale full one node parameters"]} - } -} diff --git a/configs/spmd/large_scale/spmd_for_online_strong.json b/configs/spmd/large_scale/spmd_for_online_strong.json deleted file mode 100644 index 77a25075..00000000 --- a/configs/spmd/large_scale/spmd_for_online_strong.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], - "PARAMETERS_SETS": { - "covariance data": { - "data": [ - { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 12000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - ] - }, - "basic_statistics data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 12000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - }, - "linear_regression data": { - "data": { - "source": "make_regression", - "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, - "generation_kwargs": { - "n_samples": 12000000, - "n_features": [10, 100], - "n_informative": 5, - "noise": 2.0 - } - } - }, - "pca data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 12000000, - "n_features": [10, 100] - }, - "split_kwargs": {"ignore": true} - } - } - }, - "TEMPLATES": { - "basic_statistics": {"SETS": ["basic_statistics data", "spmd default parameters", "sklearnex spmd implementation", "large scale strong full one node parameters"]}, - "covariance": {"SETS": ["covariance data", "spmd default parameters","sklearnex spmd implementation", "large scale strong full one node parameters"]}, - "linear_regression": { - "SETS": ["linear_regression data", "spmd default parameters", "sklearnex spmd implementation", "large scale strong full one node parameters"] - }, - "pca": {"SETS": ["pca data", "spmd default parameters", "sklearnex spmd implementation", "large scale strong full one node parameters"]} - } -} diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py index bfabbdc0..6d251982 100644 --- a/sklbench/utils/measurement.py +++ b/sklbench/utils/measurement.py @@ -92,16 +92,20 @@ def measure_time( f"exceeded time limit ({time_limit} seconds)" ) break - from mpi4py import MPI - - if MPI.COMM_WORLD.Get_rank() == 0: - logger.debug( - "iters across n runs: " - + str(iters) - + ", inner iters across n runs: " - + str(inners) - ) - logger.debug(times) + + try: + from mpi4py import MPI + + if MPI.COMM_WORLD.Get_rank() == 0: + logger.debug( + "iters across n runs: " + + str(iters) + + ", inner iters across n runs: " + + str(inners) + ) + logger.debug(times) + except ModuleNotFoundError: + pass # mean, std = box_filter(times) # if std / mean > std_mean_ratio: # logger.warning( diff --git a/test-configuration-linux.yml b/test-configuration-linux.yml index 722d1008..a37769ce 100644 --- a/test-configuration-linux.yml +++ b/test-configuration-linux.yml @@ -45,11 +45,6 @@ steps: conda activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run - - script: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate bench-env - python -m sklbench --report -l DEBUG --report -c configs/incremental.json - displayName: Incremental algorithms example run - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate bench-env diff --git a/test-configuration-win.yml b/test-configuration-win.yml index f3ac1595..a1eddaeb 100644 --- a/test-configuration-win.yml +++ b/test-configuration-win.yml @@ -43,10 +43,6 @@ steps: call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run - - script: | - call activate bench-env - python -m sklbench --report -l DEBUG --report -c configs/incremental.json - displayName: Incremental algorithms example run - script: | call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/xgboost_example.json