diff --git a/docs/examples/plot_benchmark_tree.py b/docs/examples/plot_benchmark_tree.py new file mode 100644 index 000000000..c0ca824b8 --- /dev/null +++ b/docs/examples/plot_benchmark_tree.py @@ -0,0 +1,372 @@ +""" +.. _l-plot-optim-tree-ensemble: + +TreeEnsemble optimization +========================= + +:: + + export PYTHONPATH=~/github/onnxruntime/build/linux_cpu/Release/:$PYTHONPATH + +""" + +import logging +import os +import timeit +from typing import Tuple +import numpy +import onnx +from onnx.helper import make_graph, make_model +from onnx.reference import ReferenceEvaluator +from pandas import DataFrame +from sklearn.datasets import make_regression +from sklearn.ensemble import RandomForestRegressor +from skl2onnx import to_onnx +from onnxruntime import InferenceSession, SessionOptions +from onnx_array_api.plotting.text_plot import onnx_simple_text_plot +from onnx_extended.reference import CReferenceEvaluator +from onnx_extended.ortops.optim.cpu import get_ort_ext_libs +from onnx_extended.ortops.optim.optimize import optimize_model +from onnx_extended.tools.onnx_nodes import multiply_tree +from onnx_extended.args import get_parsed_args +from onnx_extended.ext_test_case import unit_test_going +from onnx_extended.plotting.benchmark import hhistograms + +logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR) + +script_args = get_parsed_args( + "plot_benchmark_tree", + description=__doc__, + scenarios={ + "SHORT": "short optimization (default)", + "LONG": "test more options", + "CUSTOM": "use values specified by the command line", + }, + n_features=(2 if unit_test_going() else 5, "number of features to generate"), + n_trees=(3 if unit_test_going() else 10, "number of trees to train"), + max_depth=(2 if unit_test_going() else 5, "max_depth"), + batch_size=(1000 if unit_test_going() else 10000, "batch size"), + parallel_tree=("80,160,40", "values to try for parallel_tree"), + parallel_tree_N=("256,128,64", "values to try for parallel_tree_N"), + 
def train_model(
    batch_size: int, n_features: int, n_trees: int, max_depth: int
) -> Tuple[str, numpy.ndarray, numpy.ndarray]:
    """
    Builds (or reuses) the benchmarked ONNX model and returns a batch of data.

    The model is cached on disk under a name built from *n_features*,
    *n_trees* and *max_depth*. If that file is missing, a random forest
    holding a single tree is trained on random data, converted into ONNX,
    its first node goes through :func:`multiply_tree` with *n_trees*
    (presumably replicating the tree) and the resulting model is saved.
    If the file already exists, only random data is drawn.
    The number of jobs used for the training is read from the global
    ``script_args.n_jobs``.

    :param batch_size: number of rows in the returned batch
    :param n_features: number of features of the random regression problem
    :param n_trees: value given to :func:`multiply_tree`
    :param max_depth: maximum depth of the trained tree
    :return: model filename, last *batch_size* rows of the features and
        of the targets, both cast to float32
    """
    filename = f"plot_op_tree_ensemble_optim-f{n_features}-{n_trees}-d{max_depth}.onnx"

    if os.path.exists(filename):
        # The model is already cached, random data is all we need.
        X, y = make_regression(batch_size, n_features=n_features, n_targets=1)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
    else:
        # The last batch_size rows are held out, the others train the tree.
        n_samples = batch_size + max(batch_size, 2 ** (max_depth + 1))
        X, y = make_regression(n_samples, n_features=n_features, n_targets=1)
        print(f"Training to get {filename!r} with X.shape={X.shape}")
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)

        # Training a single tree keeps the example fast...
        forest = RandomForestRegressor(
            1, max_depth=max_depth, verbose=2, n_jobs=int(script_args.n_jobs)
        )
        forest.fit(X[:-batch_size], y[:-batch_size])
        converted = to_onnx(forest, X[:1], target_opset={"": 18, "ai.onnx.ml": 3})

        # ...and the trees are multiplied afterwards.
        multiplied = multiply_tree(converted.graph.node[0], n_trees)
        graph = make_graph(
            [multiplied],
            converted.graph.name,
            converted.graph.input,
            converted.graph.output,
        )
        model_proto = make_model(
            graph,
            domain=converted.domain,
            opset_imports=converted.opset_import,
            ir_version=converted.ir_version,
        )

        with open(filename, "wb") as f:
            f.write(model_proto.SerializeToString())

    return filename, X[-batch_size:].copy(), y[-batch_size:].copy()
def transform_model(
    model: onnx.ModelProto,
    parallel_tree: int,
    parallel_tree_N: int,
    parallel_N: int,
    first=-556,
):
    """
    Returns a copy of *model* carrying the parallelization settings.

    Attribute ``nodes_hitrates_as_tensor`` is unused by the runtime, so that
    field is used to specify the parallelization settings: every node whose
    type starts with ``TreeEnsemble`` loses its ``nodes_hitrates*`` attributes
    and receives a float32 tensor
    ``[first, parallel_tree, parallel_tree_N, parallel_N]`` instead.
    The opsets of the copy are reset to 18 (main domain) and 3 (``ai.onnx.ml``).

    :param model: model proto, the function works on a copy
    :param parallel_tree: see https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/cpu/ml/tree_ensemble_common.h#L38
    :param parallel_tree_N: see https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/cpu/ml/tree_ensemble_common.h#L39
    :param parallel_N: see https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/cpu/ml/tree_ensemble_common.h#L40
    :param first: -556 or -555, -556 makes onnxruntime print out the
        parallelization settings to make sure these are the expected values
    :return: ModelProto
    """
    # Serializing then parsing gives an independent copy of the input.
    clone = onnx.ModelProto()
    clone.ParseFromString(model.SerializeToString())

    def settings_attribute():
        # Packs the four settings into the attribute the kernel reads.
        values = numpy.array(
            [first, parallel_tree, parallel_tree_N, parallel_N],
            dtype=numpy.float32,
        )
        tensor = onnx.numpy_helper.from_array(values, name="nodes_hitrates_as_tensor")
        return onnx.helper.make_attribute("nodes_hitrates_as_tensor", tensor)

    nodes = []
    for node in clone.graph.node:
        if node.op_type.startswith("TreeEnsemble"):
            kept = [
                att
                for att in node.attribute
                if not att.name.startswith("nodes_hitrates")
            ]
            kept.append(settings_attribute())
            del node.attribute[:]
            node.attribute.extend(kept)
        nodes.append(node)

    del clone.graph.node[:]
    clone.graph.node.extend(nodes)

    del clone.opset_import[:]
    clone.opset_import.extend(
        [onnx.helper.make_opsetid("", 18), onnx.helper.make_opsetid("ai.onnx.ml", 3)]
    )
    return clone
{filename!r}") +sess_ort = InferenceSession(filename, providers=["CPUExecutionProvider"]) + +r = get_ort_ext_libs() +print(f"Creating SessionOptions with {r!r}") +opts = SessionOptions() +if r is not None: + opts.register_custom_ops_library(r[0]) + +print(f"Loading modified {filename!r}") +sess_cus = InferenceSession( + onx_modified.SerializeToString(), opts, providers=["CPUExecutionProvider"] +) + +print(f"Running once with shape {Xb.shape}.") +base = sess_ort.run(None, {"X": Xb})[0] +print(f"Running modified with shape {Xb.shape}.") +got = sess_cus.run(None, {"X": Xb})[0] +print("done.") + +####################################### +# Discrepancies? + +d = numpy.abs(base - got) +ya = numpy.abs(base).mean() +print(f"Discrepancies: max={d.max() / ya}, mean={d.mean() / ya} (A={ya})") + +######################################## +# Simple verification +# +++++++++++++++++++ +# +# Baseline with onnxruntime. +t1 = timeit.timeit(lambda: sess_ort.run(None, {"X": Xb}), number=50) +print(f"baseline: {t1}") + +################################# +# The custom implementation. +t2 = timeit.timeit(lambda: sess_cus.run(None, {"X": Xb}), number=50) +print(f"new time: {t2}") + +################################# +# The same implementation but run from the onnx python backend. +ref = CReferenceEvaluator(filename) +ref.run(None, {"X": Xb}) +t3 = timeit.timeit(lambda: ref.run(None, {"X": Xb}), number=50) +print(f"CReferenceEvaluator: {t3}") + +################################# +# The python implementation but from the onnx python backend. +if n_trees < 50: + # It is usually slow. + ref = ReferenceEvaluator(filename) + ref.run(None, {"X": Xb}) + t4 = timeit.timeit(lambda: ref.run(None, {"X": Xb}), number=5) + print(f"ReferenceEvaluator: {t4} (only 5 times instead of 50)") + + +############################################# +# Time for comparison +# +++++++++++++++++++ +# +# The custom kernel supports the same attributes as *TreeEnsembleRegressor* +# plus new ones to tune the parallelization. 
def create_session(onx):
    """
    Creates an onnxruntime session for *onx* restricted to the CPU provider.

    :param onx: model, any object exposing ``SerializeToString``
    :return: :class:`InferenceSession` created with
        ``providers=["CPUExecutionProvider"]``
    """
    serialized = onx.SerializeToString()
    return InferenceSession(serialized, providers=["CPUExecutionProvider"])
+ +df = DataFrame(res) +df.to_csv("plot_op_tree_ensemble_optim.csv", index=False) +df.to_excel("plot_op_tree_ensemble_optim.xlsx", index=False) +print(df.columns) +print(df.head(5)) + +################################ +# Sorting +# +++++++ + +small_df = df.drop( + [ + "min_exec", + "max_exec", + "repeat", + "number", + "context_size", + "n_exp_name", + ], + axis=1, +).sort_values("average") +print(small_df.head(n=10)) + + +################################ +# Worst +# +++++ + +print(small_df.tail(n=10)) + + +################################# +# Plot +# ++++ + +skeys = ",".join(optim_params.keys()) +title = f"TreeEnsemble tuning, n_tries={script_args.tries}\n{skeys}\nlower is better" +ax = hhistograms(df, title=title, keys=("name",)) +fig = ax.get_figure() +fig.savefig("plot_op_tree_ensemble_optim.png") diff --git a/docs/tutorial/plot_dbegin_options_zipmap.py b/docs/tutorial/plot_dbegin_options_zipmap.py index c478ae772..7ed22e072 100644 --- a/docs/tutorial/plot_dbegin_options_zipmap.py +++ b/docs/tutorial/plot_dbegin_options_zipmap.py @@ -26,7 +26,6 @@ import onnxruntime as rt import onnx import skl2onnx -from skl2onnx.common.data_types import FloatTensorType from skl2onnx import to_onnx from sklearn.linear_model import LogisticRegression from sklearn.multioutput import MultiOutputClassifier diff --git a/pyproject.toml b/pyproject.toml index cda762c49..4be1713cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ select = [ "C413", "C408", "C417", "E731", "PIE808", - "RUF012", "RUF015", + "RUF012", "RUF015", "RUF051", "SIM103", "SIM108", "SIM114", "SIM910", "UP006", "UP008", "UP015", "UP028", "UP030", "UP031", "UP035", "UP032" ] @@ -53,8 +53,8 @@ select = [ "B007", "B019", "B028", "B904", "C401", "C403", "C405", "C406", "C408", "C413", "C416", "C417", "PIE808", "PIE810", - "RUF005", "RUF012", "RUF010", "RUF015", - "SIM102", "SIM105", "SIM113", "SIM114", "SIM118", "SIM300", + "RUF005", "RUF012", "RUF010", "RUF015", "RUF046", "RUF051", + "SIM102", 
"SIM105", "SIM113", "SIM114", "SIM118", "SIM300", "SIM905", "UP015", "UP018", "UP028", "UP030", "UP031", "UP032" ] "tests_onnxmltools/**" = ["B028", "B904", "C403", "C408", "C413", "C417", "PIE808", "PIE810", "RUF010", "RUF015", "SIM102", "SIM105", "SIM118", "UP015", "UP028", "UP030", "UP031", "UP032"]