From fbbfc573e2cd2449a9372c4b5bfe131a2dfc8ee1 Mon Sep 17 00:00:00 2001 From: Theodoros Katzalis Date: Thu, 15 Aug 2024 15:17:45 +0200 Subject: [PATCH 001/187] Fix weight converters and return their corresponding v5 weight descr --- .../weight_converter/keras/_tensorflow.py | 23 ++++++-- .../core/weight_converter/torch/_onnx.py | 46 +++++++--------- .../weight_converter/torch/_torchscript.py | 53 +++++++++---------- setup.py | 2 +- .../weight_converter/keras/test_tensorflow.py | 40 +++++--------- tests/weight_converter/torch/test_onnx.py | 25 +++++---- .../torch/test_torchscript.py | 26 ++++----- 7 files changed, 102 insertions(+), 113 deletions(-) diff --git a/bioimageio/core/weight_converter/keras/_tensorflow.py b/bioimageio/core/weight_converter/keras/_tensorflow.py index c901f458..5804226d 100644 --- a/bioimageio/core/weight_converter/keras/_tensorflow.py +++ b/bioimageio/core/weight_converter/keras/_tensorflow.py @@ -5,6 +5,9 @@ from typing import no_type_check from zipfile import ZipFile +from bioimageio.spec._internal.version_type import Version +from bioimageio.spec.model import v0_5 + try: import tensorflow.saved_model except Exception: @@ -39,7 +42,7 @@ def _convert_tf1( input_name: str, output_name: str, zip_weights: bool, -): +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: try: # try to build the tf model with the keras import from tensorflow from bioimageio.core.weight_converter.keras._tensorflow import ( @@ -77,10 +80,16 @@ def build_tf_model(): output_path = _zip_model_bundle(output_path) print("TensorFlow model exported to", output_path) - return 0 + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=Version(tensorflow.__version__), + ) -def _convert_tf2(keras_weight_path: Path, output_path: Path, zip_weights: bool): +def _convert_tf2( + keras_weight_path: Path, output_path: Path, zip_weights: bool +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: try: # try to build the tf model 
with the keras import from tensorflow from bioimageio.core.weight_converter.keras._tensorflow import keras @@ -95,12 +104,16 @@ def _convert_tf2(keras_weight_path: Path, output_path: Path, zip_weights: bool): output_path = _zip_model_bundle(output_path) print("TensorFlow model exported to", output_path) - return 0 + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=tensorflow.__version__, + ) def convert_weights_to_tensorflow_saved_model_bundle( model: ModelDescr, output_path: Path -): +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: """Convert model weights from format 'keras_hdf5' to 'tensorflow_saved_model_bundle'. Adapted from diff --git a/bioimageio/core/weight_converter/torch/_onnx.py b/bioimageio/core/weight_converter/torch/_onnx.py index 3935e1d1..d3c7bf01 100644 --- a/bioimageio/core/weight_converter/torch/_onnx.py +++ b/bioimageio/core/weight_converter/torch/_onnx.py @@ -1,13 +1,11 @@ # type: ignore # TODO: type -import warnings +from __future__ import annotations from pathlib import Path -from typing import Any, List, Sequence, cast +from typing import Any, List, Sequence, cast, Union import numpy as np from numpy.testing import assert_array_almost_equal -from bioimageio.spec import load_description -from bioimageio.spec.common import InvalidDescr from bioimageio.spec.model import v0_4, v0_5 from ...digest_spec import get_member_id, get_test_inputs @@ -19,15 +17,15 @@ torch = None -def add_onnx_weights( - model_spec: "str | Path | v0_4.ModelDescr | v0_5.ModelDescr", +def convert_weights_to_onnx( + model_spec: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, output_path: Path, use_tracing: bool = True, test_decimal: int = 4, verbose: bool = False, - opset_version: "int | None" = None, -): + opset_version: int = 15, +) -> v0_5.OnnxWeightsDescr: """Convert model weights from format 'pytorch_state_dict' to 'onnx'. 
Args: @@ -36,16 +34,6 @@ def add_onnx_weights( use_tracing: whether to use tracing or scripting to export the onnx format test_decimal: precision for testing whether the results agree """ - if isinstance(model_spec, (str, Path)): - loaded_spec = load_description(Path(model_spec)) - if isinstance(loaded_spec, InvalidDescr): - raise ValueError(f"Bad resource description: {loaded_spec}") - if not isinstance(loaded_spec, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise TypeError( - f"Path {model_spec} is a {loaded_spec.__class__.__name__}, expected a v0_4.ModelDescr or v0_5.ModelDescr" - ) - model_spec = loaded_spec - state_dict_weights_descr = model_spec.weights.pytorch_state_dict if state_dict_weights_descr is None: raise ValueError( @@ -54,9 +42,10 @@ def add_onnx_weights( assert torch is not None with torch.no_grad(): - sample = get_test_inputs(model_spec) - input_data = [sample[get_member_id(ipt)].data.data for ipt in model_spec.inputs] + input_data = [ + sample.members[get_member_id(ipt)].data.data for ipt in model_spec.inputs + ] input_tensors = [torch.from_numpy(ipt) for ipt in input_data] model = load_torch_model(state_dict_weights_descr) @@ -81,9 +70,9 @@ def add_onnx_weights( try: import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] except ImportError: - msg = "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." - warnings.warn(msg) - return + raise ImportError( + "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." 
+ ) # check the onnx model sess = rt.InferenceSession(str(output_path)) @@ -101,8 +90,11 @@ def add_onnx_weights( try: for exp, out in zip(expected_outputs, outputs): assert_array_almost_equal(exp, out, decimal=test_decimal) - return 0 except AssertionError as e: - msg = f"The onnx weights were exported, but results before and after conversion do not agree:\n {str(e)}" - warnings.warn(msg) - return 1 + raise ValueError( + f"Results before and after weights conversion do not agree:\n {str(e)}" + ) + + return v0_5.OnnxWeightsDescr( + source=output_path, parent="pytorch_state_dict", opset_version=opset_version + ) diff --git a/bioimageio/core/weight_converter/torch/_torchscript.py b/bioimageio/core/weight_converter/torch/_torchscript.py index 5ca16069..16dc0128 100644 --- a/bioimageio/core/weight_converter/torch/_torchscript.py +++ b/bioimageio/core/weight_converter/torch/_torchscript.py @@ -1,9 +1,11 @@ # type: ignore # TODO: type +from __future__ import annotations from pathlib import Path from typing import List, Sequence, Union import numpy as np from numpy.testing import assert_array_almost_equal +from torch.jit import ScriptModule from typing_extensions import Any, assert_never from bioimageio.spec.model import v0_4, v0_5 @@ -17,12 +19,11 @@ torch = None -# FIXME: remove Any def _check_predictions( model: Any, scripted_model: Any, - model_spec: "v0_4.ModelDescr | v0_5.ModelDescr", - input_data: Sequence["torch.Tensor"], + model_spec: v0_4.ModelDescr | v0_5.ModelDescr, + input_data: Sequence[torch.Tensor], ): assert torch is not None @@ -77,22 +78,27 @@ def _check(input_: Sequence[torch.Tensor]) -> None: else: assert_never(axis.size) - half_step = [st // 2 for st in step] + input_data = input_data[0] + max_shape = input_data.shape max_steps = 4 # check that input and output agree for decreasing input sizes for step_factor in range(1, max_steps + 1): slice_ = tuple( - slice(None) if st == 0 else slice(step_factor * st, -step_factor * st) - for st in half_step - ) 
- this_input = [inp[slice_] for inp in input_data] - this_shape = this_input[0].shape - if any(tsh < msh for tsh, msh in zip(this_shape, min_shape)): - raise ValueError( - f"Mismatched shapes: {this_shape}. Expected at least {min_shape}" + ( + slice(None) + if step_dim == 0 + else slice(0, max_dim - step_factor * step_dim, 1) ) - _check(this_input) + for max_dim, step_dim in zip(max_shape, step) + ) + sliced_input = input_data[slice_] + if any( + sliced_dim < min_dim + for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) + ): + return + _check([sliced_input]) def convert_weights_to_torchscript( @@ -107,7 +113,6 @@ def convert_weights_to_torchscript( output_path: where to save the torchscript weights use_tracing: whether to use tracing or scripting to export the torchscript format """ - state_dict_weights_descr = model_descr.weights.pytorch_state_dict if state_dict_weights_descr is None: raise ValueError( @@ -118,26 +123,20 @@ def convert_weights_to_torchscript( with torch.no_grad(): input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] - model = load_torch_model(state_dict_weights_descr) - - # FIXME: remove Any - if use_tracing: - scripted_model: Any = torch.jit.trace(model, input_data) - else: - scripted_model: Any = torch.jit.script(model) - + scripted_module: ScriptModule = ( + torch.jit.trace(model, input_data) + if use_tracing + else torch.jit.script(model) + ) _check_predictions( model=model, - scripted_model=scripted_model, + scripted_model=scripted_module, model_spec=model_descr, input_data=input_data, ) - # save the torchscript model - scripted_model.save( - str(output_path) - ) # does not support Path, so need to cast to str + scripted_module.save(str(output_path)) return v0_5.TorchscriptWeightsDescr( source=output_path, diff --git a/setup.py b/setup.py index 7aa66e16..a1a86f45 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ extras_require={ "pytorch": ["torch>=1.6", "torchvision", "keras>=3.0"], "tensorflow": 
["tensorflow", "keras>=2.15"], - "onnx": ["onnxruntime"], + "onnx": ["onnxruntime", "onnx"], "dev": [ "black", # "crick", # currently requires python<=3.9 diff --git a/tests/weight_converter/keras/test_tensorflow.py b/tests/weight_converter/keras/test_tensorflow.py index 65c93f60..18a4f2dc 100644 --- a/tests/weight_converter/keras/test_tensorflow.py +++ b/tests/weight_converter/keras/test_tensorflow.py @@ -3,49 +3,33 @@ from pathlib import Path import pytest - from bioimageio.spec import load_description -from bioimageio.spec.model.v0_5 import ModelDescr +from bioimageio.spec.model import v0_5 +from bioimageio.core.weight_converter.keras._tensorflow import ( + convert_weights_to_tensorflow_saved_model_bundle, +) -@pytest.mark.skip( - "tensorflow converter not updated yet" -) # TODO: test tensorflow converter -def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): - from bioimageio.core.weight_converter.keras import ( - convert_weights_to_tensorflow_saved_model_bundle, - ) - out_path = tmp_path / "weights" +@pytest.mark.skip() +def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): model = load_description(any_keras_model) - assert isinstance(model, ModelDescr), model.validation_summary.format() + out_path = tmp_path / "weights.h5" ret_val = convert_weights_to_tensorflow_saved_model_bundle(model, out_path) assert out_path.exists() - assert (out_path / "variables").exists() - assert (out_path / "saved_model.pb").exists() - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes + assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + assert ret_val.source == out_path -@pytest.mark.skip( - "tensorflow converter not updated yet" -) # TODO: test tensorflow converter +@pytest.mark.skip() def test_tensorflow_converter_zipped(any_keras_model: Path, tmp_path: Path): - from bioimageio.core.weight_converter.keras import ( - convert_weights_to_tensorflow_saved_model_bundle, - ) - 
out_path = tmp_path / "weights.zip" model = load_description(any_keras_model) - assert isinstance(model, ModelDescr), model.validation_summary.format() ret_val = convert_weights_to_tensorflow_saved_model_bundle(model, out_path) + assert out_path.exists() - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes + assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) - # make sure that the zip package was created correctly expected_names = {"saved_model.pb", "variables/variables.index"} with zipfile.ZipFile(out_path, "r") as f: names = set([name for name in f.namelist()]) diff --git a/tests/weight_converter/torch/test_onnx.py b/tests/weight_converter/torch/test_onnx.py index 54f2cdf4..faab39d6 100644 --- a/tests/weight_converter/torch/test_onnx.py +++ b/tests/weight_converter/torch/test_onnx.py @@ -1,18 +1,23 @@ # type: ignore # TODO enable type checking import os -from pathlib import Path -import pytest +from bioimageio.spec import load_description +from bioimageio.spec.model import v0_5 +from bioimageio.core.weight_converter.torch._onnx import convert_weights_to_onnx -@pytest.mark.skip("onnx converter not updated yet") # TODO: test onnx converter -def test_onnx_converter(convert_to_onnx: Path, tmp_path: Path): - from bioimageio.core.weight_converter.torch._onnx import convert_weights_to_onnx +def test_onnx_converter(convert_to_onnx, tmp_path): + bio_model = load_description(convert_to_onnx) out_path = tmp_path / "weights.onnx" - ret_val = convert_weights_to_onnx(convert_to_onnx, out_path, test_decimal=3) + opset_version = 15 + ret_val = convert_weights_to_onnx( + model_spec=bio_model, + output_path=out_path, + test_decimal=3, + opset_version=opset_version, + ) assert os.path.exists(out_path) - if not pytest.skip_onnx: - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes + assert isinstance(ret_val, v0_5.OnnxWeightsDescr) + assert ret_val.opset_version == 
opset_version + assert ret_val.source == out_path diff --git a/tests/weight_converter/torch/test_torchscript.py b/tests/weight_converter/torch/test_torchscript.py index e0cee3d8..6b397f08 100644 --- a/tests/weight_converter/torch/test_torchscript.py +++ b/tests/weight_converter/torch/test_torchscript.py @@ -1,22 +1,18 @@ # type: ignore # TODO enable type checking -from pathlib import Path - import pytest +from bioimageio.spec import load_description +from bioimageio.spec.model import v0_5 -from bioimageio.spec.model import v0_4, v0_5 - +from bioimageio.core.weight_converter.torch._torchscript import ( + convert_weights_to_torchscript, +) -@pytest.mark.skip( - "torchscript converter not updated yet" -) # TODO: test torchscript converter -def test_torchscript_converter( - any_torch_model: "v0_4.ModelDescr | v0_5.ModelDescr", tmp_path: Path -): - from bioimageio.core.weight_converter.torch import convert_weights_to_torchscript +@pytest.mark.skip() +def test_torchscript_converter(any_torch_model, tmp_path): + bio_model = load_description(any_torch_model) out_path = tmp_path / "weights.pt" - ret_val = convert_weights_to_torchscript(any_torch_model, out_path) + ret_val = convert_weights_to_torchscript(bio_model, out_path) assert out_path.exists() - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes + assert isinstance(ret_val, v0_5.TorchscriptWeightsDescr) + assert ret_val.source == out_path From a37b56879def4802e27b026e58126929cbee2ca2 Mon Sep 17 00:00:00 2001 From: Theodoros Katzalis Date: Thu, 15 Aug 2024 16:03:06 +0200 Subject: [PATCH 002/187] Create an interface for weight conversion - Instead of having one module per conversion, use one module, and individual classes will handle the logic of the conversion - Create an abstract class to have a common interface among conversions --- bioimageio/core/weight_converter/__init__.py | 1 - .../core/weight_converter/keras/__init__.py | 1 - 
.../weight_converter/keras/_tensorflow.py | 164 ------ .../core/weight_converter/torch/__init__.py | 1 - .../core/weight_converter/torch/_onnx.py | 100 ---- .../weight_converter/torch/_torchscript.py | 145 ------ .../core/weight_converter/torch/_utils.py | 24 - bioimageio/core/weight_converters.py | 492 ++++++++++++++++++ .../test_add_weights.py | 0 tests/test_weight_converters.py | 69 +++ .../weight_converter/keras/test_tensorflow.py | 36 -- tests/weight_converter/torch/test_onnx.py | 23 - .../torch/test_torchscript.py | 18 - 13 files changed, 561 insertions(+), 513 deletions(-) delete mode 100644 bioimageio/core/weight_converter/__init__.py delete mode 100644 bioimageio/core/weight_converter/keras/__init__.py delete mode 100644 bioimageio/core/weight_converter/keras/_tensorflow.py delete mode 100644 bioimageio/core/weight_converter/torch/__init__.py delete mode 100644 bioimageio/core/weight_converter/torch/_onnx.py delete mode 100644 bioimageio/core/weight_converter/torch/_torchscript.py delete mode 100644 bioimageio/core/weight_converter/torch/_utils.py create mode 100644 bioimageio/core/weight_converters.py rename tests/{weight_converter => }/test_add_weights.py (100%) create mode 100644 tests/test_weight_converters.py delete mode 100644 tests/weight_converter/keras/test_tensorflow.py delete mode 100644 tests/weight_converter/torch/test_onnx.py delete mode 100644 tests/weight_converter/torch/test_torchscript.py diff --git a/bioimageio/core/weight_converter/__init__.py b/bioimageio/core/weight_converter/__init__.py deleted file mode 100644 index 5f1674c9..00000000 --- a/bioimageio/core/weight_converter/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""coming soon""" diff --git a/bioimageio/core/weight_converter/keras/__init__.py b/bioimageio/core/weight_converter/keras/__init__.py deleted file mode 100644 index 195b42b8..00000000 --- a/bioimageio/core/weight_converter/keras/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: update keras weight converters diff --git 
a/bioimageio/core/weight_converter/keras/_tensorflow.py b/bioimageio/core/weight_converter/keras/_tensorflow.py deleted file mode 100644 index 5804226d..00000000 --- a/bioimageio/core/weight_converter/keras/_tensorflow.py +++ /dev/null @@ -1,164 +0,0 @@ -# type: ignore # TODO: type -import os -import shutil -from pathlib import Path -from typing import no_type_check -from zipfile import ZipFile - -from bioimageio.spec._internal.version_type import Version -from bioimageio.spec.model import v0_5 - -try: - import tensorflow.saved_model -except Exception: - tensorflow = None - -from bioimageio.spec._internal.io_utils import download -from bioimageio.spec.model.v0_5 import ModelDescr - - -def _zip_model_bundle(model_bundle_folder: Path): - zipped_model_bundle = model_bundle_folder.with_suffix(".zip") - - with ZipFile(zipped_model_bundle, "w") as zip_obj: - for root, _, files in os.walk(model_bundle_folder): - for filename in files: - src = os.path.join(root, filename) - zip_obj.write(src, os.path.relpath(src, model_bundle_folder)) - - try: - shutil.rmtree(model_bundle_folder) - except Exception: - print("TensorFlow bundled model was not removed after compression") - - return zipped_model_bundle - - -# adapted from -# https://github.com/deepimagej/pydeepimagej/blob/master/pydeepimagej/yaml/create_config.py#L236 -def _convert_tf1( - keras_weight_path: Path, - output_path: Path, - input_name: str, - output_name: str, - zip_weights: bool, -) -> v0_5.TensorflowSavedModelBundleWeightsDescr: - try: - # try to build the tf model with the keras import from tensorflow - from bioimageio.core.weight_converter.keras._tensorflow import ( - keras, # type: ignore - ) - - except Exception: - # if the above fails try to export with the standalone keras - import keras - - @no_type_check - def build_tf_model(): - keras_model = keras.models.load_model(keras_weight_path) - assert tensorflow is not None - builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) - signature = 
tensorflow.saved_model.signature_def_utils.predict_signature_def( - inputs={input_name: keras_model.input}, - outputs={output_name: keras_model.output}, - ) - - signature_def_map = { - tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature - } - - builder.add_meta_graph_and_variables( - keras.backend.get_session(), - [tensorflow.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map, - ) - builder.save() - - build_tf_model() - - if zip_weights: - output_path = _zip_model_bundle(output_path) - print("TensorFlow model exported to", output_path) - - return v0_5.TensorflowSavedModelBundleWeightsDescr( - source=output_path, - parent="keras_hdf5", - tensorflow_version=Version(tensorflow.__version__), - ) - - -def _convert_tf2( - keras_weight_path: Path, output_path: Path, zip_weights: bool -) -> v0_5.TensorflowSavedModelBundleWeightsDescr: - try: - # try to build the tf model with the keras import from tensorflow - from bioimageio.core.weight_converter.keras._tensorflow import keras - except Exception: - # if the above fails try to export with the standalone keras - import keras - - model = keras.models.load_model(keras_weight_path) - keras.models.save_model(model, output_path) - - if zip_weights: - output_path = _zip_model_bundle(output_path) - print("TensorFlow model exported to", output_path) - - return v0_5.TensorflowSavedModelBundleWeightsDescr( - source=output_path, - parent="keras_hdf5", - tensorflow_version=tensorflow.__version__, - ) - - -def convert_weights_to_tensorflow_saved_model_bundle( - model: ModelDescr, output_path: Path -) -> v0_5.TensorflowSavedModelBundleWeightsDescr: - """Convert model weights from format 'keras_hdf5' to 'tensorflow_saved_model_bundle'. - - Adapted from - https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py - - Args: - model: The bioimageio model description - output_path: where to save the tensorflow weights. 
This path must not exist yet. - """ - assert tensorflow is not None - tf_major_ver = int(tensorflow.__version__.split(".")[0]) - - if output_path.suffix == ".zip": - output_path = output_path.with_suffix("") - zip_weights = True - else: - zip_weights = False - - if output_path.exists(): - raise ValueError(f"The ouptut directory at {output_path} must not exist.") - - if model.weights.keras_hdf5 is None: - raise ValueError("Missing Keras Hdf5 weights to convert from.") - - weight_spec = model.weights.keras_hdf5 - weight_path = download(weight_spec.source).path - - if weight_spec.tensorflow_version: - model_tf_major_ver = int(weight_spec.tensorflow_version.major) - if model_tf_major_ver != tf_major_ver: - raise RuntimeError( - f"Tensorflow major versions of model {model_tf_major_ver} is not {tf_major_ver}" - ) - - if tf_major_ver == 1: - if len(model.inputs) != 1 or len(model.outputs) != 1: - raise NotImplementedError( - "Weight conversion for models with multiple inputs or outputs is not yet implemented." 
- ) - return _convert_tf1( - weight_path, - output_path, - model.inputs[0].id, - model.outputs[0].id, - zip_weights, - ) - else: - return _convert_tf2(weight_path, output_path, zip_weights) diff --git a/bioimageio/core/weight_converter/torch/__init__.py b/bioimageio/core/weight_converter/torch/__init__.py deleted file mode 100644 index 1b1ba526..00000000 --- a/bioimageio/core/weight_converter/torch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: torch weight converters diff --git a/bioimageio/core/weight_converter/torch/_onnx.py b/bioimageio/core/weight_converter/torch/_onnx.py deleted file mode 100644 index d3c7bf01..00000000 --- a/bioimageio/core/weight_converter/torch/_onnx.py +++ /dev/null @@ -1,100 +0,0 @@ -# type: ignore # TODO: type -from __future__ import annotations -from pathlib import Path -from typing import Any, List, Sequence, cast, Union - -import numpy as np -from numpy.testing import assert_array_almost_equal - -from bioimageio.spec.model import v0_4, v0_5 - -from ...digest_spec import get_member_id, get_test_inputs -from ...weight_converter.torch._utils import load_torch_model - -try: - import torch -except ImportError: - torch = None - - -def convert_weights_to_onnx( - model_spec: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, - output_path: Path, - use_tracing: bool = True, - test_decimal: int = 4, - verbose: bool = False, - opset_version: int = 15, -) -> v0_5.OnnxWeightsDescr: - """Convert model weights from format 'pytorch_state_dict' to 'onnx'. 
- - Args: - source_model: model without onnx weights - opset_version: onnx opset version - use_tracing: whether to use tracing or scripting to export the onnx format - test_decimal: precision for testing whether the results agree - """ - state_dict_weights_descr = model_spec.weights.pytorch_state_dict - if state_dict_weights_descr is None: - raise ValueError( - "The provided model does not have weights in the pytorch state dict format" - ) - - assert torch is not None - with torch.no_grad(): - sample = get_test_inputs(model_spec) - input_data = [ - sample.members[get_member_id(ipt)].data.data for ipt in model_spec.inputs - ] - input_tensors = [torch.from_numpy(ipt) for ipt in input_data] - model = load_torch_model(state_dict_weights_descr) - - expected_tensors = model(*input_tensors) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors - ] - - if use_tracing: - torch.onnx.export( - model, - tuple(input_tensors) if len(input_tensors) > 1 else input_tensors[0], - str(output_path), - verbose=verbose, - opset_version=opset_version, - ) - else: - raise NotImplementedError - - try: - import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] - except ImportError: - raise ImportError( - "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." 
- ) - - # check the onnx model - sess = rt.InferenceSession(str(output_path)) - onnx_input_node_args = cast( - List[Any], sess.get_inputs() - ) # fixme: remove cast, try using rt.NodeArg instead of Any - onnx_inputs = { - input_name.name: inp - for input_name, inp in zip(onnx_input_node_args, input_data) - } - outputs = cast( - Sequence[np.ndarray[Any, Any]], sess.run(None, onnx_inputs) - ) # FIXME: remove cast - - try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=test_decimal) - except AssertionError as e: - raise ValueError( - f"Results before and after weights conversion do not agree:\n {str(e)}" - ) - - return v0_5.OnnxWeightsDescr( - source=output_path, parent="pytorch_state_dict", opset_version=opset_version - ) diff --git a/bioimageio/core/weight_converter/torch/_torchscript.py b/bioimageio/core/weight_converter/torch/_torchscript.py deleted file mode 100644 index 16dc0128..00000000 --- a/bioimageio/core/weight_converter/torch/_torchscript.py +++ /dev/null @@ -1,145 +0,0 @@ -# type: ignore # TODO: type -from __future__ import annotations -from pathlib import Path -from typing import List, Sequence, Union - -import numpy as np -from numpy.testing import assert_array_almost_equal -from torch.jit import ScriptModule -from typing_extensions import Any, assert_never - -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.model.v0_5 import Version - -from ._utils import load_torch_model - -try: - import torch -except ImportError: - torch = None - - -def _check_predictions( - model: Any, - scripted_model: Any, - model_spec: v0_4.ModelDescr | v0_5.ModelDescr, - input_data: Sequence[torch.Tensor], -): - assert torch is not None - - def _check(input_: Sequence[torch.Tensor]) -> None: - expected_tensors = model(*input_) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors - ] - 
- output_tensors = scripted_model(*input_) - if isinstance(output_tensors, torch.Tensor): - output_tensors = [output_tensors] - outputs: List[np.ndarray[Any, Any]] = [out.numpy() for out in output_tensors] - - try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=4) - except AssertionError as e: - raise ValueError( - f"Results before and after weights conversion do not agree:\n {str(e)}" - ) - - _check(input_data) - - if len(model_spec.inputs) > 1: - return # FIXME: why don't we check multiple inputs? - - input_descr = model_spec.inputs[0] - if isinstance(input_descr, v0_4.InputTensorDescr): - if not isinstance(input_descr.shape, v0_4.ParameterizedInputShape): - return - min_shape = input_descr.shape.min - step = input_descr.shape.step - else: - min_shape: List[int] = [] - step: List[int] = [] - for axis in input_descr.axes: - if isinstance(axis.size, v0_5.ParameterizedSize): - min_shape.append(axis.size.min) - step.append(axis.size.step) - elif isinstance(axis.size, int): - min_shape.append(axis.size) - step.append(0) - elif axis.size is None: - raise NotImplementedError( - f"Can't verify inputs that don't specify their shape fully: {axis}" - ) - elif isinstance(axis.size, v0_5.SizeReference): - raise NotImplementedError(f"Can't handle axes like '{axis}' yet") - else: - assert_never(axis.size) - - input_data = input_data[0] - max_shape = input_data.shape - max_steps = 4 - - # check that input and output agree for decreasing input sizes - for step_factor in range(1, max_steps + 1): - slice_ = tuple( - ( - slice(None) - if step_dim == 0 - else slice(0, max_dim - step_factor * step_dim, 1) - ) - for max_dim, step_dim in zip(max_shape, step) - ) - sliced_input = input_data[slice_] - if any( - sliced_dim < min_dim - for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) - ): - return - _check([sliced_input]) - - -def convert_weights_to_torchscript( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - 
output_path: Path, - use_tracing: bool = True, -) -> v0_5.TorchscriptWeightsDescr: - """Convert model weights from format 'pytorch_state_dict' to 'torchscript'. - - Args: - model_descr: location of the resource for the input bioimageio model - output_path: where to save the torchscript weights - use_tracing: whether to use tracing or scripting to export the torchscript format - """ - state_dict_weights_descr = model_descr.weights.pytorch_state_dict - if state_dict_weights_descr is None: - raise ValueError( - "The provided model does not have weights in the pytorch state dict format" - ) - - input_data = model_descr.get_input_test_arrays() - - with torch.no_grad(): - input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] - model = load_torch_model(state_dict_weights_descr) - scripted_module: ScriptModule = ( - torch.jit.trace(model, input_data) - if use_tracing - else torch.jit.script(model) - ) - _check_predictions( - model=model, - scripted_model=scripted_module, - model_spec=model_descr, - input_data=input_data, - ) - - scripted_module.save(str(output_path)) - - return v0_5.TorchscriptWeightsDescr( - source=output_path, - pytorch_version=Version(torch.__version__), - parent="pytorch_state_dict", - ) diff --git a/bioimageio/core/weight_converter/torch/_utils.py b/bioimageio/core/weight_converter/torch/_utils.py deleted file mode 100644 index 01df0747..00000000 --- a/bioimageio/core/weight_converter/torch/_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Union - -from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -try: - import torch -except ImportError: - torch = None - - -# additional convenience for pytorch state dict, eventually we want this in python-bioimageio too -# and for each weight format -def load_torch_model( # pyright: ignore[reportUnknownParameterType] - node: 
Union[v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr], -): - assert torch is not None - model = ( # pyright: ignore[reportUnknownVariableType] - PytorchModelAdapter.get_network(node) - ) - state = torch.load(download(node.source).path, map_location="cpu") - model.load_state_dict(state) # FIXME: check incompatible keys? - return model.eval() # pyright: ignore[reportUnknownVariableType] diff --git a/bioimageio/core/weight_converters.py b/bioimageio/core/weight_converters.py new file mode 100644 index 00000000..6e0d06ec --- /dev/null +++ b/bioimageio/core/weight_converters.py @@ -0,0 +1,492 @@ +# type: ignore # TODO: type +from __future__ import annotations + +import abc +from bioimageio.spec.model.v0_5 import WeightsEntryDescrBase +from typing import Any, List, Sequence, cast, Union +from typing_extensions import assert_never +import numpy as np +from numpy.testing import assert_array_almost_equal +from bioimageio.spec.model import v0_4, v0_5 +from torch.jit import ScriptModule +from bioimageio.core.digest_spec import get_test_inputs, get_member_id +from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter +import os +import shutil +from pathlib import Path +from typing import no_type_check +from zipfile import ZipFile +from bioimageio.spec._internal.version_type import Version +from bioimageio.spec._internal.io_utils import download + +try: + import torch +except ImportError: + torch = None + +try: + import tensorflow.saved_model +except Exception: + tensorflow = None + + +# additional convenience for pytorch state dict, eventually we want this in python-bioimageio too +# and for each weight format +def load_torch_model( # pyright: ignore[reportUnknownParameterType] + node: Union[v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr], +): + assert torch is not None + model = ( # pyright: ignore[reportUnknownVariableType] + PytorchModelAdapter.get_network(node) + ) + state = 
torch.load(download(node.source).path, map_location="cpu") + model.load_state_dict(state) # FIXME: check incompatible keys? + return model.eval() # pyright: ignore[reportUnknownVariableType] + + +class WeightConverter(abc.ABC): + @abc.abstractmethod + def convert( + self, model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], output_path: Path + ) -> WeightsEntryDescrBase: + raise NotImplementedError + + +class Pytorch2Onnx(WeightConverter): + def __init__(self): + super().__init__() + assert torch is not None + + def convert( + self, + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + output_path: Path, + use_tracing: bool = True, + test_decimal: int = 4, + verbose: bool = False, + opset_version: int = 15, + ) -> v0_5.OnnxWeightsDescr: + """ + Convert model weights from the PyTorch state_dict format to the ONNX format. + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights. + output_path (Path): + The file path where the ONNX model will be saved. + use_tracing (bool, optional): + Whether to use tracing or scripting to export the ONNX format. Defaults to True. + test_decimal (int, optional): + The decimal precision for comparing the results between the original and converted models. + This is used in the `assert_array_almost_equal` function to check if the outputs match. + Defaults to 4. + verbose (bool, optional): + If True, will print out detailed information during the ONNX export process. Defaults to False. + opset_version (int, optional): + The ONNX opset version to use for the export. Defaults to 15. + + Raises: + ValueError: + If the provided model does not have weights in the PyTorch state_dict format. + ImportError: + If ONNX Runtime is not available for checking the exported ONNX model. + ValueError: + If the results before and after weights conversion do not agree. 
+ + Returns: + v0_5.OnnxWeightsDescr: + A descriptor object that contains information about the exported ONNX weights. + """ + + state_dict_weights_descr = model_descr.weights.pytorch_state_dict + if state_dict_weights_descr is None: + raise ValueError( + "The provided model does not have weights in the pytorch state dict format" + ) + + assert torch is not None + with torch.no_grad(): + sample = get_test_inputs(model_descr) + input_data = [ + sample.members[get_member_id(ipt)].data.data + for ipt in model_descr.inputs + ] + input_tensors = [torch.from_numpy(ipt) for ipt in input_data] + model = load_torch_model(state_dict_weights_descr) + + expected_tensors = model(*input_tensors) + if isinstance(expected_tensors, torch.Tensor): + expected_tensors = [expected_tensors] + expected_outputs: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in expected_tensors + ] + + if use_tracing: + torch.onnx.export( + model, + ( + tuple(input_tensors) + if len(input_tensors) > 1 + else input_tensors[0] + ), + str(output_path), + verbose=verbose, + opset_version=opset_version, + ) + else: + raise NotImplementedError + + try: + import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] + except ImportError: + raise ImportError( + "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." 
+ ) + + # check the onnx model + sess = rt.InferenceSession(str(output_path)) + onnx_input_node_args = cast( + List[Any], sess.get_inputs() + ) # fixme: remove cast, try using rt.NodeArg instead of Any + onnx_inputs = { + input_name.name: inp + for input_name, inp in zip(onnx_input_node_args, input_data) + } + outputs = cast( + Sequence[np.ndarray[Any, Any]], sess.run(None, onnx_inputs) + ) # FIXME: remove cast + + try: + for exp, out in zip(expected_outputs, outputs): + assert_array_almost_equal(exp, out, decimal=test_decimal) + except AssertionError as e: + raise ValueError( + f"Results before and after weights conversion do not agree:\n {str(e)}" + ) + + return v0_5.OnnxWeightsDescr( + source=output_path, parent="pytorch_state_dict", opset_version=opset_version + ) + + +class Pytorch2Torchscipt(WeightConverter): + def __init__(self): + super().__init__() + assert torch is not None + + def convert( + self, + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + output_path: Path, + use_tracing: bool = True, + ) -> v0_5.TorchscriptWeightsDescr: + """ + Convert model weights from the PyTorch `state_dict` format to TorchScript. + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights in the PyTorch `state_dict` format. + output_path (Path): + The file path where the TorchScript model will be saved. + use_tracing (bool): + Whether to use tracing or scripting to export the TorchScript format. + - `True`: Use tracing, which is recommended for models with straightforward control flow. + - `False`: Use scripting, which is better for models with dynamic control flow (e.g., loops, conditionals). + + Raises: + ValueError: + If the provided model does not have weights in the PyTorch `state_dict` format. + + Returns: + v0_5.TorchscriptWeightsDescr: + A descriptor object that contains information about the exported TorchScript weights. 
+ """ + state_dict_weights_descr = model_descr.weights.pytorch_state_dict + if state_dict_weights_descr is None: + raise ValueError( + "The provided model does not have weights in the pytorch state dict format" + ) + + input_data = model_descr.get_input_test_arrays() + + with torch.no_grad(): + input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] + model = load_torch_model(state_dict_weights_descr) + scripted_module: ScriptModule = ( + torch.jit.trace(model, input_data) + if use_tracing + else torch.jit.script(model) + ) + self._check_predictions( + model=model, + scripted_model=scripted_module, + model_spec=model_descr, + input_data=input_data, + ) + + scripted_module.save(str(output_path)) + + return v0_5.TorchscriptWeightsDescr( + source=output_path, + pytorch_version=Version(torch.__version__), + parent="pytorch_state_dict", + ) + + def _check_predictions( + self, + model: Any, + scripted_model: Any, + model_spec: v0_4.ModelDescr | v0_5.ModelDescr, + input_data: Sequence[torch.Tensor], + ): + assert torch is not None + + def _check(input_: Sequence[torch.Tensor]) -> None: + expected_tensors = model(*input_) + if isinstance(expected_tensors, torch.Tensor): + expected_tensors = [expected_tensors] + expected_outputs: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in expected_tensors + ] + + output_tensors = scripted_model(*input_) + if isinstance(output_tensors, torch.Tensor): + output_tensors = [output_tensors] + outputs: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in output_tensors + ] + + try: + for exp, out in zip(expected_outputs, outputs): + assert_array_almost_equal(exp, out, decimal=4) + except AssertionError as e: + raise ValueError( + f"Results before and after weights conversion do not agree:\n {str(e)}" + ) + + _check(input_data) + + if len(model_spec.inputs) > 1: + return # FIXME: why don't we check multiple inputs? 
+ + input_descr = model_spec.inputs[0] + if isinstance(input_descr, v0_4.InputTensorDescr): + if not isinstance(input_descr.shape, v0_4.ParameterizedInputShape): + return + min_shape = input_descr.shape.min + step = input_descr.shape.step + else: + min_shape: List[int] = [] + step: List[int] = [] + for axis in input_descr.axes: + if isinstance(axis.size, v0_5.ParameterizedSize): + min_shape.append(axis.size.min) + step.append(axis.size.step) + elif isinstance(axis.size, int): + min_shape.append(axis.size) + step.append(0) + elif axis.size is None: + raise NotImplementedError( + f"Can't verify inputs that don't specify their shape fully: {axis}" + ) + elif isinstance(axis.size, v0_5.SizeReference): + raise NotImplementedError(f"Can't handle axes like '{axis}' yet") + else: + assert_never(axis.size) + + input_data = input_data[0] + max_shape = input_data.shape + max_steps = 4 + + # check that input and output agree for decreasing input sizes + for step_factor in range(1, max_steps + 1): + slice_ = tuple( + ( + slice(None) + if step_dim == 0 + else slice(0, max_dim - step_factor * step_dim, 1) + ) + for max_dim, step_dim in zip(max_shape, step) + ) + sliced_input = input_data[slice_] + if any( + sliced_dim < min_dim + for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) + ): + return + _check([sliced_input]) + + +class Tensorflow2Bundled(WeightConverter): + def __init__(self): + super().__init__() + assert tensorflow is not None + + def convert( + self, model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], output_path: Path + ) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + """ + Convert model weights from the 'keras_hdf5' format to the 'tensorflow_saved_model_bundle' format. + + This method handles the conversion of Keras HDF5 model weights into a TensorFlow SavedModel bundle, + which is the recommended format for deploying TensorFlow models. The method supports both TensorFlow 1.x + and 2.x versions, with appropriate checks to ensure compatibility. 
+ + Adapted from: + https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The bioimage.io model description containing the model's metadata and weights. + output_path (Path): + The directory where the TensorFlow SavedModel bundle will be saved. + This path must not already exist and, if necessary, will be zipped into a .zip file. + use_tracing (bool): + Placeholder argument; currently not used in this method but required to match the abstract method signature. + + Raises: + ValueError: + - If the specified `output_path` already exists. + - If the Keras HDF5 weights are missing in the model description. + RuntimeError: + If there is a mismatch between the TensorFlow version used by the model and the version installed. + NotImplementedError: + If the model has multiple inputs or outputs and TensorFlow 1.x is being used. + + Returns: + v0_5.TensorflowSavedModelBundleWeightsDescr: + A descriptor object containing information about the converted TensorFlow SavedModel bundle. 
+ """ + assert tensorflow is not None + tf_major_ver = int(tensorflow.__version__.split(".")[0]) + + if output_path.suffix == ".zip": + output_path = output_path.with_suffix("") + zip_weights = True + else: + zip_weights = False + + if output_path.exists(): + raise ValueError(f"The ouptut directory at {output_path} must not exist.") + + if model_descr.weights.keras_hdf5 is None: + raise ValueError("Missing Keras Hdf5 weights to convert from.") + + weight_spec = model_descr.weights.keras_hdf5 + weight_path = download(weight_spec.source).path + + if weight_spec.tensorflow_version: + model_tf_major_ver = int(weight_spec.tensorflow_version.major) + if model_tf_major_ver != tf_major_ver: + raise RuntimeError( + f"Tensorflow major versions of model {model_tf_major_ver} is not {tf_major_ver}" + ) + + if tf_major_ver == 1: + if len(model_descr.inputs) != 1 or len(model_descr.outputs) != 1: + raise NotImplementedError( + "Weight conversion for models with multiple inputs or outputs is not yet implemented." 
+ ) + return self._convert_tf1( + weight_path, + output_path, + model_descr.inputs[0].id, + model_descr.outputs[0].id, + zip_weights, + ) + else: + return self._convert_tf2(weight_path, output_path, zip_weights) + + def _convert_tf2( + self, keras_weight_path: Path, output_path: Path, zip_weights: bool + ) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + try: + # try to build the tf model with the keras import from tensorflow + from tensorflow import keras + except Exception: + # if the above fails try to export with the standalone keras + import keras + + model = keras.models.load_model(keras_weight_path) + keras.models.save_model(model, output_path) + + if zip_weights: + output_path = self._zip_model_bundle(output_path) + print("TensorFlow model exported to", output_path) + + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=Version(tensorflow.__version__), + ) + + # adapted from + # https://github.com/deepimagej/pydeepimagej/blob/master/pydeepimagej/yaml/create_config.py#L236 + def _convert_tf1( + self, + keras_weight_path: Path, + output_path: Path, + input_name: str, + output_name: str, + zip_weights: bool, + ) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + try: + # try to build the tf model with the keras import from tensorflow + from tensorflow import ( + keras, # type: ignore + ) + + except Exception: + # if the above fails try to export with the standalone keras + import keras + + @no_type_check + def build_tf_model(): + keras_model = keras.models.load_model(keras_weight_path) + assert tensorflow is not None + builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) + signature = ( + tensorflow.saved_model.signature_def_utils.predict_signature_def( + inputs={input_name: keras_model.input}, + outputs={output_name: keras_model.output}, + ) + ) + + signature_def_map = { + tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + } + + 
builder.add_meta_graph_and_variables( + keras.backend.get_session(), + [tensorflow.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map, + ) + builder.save() + + build_tf_model() + + if zip_weights: + output_path = self._zip_model_bundle(output_path) + print("TensorFlow model exported to", output_path) + + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=Version(tensorflow.__version__), + ) + + def _zip_model_bundle(self, model_bundle_folder: Path): + zipped_model_bundle = model_bundle_folder.with_suffix(".zip") + + with ZipFile(zipped_model_bundle, "w") as zip_obj: + for root, _, files in os.walk(model_bundle_folder): + for filename in files: + src = os.path.join(root, filename) + zip_obj.write(src, os.path.relpath(src, model_bundle_folder)) + + try: + shutil.rmtree(model_bundle_folder) + except Exception: + print("TensorFlow bundled model was not removed after compression") + + return zipped_model_bundle diff --git a/tests/weight_converter/test_add_weights.py b/tests/test_add_weights.py similarity index 100% rename from tests/weight_converter/test_add_weights.py rename to tests/test_add_weights.py diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py new file mode 100644 index 00000000..88010744 --- /dev/null +++ b/tests/test_weight_converters.py @@ -0,0 +1,69 @@ +# type: ignore # TODO enable type checking +import zipfile +from pathlib import Path + +import pytest + +import os + +from bioimageio.spec import load_description +from bioimageio.spec.model import v0_5 + +from bioimageio.core.weight_converters import ( + Pytorch2Torchscipt, + Pytorch2Onnx, + Tensorflow2Bundled, +) + + +def test_torchscript_converter(any_torch_model, tmp_path): + bio_model = load_description(any_torch_model) + out_path = tmp_path / "weights.pt" + util = Pytorch2Torchscipt() + ret_val = util.convert(bio_model, out_path) + assert out_path.exists() + assert isinstance(ret_val, 
v0_5.TorchscriptWeightsDescr) + assert ret_val.source == out_path + + +def test_onnx_converter(convert_to_onnx, tmp_path): + bio_model = load_description(convert_to_onnx) + out_path = tmp_path / "weights.onnx" + opset_version = 15 + util = Pytorch2Onnx() + ret_val = util.convert( + model_descr=bio_model, + output_path=out_path, + test_decimal=3, + opset_version=opset_version, + ) + assert os.path.exists(out_path) + assert isinstance(ret_val, v0_5.OnnxWeightsDescr) + assert ret_val.opset_version == opset_version + assert ret_val.source == out_path + + +def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): + model = load_description(any_keras_model) + out_path = tmp_path / "weights.h5" + util = Tensorflow2Bundled() + ret_val = util.convert(model, out_path) + assert out_path.exists() + assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + assert ret_val.source == out_path + + +@pytest.mark.skip() +def test_tensorflow_converter_zipped(any_keras_model: Path, tmp_path: Path): + out_path = tmp_path / "weights.zip" + model = load_description(any_keras_model) + util = Tensorflow2Bundled() + ret_val = util.convert(model, out_path) + + assert out_path.exists() + assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + + expected_names = {"saved_model.pb", "variables/variables.index"} + with zipfile.ZipFile(out_path, "r") as f: + names = set([name for name in f.namelist()]) + assert len(expected_names - names) == 0 diff --git a/tests/weight_converter/keras/test_tensorflow.py b/tests/weight_converter/keras/test_tensorflow.py deleted file mode 100644 index 18a4f2dc..00000000 --- a/tests/weight_converter/keras/test_tensorflow.py +++ /dev/null @@ -1,36 +0,0 @@ -# type: ignore # TODO enable type checking -import zipfile -from pathlib import Path - -import pytest -from bioimageio.spec import load_description -from bioimageio.spec.model import v0_5 - -from bioimageio.core.weight_converter.keras._tensorflow import ( - 
convert_weights_to_tensorflow_saved_model_bundle, -) - - -@pytest.mark.skip() -def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): - model = load_description(any_keras_model) - out_path = tmp_path / "weights.h5" - ret_val = convert_weights_to_tensorflow_saved_model_bundle(model, out_path) - assert out_path.exists() - assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) - assert ret_val.source == out_path - - -@pytest.mark.skip() -def test_tensorflow_converter_zipped(any_keras_model: Path, tmp_path: Path): - out_path = tmp_path / "weights.zip" - model = load_description(any_keras_model) - ret_val = convert_weights_to_tensorflow_saved_model_bundle(model, out_path) - - assert out_path.exists() - assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) - - expected_names = {"saved_model.pb", "variables/variables.index"} - with zipfile.ZipFile(out_path, "r") as f: - names = set([name for name in f.namelist()]) - assert len(expected_names - names) == 0 diff --git a/tests/weight_converter/torch/test_onnx.py b/tests/weight_converter/torch/test_onnx.py deleted file mode 100644 index faab39d6..00000000 --- a/tests/weight_converter/torch/test_onnx.py +++ /dev/null @@ -1,23 +0,0 @@ -# type: ignore # TODO enable type checking -import os - -from bioimageio.spec import load_description -from bioimageio.spec.model import v0_5 - -from bioimageio.core.weight_converter.torch._onnx import convert_weights_to_onnx - - -def test_onnx_converter(convert_to_onnx, tmp_path): - bio_model = load_description(convert_to_onnx) - out_path = tmp_path / "weights.onnx" - opset_version = 15 - ret_val = convert_weights_to_onnx( - model_spec=bio_model, - output_path=out_path, - test_decimal=3, - opset_version=opset_version, - ) - assert os.path.exists(out_path) - assert isinstance(ret_val, v0_5.OnnxWeightsDescr) - assert ret_val.opset_version == opset_version - assert ret_val.source == out_path diff --git 
a/tests/weight_converter/torch/test_torchscript.py b/tests/weight_converter/torch/test_torchscript.py deleted file mode 100644 index 6b397f08..00000000 --- a/tests/weight_converter/torch/test_torchscript.py +++ /dev/null @@ -1,18 +0,0 @@ -# type: ignore # TODO enable type checking -import pytest -from bioimageio.spec import load_description -from bioimageio.spec.model import v0_5 - -from bioimageio.core.weight_converter.torch._torchscript import ( - convert_weights_to_torchscript, -) - - -@pytest.mark.skip() -def test_torchscript_converter(any_torch_model, tmp_path): - bio_model = load_description(any_torch_model) - out_path = tmp_path / "weights.pt" - ret_val = convert_weights_to_torchscript(bio_model, out_path) - assert out_path.exists() - assert isinstance(ret_val, v0_5.TorchscriptWeightsDescr) - assert ret_val.source == out_path From db891ebb2456ab9db4696b8753e7bf0e99979d54 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 2 Dec 2024 13:33:09 +0100 Subject: [PATCH 003/187] fix import_callable annotation --- bioimageio/core/digest_spec.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index edb5a45d..854e6a7c 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -50,7 +50,12 @@ def import_callable( - node: Union[CallableFromDepencency, ArchitectureFromLibraryDescr], + node: Union[ + ArchitectureFromFileDescr, + ArchitectureFromLibraryDescr, + CallableFromDepencency, + CallableFromFile, + ], /, **kwargs: Unpack[HashKwargs], ) -> Callable[..., Any]: @@ -65,7 +70,6 @@ def import_callable( c = _import_from_file_impl(node.source_file, str(node.callable_name), **kwargs) elif isinstance(node, ArchitectureFromFileDescr): c = _import_from_file_impl(node.source, str(node.callable), sha256=node.sha256) - else: assert_never(node) From a391d940a93566b2350a44aaa2bf61b5fd0d0921 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 2 Dec 2024 13:33:38 +0100 
Subject: [PATCH 004/187] improve error traceback for single weights format attempt --- bioimageio/core/model_adapters/_model_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/model_adapters/_model_adapter.py index c918603e..da2a2ea9 100644 --- a/bioimageio/core/model_adapters/_model_adapter.py +++ b/bioimageio/core/model_adapters/_model_adapter.py @@ -137,7 +137,7 @@ def create( raise ValueError( f"The '{weight_format_priority_order[0]}' model adapter could not be created" + f" in this environment:\n{errors[0][1].__class__.__name__}({errors[0][1]}).\n\n" - ) + ) from errors[0][1] else: error_list = "\n - ".join( From 4103b511e8edf67e5d207613f1555939d7c5e24d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 2 Dec 2024 15:26:48 +0100 Subject: [PATCH 005/187] add load_state --- .../model_adapters/_pytorch_model_adapter.py | 123 +++++++++++------- .../core/weight_converter/torch/_utils.py | 24 ---- 2 files changed, 73 insertions(+), 74 deletions(-) delete mode 100644 bioimageio/core/weight_converter/torch/_utils.py diff --git a/bioimageio/core/model_adapters/_pytorch_model_adapter.py b/bioimageio/core/model_adapters/_pytorch_model_adapter.py index a5178d74..1992f406 100644 --- a/bioimageio/core/model_adapters/_pytorch_model_adapter.py +++ b/bioimageio/core/model_adapters/_pytorch_model_adapter.py @@ -1,23 +1,23 @@ import gc import warnings -from typing import Any, List, Optional, Sequence, Tuple, Union +from contextlib import nullcontext +from io import TextIOWrapper +from pathlib import Path +from typing import Any, List, Literal, Optional, Sequence, Tuple, Union +import torch +from loguru import logger +from torch import nn +from typing_extensions import assert_never + +from bioimageio.spec.common import ZipPath from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download -from ..axis import AxisId from ..digest_spec import get_axes_infos, 
import_callable from ..tensor import Tensor from ._model_adapter import ModelAdapter -try: - import torch -except Exception as e: - torch = None - torch_error = str(e) -else: - torch_error = None - class PytorchModelAdapter(ModelAdapter): def __init__( @@ -29,48 +29,41 @@ def __init__( weights: Union[ v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr ], - devices: Optional[Sequence[str]] = None, + devices: Optional[Sequence[Union[str, torch.device]]] = None, + mode: Literal["eval", "train"] = "eval", ): - if torch is None: - raise ImportError(f"failed to import torch: {torch_error}") - super().__init__() self.output_dims = [tuple(a.id for a in get_axes_infos(out)) for out in outputs] - self._network = self.get_network(weights) - self._devices = self.get_devices(devices) - self._network = self._network.to(self._devices[0]) - - self._primary_device = self._devices[0] - state: Any = torch.load( - download(weights).path, - map_location=self._primary_device, # pyright: ignore[reportUnknownArgumentType] - ) - self._network.load_state_dict(state) + devices = self.get_devices(devices) + self._network = self.get_network(weights, load_state=True, devices=devices) + if mode == "eval": + self._network = self._network.eval() + elif mode == "train": + self._network = self._network.train() + else: + assert_never(mode) - self._network = self._network.eval() + self._mode: Literal["eval", "train"] = mode + self._primary_device = devices[0] def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - if torch is None: - raise ImportError("torch") - with torch.no_grad(): + if self._mode == "eval": + ctxt = torch.no_grad + elif self._mode == "train": + ctxt = nullcontext + else: + assert_never(self._mode) + + with ctxt(): tensors = [ None if ipt is None else torch.from_numpy(ipt.data.data) for ipt in input_tensors ] tensors = [ - ( - None - if t is None - else t.to( - self._primary_device # pyright: ignore[reportUnknownArgumentType] - ) - ) - for 
t in tensors + (None if t is None else t.to(self._primary_device)) for t in tensors ] result: Union[Tuple[Any, ...], List[Any], Any] - result = self._network( # pyright: ignore[reportUnknownVariableType] - *tensors - ) + result = self._network(*tensors) if not isinstance(result, (tuple, list)): result = [result] @@ -98,14 +91,16 @@ def unload(self) -> None: assert torch is not None torch.cuda.empty_cache() # release reserved memory - @staticmethod - def get_network( # pyright: ignore[reportUnknownParameterType] + @classmethod + def get_network( + cls, weight_spec: Union[ v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr ], - ) -> "torch.nn.Module": # pyright: ignore[reportInvalidTypeForm] - if torch is None: - raise ImportError("torch") + *, + load_state: bool = False, + devices: Optional[Sequence[Union[str, torch.device]]] = None, + ) -> nn.Module: arch = import_callable( weight_spec.architecture, sha256=( @@ -120,19 +115,47 @@ def get_network( # pyright: ignore[reportUnknownParameterType] else weight_spec.architecture.kwargs ) network = arch(**model_kwargs) - if not isinstance(network, torch.nn.Module): + if not isinstance(network, nn.Module): raise ValueError( f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" ) + if load_state or devices: + use_devices = cls.get_devices(devices) + network = network.to(use_devices[0]) + if load_state: + network = cls.load_state( + network, + path=download(weight_spec).path, + devices=use_devices, + ) + return network + + @staticmethod + def load_state( + network: nn.Module, + path: Union[Path, ZipPath], + devices: Sequence[torch.device], + ) -> nn.Module: + network = network.to(devices[0]) + with path.open("rb") as f: + assert not isinstance(f, TextIOWrapper) + state = torch.load(f, map_location=devices[0]) + + incompatible = network.load_state_dict(state) + if incompatible.missing_keys: + logger.warning("Missing state dict keys: {}", incompatible.missing_keys) + + if 
incompatible.unexpected_keys: + logger.warning( + "Unexpected state dict keys: {}", incompatible.unexpected_keys + ) return network @staticmethod - def get_devices( # pyright: ignore[reportUnknownParameterType] - devices: Optional[Sequence[str]] = None, - ) -> List["torch.device"]: # pyright: ignore[reportInvalidTypeForm] - if torch is None: - raise ImportError("torch") + def get_devices( + devices: Optional[Sequence[Union[torch.device, str]]] = None, + ) -> List[torch.device]: if not devices: torch_devices = [ ( diff --git a/bioimageio/core/weight_converter/torch/_utils.py b/bioimageio/core/weight_converter/torch/_utils.py deleted file mode 100644 index 01df0747..00000000 --- a/bioimageio/core/weight_converter/torch/_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Union - -from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -try: - import torch -except ImportError: - torch = None - - -# additional convenience for pytorch state dict, eventually we want this in python-bioimageio too -# and for each weight format -def load_torch_model( # pyright: ignore[reportUnknownParameterType] - node: Union[v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr], -): - assert torch is not None - model = ( # pyright: ignore[reportUnknownVariableType] - PytorchModelAdapter.get_network(node) - ) - state = torch.load(download(node.source).path, map_location="cpu") - model.load_state_dict(state) # FIXME: check incompatible keys? 
- return model.eval() # pyright: ignore[reportUnknownVariableType] From 7ec7afb49a187646b854ba2d94d796d71b85b336 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 2 Dec 2024 15:27:15 +0100 Subject: [PATCH 006/187] update ONNXModelAdapter --- .../model_adapters/_onnx_model_adapter.py | 23 +++++-------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/bioimageio/core/model_adapters/_onnx_model_adapter.py b/bioimageio/core/model_adapters/_onnx_model_adapter.py index c747de22..87045897 100644 --- a/bioimageio/core/model_adapters/_onnx_model_adapter.py +++ b/bioimageio/core/model_adapters/_onnx_model_adapter.py @@ -1,8 +1,9 @@ import warnings from typing import Any, List, Optional, Sequence, Union -from numpy.typing import NDArray +import onnxruntime as rt +from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download @@ -10,14 +11,6 @@ from ..tensor import Tensor from ._model_adapter import ModelAdapter -try: - import onnxruntime as rt -except Exception as e: - rt = None - rt_error = str(e) -else: - rt_error = None - class ONNXModelAdapter(ModelAdapter): def __init__( @@ -26,9 +19,6 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ): - if rt is None: - raise ImportError(f"failed to import onnxruntime: {rt_error}") - super().__init__() self._internal_output_axes = [ tuple(a.id for a in get_axes_infos(out)) @@ -51,14 +41,13 @@ def __init__( def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: assert len(input_tensors) == len(self._input_names) input_arrays = [None if ipt is None else ipt.data.data for ipt in input_tensors] - result: Union[Sequence[Optional[NDArray[Any]]], Optional[NDArray[Any]]] - result = self._session.run( # pyright: ignore[reportUnknownVariableType] + result: Any = self._session.run( None, dict(zip(self._input_names, input_arrays)) ) - if 
isinstance(result, (list, tuple)): - result_seq: Sequence[Optional[NDArray[Any]]] = result + if is_list(result) or is_tuple(result): + result_seq = result else: - result_seq = [result] # type: ignore + result_seq = [result] return [ None if r is None else Tensor(r, dims=axes) From ed8f1db75870af1a2e47ff3842f4a05cdcc217d0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 2 Dec 2024 15:27:54 +0100 Subject: [PATCH 007/187] update TorchscriptModelAdapter typing --- .../_torchscript_model_adapter.py | 33 +++++-------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/bioimageio/core/model_adapters/_torchscript_model_adapter.py b/bioimageio/core/model_adapters/_torchscript_model_adapter.py index 0e9f3aef..346718a9 100644 --- a/bioimageio/core/model_adapters/_torchscript_model_adapter.py +++ b/bioimageio/core/model_adapters/_torchscript_model_adapter.py @@ -1,10 +1,10 @@ import gc import warnings -from typing import Any, List, Optional, Sequence, Tuple, Union +from typing import Any, List, Optional, Sequence, Union -import numpy as np -from numpy.typing import NDArray +import torch +from bioimageio.spec._internal.type_guards import is_list, is_ndarray, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download @@ -12,14 +12,6 @@ from ..tensor import Tensor from ._model_adapter import ModelAdapter -try: - import torch -except Exception as e: - torch = None - torch_error = str(e) -else: - torch_error = None - class TorchscriptModelAdapter(ModelAdapter): def __init__( @@ -28,9 +20,6 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ): - if torch is None: - raise ImportError(f"failed to import torch: {torch_error}") - super().__init__() if model_description.weights.torchscript is None: raise ValueError( @@ -57,19 +46,14 @@ def __init__( ] def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: - assert torch is not None with 
torch.no_grad(): torch_tensor = [ None if b is None else torch.from_numpy(b.data.data).to(self.devices[0]) for b in batch ] - _result: Union[ # pyright: ignore[reportUnknownVariableType] - Tuple[Optional[NDArray[Any]], ...], - List[Optional[NDArray[Any]]], - Optional[NDArray[Any]], - ] = self._model.forward(*torch_tensor) - if isinstance(_result, (tuple, list)): - result: Sequence[Optional[NDArray[Any]]] = _result + _result: Any = self._model.forward(*torch_tensor) + if is_list(_result) or is_tuple(_result): + result: Sequence[Any] = _result else: result = [_result] @@ -77,19 +61,18 @@ def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: ( None if r is None - else r.cpu().numpy() if not isinstance(r, np.ndarray) else r + else r.cpu().numpy() if isinstance(r, torch.Tensor) else r ) for r in result ] assert len(result) == len(self._internal_output_axes) return [ - None if r is None else Tensor(r, dims=axes) + None if r is None else Tensor(r, dims=axes) if is_ndarray(r) else r for r, axes in zip(result, self._internal_output_axes) ] def unload(self) -> None: - assert torch is not None self._devices = None del self._model _ = gc.collect() # deallocate memory From 9ae626d3665a977bd55c3cd746c247e08015424b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 3 Dec 2024 10:47:57 +0100 Subject: [PATCH 008/187] update unzipping in tensorflow model adapter --- .../_tensorflow_model_adapter.py | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/bioimageio/core/model_adapters/_tensorflow_model_adapter.py b/bioimageio/core/model_adapters/_tensorflow_model_adapter.py index cfb264f0..b469cde7 100644 --- a/bioimageio/core/model_adapters/_tensorflow_model_adapter.py +++ b/bioimageio/core/model_adapters/_tensorflow_model_adapter.py @@ -1,10 +1,14 @@ import zipfile +from io import TextIOWrapper +from pathlib import Path +from shutil import copyfileobj from typing import List, Literal, Optional, Sequence, Union import numpy as np 
+import tensorflow as tf # pyright: ignore[reportMissingImports] from loguru import logger -from bioimageio.spec.common import FileSource +from bioimageio.spec.common import FileSource, ZipPath from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download @@ -12,14 +16,6 @@ from ..tensor import Tensor from ._model_adapter import ModelAdapter -try: - import tensorflow as tf # pyright: ignore[reportMissingImports] -except Exception as e: - tf = None - tf_error = str(e) -else: - tf_error = None - class TensorflowModelAdapterBase(ModelAdapter): weight_format: Literal["keras_hdf5", "tensorflow_saved_model_bundle"] @@ -36,9 +32,6 @@ def __init__( ], model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], ): - if tf is None: - raise ImportError(f"failed to import tensorflow: {tf_error}") - super().__init__() self.model_description = model_description tf_version = v0_5.Version( @@ -81,16 +74,29 @@ def __init__( for out in model_description.outputs ] + # TODO: check how to load tf weights without unzipping def require_unzipped(self, weight_file: FileSource): - loacl_weights_file = download(weight_file).path - if zipfile.is_zipfile(loacl_weights_file): - out_path = loacl_weights_file.with_suffix(".unzipped") - with zipfile.ZipFile(loacl_weights_file, "r") as f: + local_weights_file = download(weight_file).path + if isinstance(local_weights_file, ZipPath): + # weights file is in a bioimageio zip package + out_path = ( + Path("bioimageio_unzipped_tf_weights") / local_weights_file.filename + ) + with local_weights_file.open("rb") as src, out_path.open("wb") as dst: + assert not isinstance(src, TextIOWrapper) + copyfileobj(src, dst) + + local_weights_file = out_path + + if zipfile.is_zipfile(local_weights_file): + # weights file itself is a zipfile + out_path = local_weights_file.with_suffix(".unzipped") + with zipfile.ZipFile(local_weights_file, "r") as f: f.extractall(out_path) return out_path else: - return loacl_weights_file + return 
local_weights_file def _get_network( # pyright: ignore[reportUnknownParameterType] self, weight_file: FileSource From fceed3cd253ba9eff72b5c57a90d4514dc5edd45 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 3 Dec 2024 13:51:48 +0100 Subject: [PATCH 009/187] add upper bounds to dependencies --- dev/env-py38.yaml | 5 +++-- dev/env-tf.yaml | 3 ++- dev/env-wo-python.yaml | 6 +++--- dev/env.yaml | 9 ++++++--- setup.py | 10 ++++------ 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 22353103..2ff00627 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -3,6 +3,7 @@ name: core38 channels: - conda-forge - nodefaults + - pytorch dependencies: - bioimageio.spec>=0.5.3.5 - black @@ -12,7 +13,7 @@ dependencies: - imageio>=2.5 - jupyter - jupyter-black - # - keras>=3.0 # removed + - keras>=3.0,<4 # removed - loguru - numpy - onnxruntime @@ -28,7 +29,7 @@ dependencies: - pytest-cov - pytest-xdist - python=3.8 # changed - - pytorch>=2.1 + - pytorch>=2.1,<3 - requests - rich - ruff diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 0df6fd07..3874009f 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -3,6 +3,7 @@ name: core-tf # changed channels: - conda-forge - nodefaults + # - pytroch # removed dependencies: - bioimageio.spec>=0.5.3.5 - black @@ -28,7 +29,7 @@ dependencies: - pytest-cov - pytest-xdist # - python=3.9 # removed - # - pytorch>=2.1 # removed + # - pytorch>=2.1,<3 # removed - requests - rich # - ruff # removed diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index d8cba289..dc76f005 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -3,7 +3,7 @@ name: core channels: - conda-forge - nodefaults - - pytorch # added + - pytorch dependencies: - bioimageio.spec>=0.5.3.5 - black @@ -13,7 +13,7 @@ dependencies: - imageio>=2.5 - jupyter - jupyter-black - - keras>=3.0 + - keras>=3.0,<4 - loguru - numpy - onnxruntime @@ -29,7 +29,7 @@ dependencies: - pytest-cov - pytest-xdist # - 
python=3.9 # removed - - pytorch>=2.1 + - pytorch>=2.1,<3 - requests - rich - ruff diff --git a/dev/env.yaml b/dev/env.yaml index 20d60a18..ed16d72e 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -1,6 +1,8 @@ name: core channels: - conda-forge + - nodefaults + - pytorch dependencies: - bioimageio.spec>=0.5.3.5 - black @@ -12,7 +14,7 @@ dependencies: - jupyter-black - ipykernel - matplotlib - - keras>=3.0 + - keras>=3.0,<4 - loguru - numpy - onnxruntime @@ -27,12 +29,13 @@ dependencies: - pytest - pytest-cov - pytest-xdist - - python=3.9 - - pytorch>=2.1 + - python=3.12 + - pytorch>=2.1,<3 - requests - rich - ruff - ruyaml + - tensorflow>=2,<3 - torchvision - tqdm - typing-extensions diff --git a/setup.py b/setup.py index 99747946..79c3b0c9 100644 --- a/setup.py +++ b/setup.py @@ -45,17 +45,17 @@ ], include_package_data=True, extras_require={ - "pytorch": ["torch>=1.6", "torchvision", "keras>=3.0"], - "tensorflow": ["tensorflow", "keras>=2.15"], + "pytorch": (pytorch_deps := ["torch>=1.6,<3", "torchvision", "keras>=3.0,<4"]), + "tensorflow": ["tensorflow", "keras>=2.15,<4"], "onnx": ["onnxruntime"], - "dev": [ + "dev": pytorch_deps + + [ "black", # "crick", # currently requires python<=3.9 "filelock", "jupyter", "jupyter-black", "matplotlib", - "keras>=3.0", "onnxruntime", "packaging>=17.0", "pre-commit", @@ -65,8 +65,6 @@ "pytest-cov", "pytest-xdist", # parallel pytest "pytest", - "torch>=1.6", - "torchvision", ], }, project_urls={ From 77e1e844dea4e4ab7c97d5b243eb7af8f7f97017 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 3 Dec 2024 14:01:32 +0100 Subject: [PATCH 010/187] update dev envs --- dev/env-py38.yaml | 6 ++++-- dev/env-tf.yaml | 3 ++- dev/env-wo-python.yaml | 4 +++- dev/env.yaml | 5 ++--- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 2ff00627..7f0d6da9 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -13,8 +13,9 @@ dependencies: - imageio>=2.5 - jupyter - jupyter-black - - 
keras>=3.0,<4 # removed + - # keras>=3.0,<4 # removed - loguru + - matplotlib - numpy - onnxruntime - packaging>=17.0 @@ -34,9 +35,10 @@ dependencies: - rich - ruff - ruyaml + # - tensorflow>=2,<3 removed - torchvision - tqdm - typing-extensions - xarray - pip: - - -e .. + - -e --no-deps .. diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 3874009f..bd12ca02 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -15,6 +15,7 @@ dependencies: - jupyter-black - keras>=2.15 # changed - loguru + - matplotlib - numpy - onnxruntime - packaging>=17.0 @@ -40,4 +41,4 @@ dependencies: - typing-extensions - xarray - pip: - - -e .. + - -e --no-deps .. diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index dc76f005..ff0410d9 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -15,6 +15,7 @@ dependencies: - jupyter-black - keras>=3.0,<4 - loguru + - matplotlib - numpy - onnxruntime - packaging>=17.0 @@ -34,9 +35,10 @@ dependencies: - rich - ruff - ruyaml + - tensorflow>=2,<3 - torchvision - tqdm - typing-extensions - xarray - pip: - - -e .. + - -e --no-deps .. diff --git a/dev/env.yaml b/dev/env.yaml index ed16d72e..c9b62c50 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -12,10 +12,9 @@ dependencies: - imageio>=2.5 - jupyter - jupyter-black - - ipykernel - - matplotlib - keras>=3.0,<4 - loguru + - matplotlib - numpy - onnxruntime - packaging>=17.0 @@ -41,4 +40,4 @@ dependencies: - typing-extensions - xarray - pip: - - -e .. + - -e --no-deps .. 
From a131369b8e72217ba5ae1f3f3c71b6ea3f09456e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 3 Dec 2024 14:19:20 +0100 Subject: [PATCH 011/187] WIP setup run expensive tests --- .github/workflows/build.yaml | 15 +++-- bioimageio/core/test_collection.py | 3 + ...t_prediction_pipeline_device_management.py | 2 +- .../core/utils/testing.py => tests/utils.py | 64 +++++++++++-------- 4 files changed, 51 insertions(+), 33 deletions(-) create mode 100644 bioimageio/core/test_collection.py rename bioimageio/core/utils/testing.py => tests/utils.py (80%) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c890e8df..634820ad 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,6 +27,9 @@ jobs: strategy: matrix: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + include: + - python-version: '3.12' + run-expensive-tests: true steps: - uses: actions/checkout@v4 - name: Install Conda environment with Micromamba @@ -63,6 +66,8 @@ jobs: run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache + RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} + test-spec-main: runs-on: ubuntu-latest @@ -71,7 +76,8 @@ jobs: python-version: ['3.8', '3.12'] include: - python-version: '3.12' - is-dev-version: true + report-coverage: true + run-expensive-tests: true steps: - uses: actions/checkout@v4 - name: Install Conda environment with Micromamba @@ -112,17 +118,18 @@ jobs: run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache - - if: matrix.is-dev-version && github.event_name == 'pull_request' + RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} + - if: matrix.report-coverage && github.event_name == 'pull_request' uses: orgoro/coverage@v3.2 with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - - if: matrix.is-dev-version && github.ref == 'refs/heads/main' + - if: matrix.report-coverage && github.ref == 
'refs/heads/main' run: | pip install genbadge[coverage] genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg coverage html -d dist/coverage - - if: matrix.is-dev-version && github.ref == 'refs/heads/main' + - if: matrix.report-coverage && github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: coverage diff --git a/bioimageio/core/test_collection.py b/bioimageio/core/test_collection.py new file mode 100644 index 00000000..a0b3025a --- /dev/null +++ b/bioimageio/core/test_collection.py @@ -0,0 +1,3 @@ +from tests.utils import expensive_test + +@expensive_test diff --git a/tests/test_prediction_pipeline_device_management.py b/tests/test_prediction_pipeline_device_management.py index 0e241df1..aa692356 100644 --- a/tests/test_prediction_pipeline_device_management.py +++ b/tests/test_prediction_pipeline_device_management.py @@ -2,9 +2,9 @@ from numpy.testing import assert_array_almost_equal -from bioimageio.core.utils.testing import skip_on from bioimageio.spec.model.v0_4 import ModelDescr as ModelDescr04 from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat +from tests.utils import skip_on class TooFewDevicesException(Exception): diff --git a/bioimageio/core/utils/testing.py b/tests/utils.py similarity index 80% rename from bioimageio/core/utils/testing.py rename to tests/utils.py index acd65d95..9cd7445e 100644 --- a/bioimageio/core/utils/testing.py +++ b/tests/utils.py @@ -1,28 +1,36 @@ -# TODO: move to tests/ -from functools import wraps -from typing import Any, Protocol, Type - - -class test_func(Protocol): - def __call__(*args: Any, **kwargs: Any): ... 
- - -def skip_on(exception: Type[Exception], reason: str): - """adapted from https://stackoverflow.com/a/63522579""" - import pytest - - # Func below is the real decorator and will receive the test function as param - def decorator_func(f: test_func): - @wraps(f) - def wrapper(*args: Any, **kwargs: Any): - try: - # Try to run the test - return f(*args, **kwargs) - except exception: - # If exception of given type happens - # just swallow it and raise pytest.Skip with given reason - pytest.skip(reason) - - return wrapper - - return decorator_func +import os +from functools import wraps +from typing import Any, Protocol, Type + +import pytest + + +class test_func(Protocol): + def __call__(*args: Any, **kwargs: Any): ... + + +def skip_on(exception: Type[Exception], reason: str): + """adapted from https://stackoverflow.com/a/63522579""" + import pytest + + # Func below is the real decorator and will receive the test function as param + def decorator_func(f: test_func): + @wraps(f) + def wrapper(*args: Any, **kwargs: Any): + try: + # Try to run the test + return f(*args, **kwargs) + except exception: + # If exception of given type happens + # just swallow it and raise pytest.Skip with given reason + pytest.skip(reason) + + return wrapper + + return decorator_func + + +expensive_test = pytest.mark.skipif( + (run := os.getenv("RUN_EXPENSIVE_TESTS")) != "true", + reason="Skipping expensive test (enable by RUN_EXPENSIVE_TESTS='true')", +) From 0888c52049ab62a271d9bd0291fbd61edc577382 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 3 Dec 2024 16:54:03 +0100 Subject: [PATCH 012/187] WIP resource tests --- bioimageio/core/test_bioimageio_collection.py | 60 +++++++++++++++++++ bioimageio/core/test_collection.py | 3 - tests/utils.py | 7 ++- 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 bioimageio/core/test_bioimageio_collection.py delete mode 100644 bioimageio/core/test_collection.py diff --git a/bioimageio/core/test_bioimageio_collection.py 
b/bioimageio/core/test_bioimageio_collection.py new file mode 100644 index 00000000..4de6a26f --- /dev/null +++ b/bioimageio/core/test_bioimageio_collection.py @@ -0,0 +1,60 @@ +from typing import Any, Collection, Dict, Iterable, Mapping, Tuple + +import pytest +import requests +from pydantic import HttpUrl + +from bioimageio.spec import InvalidDescr +from bioimageio.spec.common import Sha256 +from tests.utils import ParameterSet, expensive_test + +BASE_URL = "https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/" + + +def _get_latest_rdf_sources(): + entries: Any = requests.get(BASE_URL + "all_versions.json").json()["entries"] + ret: Dict[str, Tuple[HttpUrl, Sha256]] = {} + for entry in entries: + version = entry["versions"][0] + ret[f"{entry['concept']}/{version['v']}"] = ( + HttpUrl(version["source"]), + Sha256(version["sha256"]), + ) + + return ret + + +ALL_LATEST_RDF_SOURCES: Mapping[str, Tuple[HttpUrl, Sha256]] = _get_latest_rdf_sources() + + +def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: + for descr_url, sha in ALL_LATEST_RDF_SOURCES.values(): + key = ( + str(descr_url) + .replace(BASE_URL, "") + .replace("/files/rdf.yaml", "") + .replace("/files/bioimageio.yaml", "") + ) + yield pytest.param(descr_url, sha, key, id=key) + + +KNOWN_INVALID: Collection[str] = set() + + +@expensive_test +@pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls())) +def test_rdf( + descr_url: HttpUrl, + sha: Sha256, + key: str, +): + if key in KNOWN_INVALID: + pytest.skip("known failure") + + from bioimageio.core import load_description_and_test + + descr = load_description_and_test(descr_url, sha256=sha) + assert not isinstance(descr, InvalidDescr) + assert ( + descr.validation_summary.status == "passed" + ), descr.validation_summary.format() diff --git a/bioimageio/core/test_collection.py b/bioimageio/core/test_collection.py deleted file mode 100644 index a0b3025a..00000000 --- a/bioimageio/core/test_collection.py +++ /dev/null @@ 
-1,3 +0,0 @@ -from tests.utils import expensive_test - -@expensive_test diff --git a/tests/utils.py b/tests/utils.py index 9cd7445e..3a8e695b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,10 +1,15 @@ import os from functools import wraps -from typing import Any, Protocol, Type +from typing import Any, Protocol, Sequence, Type import pytest +class ParameterSet(Protocol): + def __init__(self, values: Sequence[Any], marks: Any, id: str) -> None: + super().__init__() + + class test_func(Protocol): def __call__(*args: Any, **kwargs: Any): ... From 40dfe2594199b182d7ad81c120ac4db1cc81360d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 3 Dec 2024 16:54:24 +0100 Subject: [PATCH 013/187] expose sha256 arg --- bioimageio/core/_resource_tests.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 6ace6d5c..8c04b1b8 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -14,7 +14,7 @@ load_description, ) from bioimageio.spec._internal.common_nodes import ResourceDescrBase -from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource +from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource, Sha256 from bioimageio.spec.get_conda_env import get_conda_env from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat @@ -102,6 +102,7 @@ def test_model( decimal: Optional[int] = None, *, determinism: Literal["seed_only", "full"] = "seed_only", + sha256: Optional[Sha256] = None, ) -> ValidationSummary: """Test model inference""" return test_description( @@ -113,6 +114,7 @@ def test_model( decimal=decimal, determinism=determinism, expected_type="model", + sha256=sha256, ) @@ -127,6 +129,7 @@ def test_description( decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, + sha256: Optional[Sha256] = 
None, ) -> ValidationSummary: """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models""" rd = load_description_and_test( @@ -139,6 +142,7 @@ def test_description( decimal=decimal, determinism=determinism, expected_type=expected_type, + sha256=sha256, ) return rd.validation_summary @@ -154,6 +158,7 @@ def load_description_and_test( decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, + sha256: Optional[Sha256] = None, ) -> Union[ResourceDescr, InvalidDescr]: """Test RDF dynamically, e.g. model inference of test inputs""" if ( @@ -171,7 +176,7 @@ def load_description_and_test( elif isinstance(source, dict): rd = build_description(source, format_version=format_version) else: - rd = load_description(source, format_version=format_version) + rd = load_description(source, format_version=format_version, sha256=sha256) rd.validation_summary.env.add( InstalledPackage(name="bioimageio.core", version=VERSION) From bb539d4d676b19d3ce5371b2d6f8830001424910 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 4 Dec 2024 14:40:09 +0100 Subject: [PATCH 014/187] update torchscript adapter --- .../core/weight_converter/torch/_torchscript.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/bioimageio/core/weight_converter/torch/_torchscript.py b/bioimageio/core/weight_converter/torch/_torchscript.py index 5ca16069..766bc7e3 100644 --- a/bioimageio/core/weight_converter/torch/_torchscript.py +++ b/bioimageio/core/weight_converter/torch/_torchscript.py @@ -1,20 +1,15 @@ -# type: ignore # TODO: type from pathlib import Path from typing import List, Sequence, Union import numpy as np +import torch from numpy.testing import assert_array_almost_equal from typing_extensions import Any, assert_never from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import Version -from ._utils import load_torch_model - -try: - import torch -except 
ImportError: - torch = None +from ...model_adapters._pytorch_model_adapter import PytorchModelAdapter # FIXME: remove Any @@ -119,7 +114,9 @@ def convert_weights_to_torchscript( with torch.no_grad(): input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] - model = load_torch_model(state_dict_weights_descr) + model = PytorchModelAdapter.get_network( + state_dict_weights_descr, load_state=True + ) # FIXME: remove Any if use_tracing: From fedd43ce1fd158b1da821b91f4b8292088a226a7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 4 Dec 2024 14:40:31 +0100 Subject: [PATCH 015/187] bump spec lib version --- bioimageio/core/test_bioimageio_collection.py | 2 +- dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 2 +- dev/env-wo-python.yaml | 2 +- dev/env.yaml | 4 ++-- setup.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/test_bioimageio_collection.py b/bioimageio/core/test_bioimageio_collection.py index 4de6a26f..2cf9ced0 100644 --- a/bioimageio/core/test_bioimageio_collection.py +++ b/bioimageio/core/test_bioimageio_collection.py @@ -17,7 +17,7 @@ def _get_latest_rdf_sources(): for entry in entries: version = entry["versions"][0] ret[f"{entry['concept']}/{version['v']}"] = ( - HttpUrl(version["source"]), + HttpUrl(version["source"]), # pyright: ignore[reportCallIssue] Sha256(version["sha256"]), ) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 7f0d6da9..69030cc9 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -5,7 +5,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black - crick # uncommented - filelock diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index bd12ca02..799d2a59 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -5,7 +5,7 @@ channels: - nodefaults # - pytroch # removed dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - filelock diff --git 
a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index ff0410d9..a0b7c978 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -5,7 +5,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - filelock diff --git a/dev/env.yaml b/dev/env.yaml index c9b62c50..a65158d9 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -1,10 +1,10 @@ -name: core +name: full channels: - conda-forge - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - filelock diff --git a/setup.py b/setup.py index 79c3b0c9..af913c1d 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.3.5", + "bioimageio.spec ==0.5.3.6", "h5py", "imageio>=2.10", "loguru", From 7f6fdf1510f66e17e9c698fdb493adcbca8aa173 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 5 Dec 2024 09:27:16 +0100 Subject: [PATCH 016/187] WIP refactor backend libs --- bioimageio/core/_create_model_adapter.py | 127 ++++++++ bioimageio/core/_model_adapter.py | 93 ++++++ bioimageio/core/backend/__init__.py | 0 .../keras.py} | 2 +- bioimageio/core/backend/pytorch.py | 176 +++++++++++ bioimageio/core/model_adapters.py | 8 + bioimageio/core/model_adapters/__init__.py | 7 - .../core/model_adapters/_model_adapter.py | 2 +- .../model_adapters/_onnx_model_adapter.py | 60 ---- .../model_adapters/_pytorch_model_adapter.py | 176 ----------- .../_tensorflow_model_adapter.py | 281 ------------------ .../_torchscript_model_adapter.py | 79 ----- bioimageio/core/weight_converters/__init__.py | 0 .../core/weight_converters/_add_weights.py | 25 ++ .../core/weight_converters/pytorch_to_onnx.py | 124 ++++++++ 15 files changed, 555 insertions(+), 605 deletions(-) create mode 100644 bioimageio/core/_create_model_adapter.py create mode 100644 
bioimageio/core/_model_adapter.py create mode 100644 bioimageio/core/backend/__init__.py rename bioimageio/core/{model_adapters/_keras_model_adapter.py => backend/keras.py} (98%) create mode 100644 bioimageio/core/backend/pytorch.py create mode 100644 bioimageio/core/model_adapters.py delete mode 100644 bioimageio/core/model_adapters/__init__.py delete mode 100644 bioimageio/core/model_adapters/_onnx_model_adapter.py delete mode 100644 bioimageio/core/model_adapters/_tensorflow_model_adapter.py delete mode 100644 bioimageio/core/model_adapters/_torchscript_model_adapter.py create mode 100644 bioimageio/core/weight_converters/__init__.py create mode 100644 bioimageio/core/weight_converters/_add_weights.py create mode 100644 bioimageio/core/weight_converters/pytorch_to_onnx.py diff --git a/bioimageio/core/_create_model_adapter.py b/bioimageio/core/_create_model_adapter.py new file mode 100644 index 00000000..ee79f260 --- /dev/null +++ b/bioimageio/core/_create_model_adapter.py @@ -0,0 +1,127 @@ +import warnings +from abc import abstractmethod +from typing import List, Optional, Sequence, Tuple, Union, final + +from bioimageio.spec.model import v0_4, v0_5 + +from ._model_adapter import ( + DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER, + ModelAdapter, + WeightsFormat, +) +from .tensor import Tensor + + +def create_model_adapter( + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + devices: Optional[Sequence[str]] = None, + weight_format_priority_order: Optional[Sequence[WeightsFormat]] = None, +): + """ + Creates model adapter based on the passed spec + Note: All specific adapters should happen inside this function to prevent different framework + initializations interfering with each other + """ + if not isinstance(model_description, (v0_4.ModelDescr, v0_5.ModelDescr)): + raise TypeError( + f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_description)}" + ) + + weights = model_description.weights + errors: List[Tuple[WeightsFormat, 
Exception]] = [] + weight_format_priority_order = ( + DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER + if weight_format_priority_order is None + else weight_format_priority_order + ) + # limit weight formats to the ones present + weight_format_priority_order = [ + w for w in weight_format_priority_order if getattr(weights, w) is not None + ] + + for wf in weight_format_priority_order: + if wf == "pytorch_state_dict" and weights.pytorch_state_dict is not None: + try: + from .model_adapters_old._pytorch_model_adapter import ( + PytorchModelAdapter, + ) + + return PytorchModelAdapter( + outputs=model_description.outputs, + weights=weights.pytorch_state_dict, + devices=devices, + ) + except Exception as e: + errors.append((wf, e)) + elif ( + wf == "tensorflow_saved_model_bundle" + and weights.tensorflow_saved_model_bundle is not None + ): + try: + from .model_adapters_old._tensorflow_model_adapter import ( + TensorflowModelAdapter, + ) + + return TensorflowModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "onnx" and weights.onnx is not None: + try: + from .model_adapters_old._onnx_model_adapter import ONNXModelAdapter + + return ONNXModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "torchscript" and weights.torchscript is not None: + try: + from .model_adapters_old._torchscript_model_adapter import ( + TorchscriptModelAdapter, + ) + + return TorchscriptModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "keras_hdf5" and weights.keras_hdf5 is not None: + # keras can either be installed as a separate package or used as part of tensorflow + # we try to first import the keras model adapter using the separate package and, + # if it is not available, try to load the one using tf + try: + from .backend.keras import ( + 
KerasModelAdapter, + keras, # type: ignore + ) + + if keras is None: + from .model_adapters_old._tensorflow_model_adapter import ( + KerasModelAdapter, + ) + + return KerasModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + + assert errors + if len(weight_format_priority_order) == 1: + assert len(errors) == 1 + raise ValueError( + f"The '{weight_format_priority_order[0]}' model adapter could not be created" + + f" in this environment:\n{errors[0][1].__class__.__name__}({errors[0][1]}).\n\n" + ) from errors[0][1] + + else: + error_list = "\n - ".join( + f"{wf}: {e.__class__.__name__}({e})" for wf, e in errors + ) + raise ValueError( + "None of the weight format specific model adapters could be created" + + f" in this environment. Errors are:\n\n{error_list}.\n\n" + ) diff --git a/bioimageio/core/_model_adapter.py b/bioimageio/core/_model_adapter.py new file mode 100644 index 00000000..0438d35e --- /dev/null +++ b/bioimageio/core/_model_adapter.py @@ -0,0 +1,93 @@ +import warnings +from abc import ABC, abstractmethod +from typing import List, Optional, Sequence, Tuple, Union, final + +from bioimageio.spec.model import v0_4, v0_5 + +from .tensor import Tensor + +WeightsFormat = Union[v0_4.WeightsFormat, v0_5.WeightsFormat] + +__all__ = [ + "ModelAdapter", + "create_model_adapter", + "get_weight_formats", +] + +# Known weight formats in order of priority +# First match wins +DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[WeightsFormat, ...] = ( + "pytorch_state_dict", + "tensorflow_saved_model_bundle", + "torchscript", + "onnx", + "keras_hdf5", +) + + +class ModelAdapter(ABC): + """ + Represents model *without* any preprocessing or postprocessing. + + ``` + from bioimageio.core import load_description + + model = load_description(...) + + # option 1: + adapter = ModelAdapter.create(model) + adapter.forward(...) 
+ adapter.unload() + + # option 2: + with ModelAdapter.create(model) as adapter: + adapter.forward(...) + ``` + """ + + @final + @classmethod + def create( + cls, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + devices: Optional[Sequence[str]] = None, + weight_format_priority_order: Optional[Sequence[WeightsFormat]] = None, + ): + """ + Creates model adapter based on the passed spec + Note: All specific adapters should happen inside this function to prevent different framework + initializations interfering with each other + """ + from ._create_model_adapter import create_model_adapter + + return create_model_adapter( + model_description, + devices=devices, + weight_format_priority_order=weight_format_priority_order, + ) + + @final + def load(self, *, devices: Optional[Sequence[str]] = None) -> None: + warnings.warn("Deprecated. ModelAdapter is loaded on initialization") + + @abstractmethod + def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: + """ + Run forward pass of model to get model predictions + """ + # TODO: handle tensor.transpose in here and make _forward_impl the abstract impl + + @abstractmethod + def unload(self): + """ + Unload model from any devices, freeing their memory. + The moder adapter should be considered unusable afterwards. 
+ """ + + +def get_weight_formats() -> List[str]: + """ + Return list of supported weight types + """ + return list(DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER) diff --git a/bioimageio/core/backend/__init__.py b/bioimageio/core/backend/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bioimageio/core/model_adapters/_keras_model_adapter.py b/bioimageio/core/backend/keras.py similarity index 98% rename from bioimageio/core/model_adapters/_keras_model_adapter.py rename to bioimageio/core/backend/keras.py index e6864ccc..1d273cfc 100644 --- a/bioimageio/core/model_adapters/_keras_model_adapter.py +++ b/bioimageio/core/backend/keras.py @@ -10,8 +10,8 @@ from .._settings import settings from ..digest_spec import get_axes_infos +from ..model_adapters import ModelAdapter from ..tensor import Tensor -from ._model_adapter import ModelAdapter os.environ["KERAS_BACKEND"] = settings.keras_backend diff --git a/bioimageio/core/backend/pytorch.py b/bioimageio/core/backend/pytorch.py new file mode 100644 index 00000000..1992f406 --- /dev/null +++ b/bioimageio/core/backend/pytorch.py @@ -0,0 +1,176 @@ +import gc +import warnings +from contextlib import nullcontext +from io import TextIOWrapper +from pathlib import Path +from typing import Any, List, Literal, Optional, Sequence, Tuple, Union + +import torch +from loguru import logger +from torch import nn +from typing_extensions import assert_never + +from bioimageio.spec.common import ZipPath +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.utils import download + +from ..digest_spec import get_axes_infos, import_callable +from ..tensor import Tensor +from ._model_adapter import ModelAdapter + + +class PytorchModelAdapter(ModelAdapter): + def __init__( + self, + *, + outputs: Union[ + Sequence[v0_4.OutputTensorDescr], Sequence[v0_5.OutputTensorDescr] + ], + weights: Union[ + v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr + ], + devices: Optional[Sequence[Union[str, 
torch.device]]] = None, + mode: Literal["eval", "train"] = "eval", + ): + super().__init__() + self.output_dims = [tuple(a.id for a in get_axes_infos(out)) for out in outputs] + devices = self.get_devices(devices) + self._network = self.get_network(weights, load_state=True, devices=devices) + if mode == "eval": + self._network = self._network.eval() + elif mode == "train": + self._network = self._network.train() + else: + assert_never(mode) + + self._mode: Literal["eval", "train"] = mode + self._primary_device = devices[0] + + def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: + if self._mode == "eval": + ctxt = torch.no_grad + elif self._mode == "train": + ctxt = nullcontext + else: + assert_never(self._mode) + + with ctxt(): + tensors = [ + None if ipt is None else torch.from_numpy(ipt.data.data) + for ipt in input_tensors + ] + tensors = [ + (None if t is None else t.to(self._primary_device)) for t in tensors + ] + result: Union[Tuple[Any, ...], List[Any], Any] + result = self._network(*tensors) + if not isinstance(result, (tuple, list)): + result = [result] + + result = [ + ( + None + if r is None + else r.detach().cpu().numpy() if isinstance(r, torch.Tensor) else r + ) + for r in result # pyright: ignore[reportUnknownVariableType] + ] + if len(result) > len(self.output_dims): + raise ValueError( + f"Expected at most {len(self.output_dims)} outputs, but got {len(result)}" + ) + + return [ + None if r is None else Tensor(r, dims=out) + for r, out in zip(result, self.output_dims) + ] + + def unload(self) -> None: + del self._network + _ = gc.collect() # deallocate memory + assert torch is not None + torch.cuda.empty_cache() # release reserved memory + + @classmethod + def get_network( + cls, + weight_spec: Union[ + v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr + ], + *, + load_state: bool = False, + devices: Optional[Sequence[Union[str, torch.device]]] = None, + ) -> nn.Module: + arch = import_callable( + 
weight_spec.architecture, + sha256=( + weight_spec.architecture_sha256 + if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) + else weight_spec.sha256 + ), + ) + model_kwargs = ( + weight_spec.kwargs + if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) + else weight_spec.architecture.kwargs + ) + network = arch(**model_kwargs) + if not isinstance(network, nn.Module): + raise ValueError( + f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" + ) + + if load_state or devices: + use_devices = cls.get_devices(devices) + network = network.to(use_devices[0]) + if load_state: + network = cls.load_state( + network, + path=download(weight_spec).path, + devices=use_devices, + ) + return network + + @staticmethod + def load_state( + network: nn.Module, + path: Union[Path, ZipPath], + devices: Sequence[torch.device], + ) -> nn.Module: + network = network.to(devices[0]) + with path.open("rb") as f: + assert not isinstance(f, TextIOWrapper) + state = torch.load(f, map_location=devices[0]) + + incompatible = network.load_state_dict(state) + if incompatible.missing_keys: + logger.warning("Missing state dict keys: {}", incompatible.missing_keys) + + if incompatible.unexpected_keys: + logger.warning( + "Unexpected state dict keys: {}", incompatible.unexpected_keys + ) + return network + + @staticmethod + def get_devices( + devices: Optional[Sequence[Union[torch.device, str]]] = None, + ) -> List[torch.device]: + if not devices: + torch_devices = [ + ( + torch.device("cuda") + if torch.cuda.is_available() + else torch.device("cpu") + ) + ] + else: + torch_devices = [torch.device(d) for d in devices] + + if len(torch_devices) > 1: + warnings.warn( + f"Multiple devices for single pytorch model not yet implemented; ignoring {torch_devices[1:]}" + ) + torch_devices = torch_devices[:1] + + return torch_devices diff --git a/bioimageio/core/model_adapters.py b/bioimageio/core/model_adapters.py new file mode 100644 index 00000000..86fcfe4b --- 
/dev/null +++ b/bioimageio/core/model_adapters.py @@ -0,0 +1,8 @@ +from ._create_model_adapter import create_model_adapter +from ._model_adapter import ModelAdapter, get_weight_formats + +__all__ = [ + "ModelAdapter", + "create_model_adapter", + "get_weight_formats", +] diff --git a/bioimageio/core/model_adapters/__init__.py b/bioimageio/core/model_adapters/__init__.py deleted file mode 100644 index 01899de9..00000000 --- a/bioimageio/core/model_adapters/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from ._model_adapter import ModelAdapter, create_model_adapter, get_weight_formats - -__all__ = [ - "ModelAdapter", - "create_model_adapter", - "get_weight_formats", -] diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/model_adapters/_model_adapter.py index da2a2ea9..3921f81b 100644 --- a/bioimageio/core/model_adapters/_model_adapter.py +++ b/bioimageio/core/model_adapters/_model_adapter.py @@ -117,7 +117,7 @@ def create( # we try to first import the keras model adapter using the separate package and, # if it is not available, try to load the one using tf try: - from ._keras_model_adapter import ( + from ._keras import ( KerasModelAdapter, keras, # type: ignore ) diff --git a/bioimageio/core/model_adapters/_onnx_model_adapter.py b/bioimageio/core/model_adapters/_onnx_model_adapter.py deleted file mode 100644 index 87045897..00000000 --- a/bioimageio/core/model_adapters/_onnx_model_adapter.py +++ /dev/null @@ -1,60 +0,0 @@ -import warnings -from typing import Any, List, Optional, Sequence, Union - -import onnxruntime as rt - -from bioimageio.spec._internal.type_guards import is_list, is_tuple -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..digest_spec import get_axes_infos -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - - -class ONNXModelAdapter(ModelAdapter): - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: 
Optional[Sequence[str]] = None, - ): - super().__init__() - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] - if model_description.weights.onnx is None: - raise ValueError("No ONNX weights specified for {model_description.name}") - - self._session = rt.InferenceSession( - str(download(model_description.weights.onnx.source).path) - ) - onnx_inputs = self._session.get_inputs() # type: ignore - self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] # type: ignore - - if devices is not None: - warnings.warn( - f"Device management is not implemented for onnx yet, ignoring the devices {devices}" - ) - - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - assert len(input_tensors) == len(self._input_names) - input_arrays = [None if ipt is None else ipt.data.data for ipt in input_tensors] - result: Any = self._session.run( - None, dict(zip(self._input_names, input_arrays)) - ) - if is_list(result) or is_tuple(result): - result_seq = result - else: - result_seq = [result] - - return [ - None if r is None else Tensor(r, dims=axes) - for r, axes in zip(result_seq, self._internal_output_axes) - ] - - def unload(self) -> None: - warnings.warn( - "Device management is not implemented for onnx yet, cannot unload model" - ) diff --git a/bioimageio/core/model_adapters/_pytorch_model_adapter.py b/bioimageio/core/model_adapters/_pytorch_model_adapter.py index 1992f406..e69de29b 100644 --- a/bioimageio/core/model_adapters/_pytorch_model_adapter.py +++ b/bioimageio/core/model_adapters/_pytorch_model_adapter.py @@ -1,176 +0,0 @@ -import gc -import warnings -from contextlib import nullcontext -from io import TextIOWrapper -from pathlib import Path -from typing import Any, List, Literal, Optional, Sequence, Tuple, Union - -import torch -from loguru import logger -from torch import nn -from typing_extensions import assert_never - -from bioimageio.spec.common import ZipPath -from 
bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..digest_spec import get_axes_infos, import_callable -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - - -class PytorchModelAdapter(ModelAdapter): - def __init__( - self, - *, - outputs: Union[ - Sequence[v0_4.OutputTensorDescr], Sequence[v0_5.OutputTensorDescr] - ], - weights: Union[ - v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr - ], - devices: Optional[Sequence[Union[str, torch.device]]] = None, - mode: Literal["eval", "train"] = "eval", - ): - super().__init__() - self.output_dims = [tuple(a.id for a in get_axes_infos(out)) for out in outputs] - devices = self.get_devices(devices) - self._network = self.get_network(weights, load_state=True, devices=devices) - if mode == "eval": - self._network = self._network.eval() - elif mode == "train": - self._network = self._network.train() - else: - assert_never(mode) - - self._mode: Literal["eval", "train"] = mode - self._primary_device = devices[0] - - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - if self._mode == "eval": - ctxt = torch.no_grad - elif self._mode == "train": - ctxt = nullcontext - else: - assert_never(self._mode) - - with ctxt(): - tensors = [ - None if ipt is None else torch.from_numpy(ipt.data.data) - for ipt in input_tensors - ] - tensors = [ - (None if t is None else t.to(self._primary_device)) for t in tensors - ] - result: Union[Tuple[Any, ...], List[Any], Any] - result = self._network(*tensors) - if not isinstance(result, (tuple, list)): - result = [result] - - result = [ - ( - None - if r is None - else r.detach().cpu().numpy() if isinstance(r, torch.Tensor) else r - ) - for r in result # pyright: ignore[reportUnknownVariableType] - ] - if len(result) > len(self.output_dims): - raise ValueError( - f"Expected at most {len(self.output_dims)} outputs, but got {len(result)}" - ) - - return [ - None if r is None else Tensor(r, 
dims=out) - for r, out in zip(result, self.output_dims) - ] - - def unload(self) -> None: - del self._network - _ = gc.collect() # deallocate memory - assert torch is not None - torch.cuda.empty_cache() # release reserved memory - - @classmethod - def get_network( - cls, - weight_spec: Union[ - v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr - ], - *, - load_state: bool = False, - devices: Optional[Sequence[Union[str, torch.device]]] = None, - ) -> nn.Module: - arch = import_callable( - weight_spec.architecture, - sha256=( - weight_spec.architecture_sha256 - if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) - else weight_spec.sha256 - ), - ) - model_kwargs = ( - weight_spec.kwargs - if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) - else weight_spec.architecture.kwargs - ) - network = arch(**model_kwargs) - if not isinstance(network, nn.Module): - raise ValueError( - f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" - ) - - if load_state or devices: - use_devices = cls.get_devices(devices) - network = network.to(use_devices[0]) - if load_state: - network = cls.load_state( - network, - path=download(weight_spec).path, - devices=use_devices, - ) - return network - - @staticmethod - def load_state( - network: nn.Module, - path: Union[Path, ZipPath], - devices: Sequence[torch.device], - ) -> nn.Module: - network = network.to(devices[0]) - with path.open("rb") as f: - assert not isinstance(f, TextIOWrapper) - state = torch.load(f, map_location=devices[0]) - - incompatible = network.load_state_dict(state) - if incompatible.missing_keys: - logger.warning("Missing state dict keys: {}", incompatible.missing_keys) - - if incompatible.unexpected_keys: - logger.warning( - "Unexpected state dict keys: {}", incompatible.unexpected_keys - ) - return network - - @staticmethod - def get_devices( - devices: Optional[Sequence[Union[torch.device, str]]] = None, - ) -> List[torch.device]: - if not devices: - 
torch_devices = [ - ( - torch.device("cuda") - if torch.cuda.is_available() - else torch.device("cpu") - ) - ] - else: - torch_devices = [torch.device(d) for d in devices] - - if len(torch_devices) > 1: - warnings.warn( - f"Multiple devices for single pytorch model not yet implemented; ignoring {torch_devices[1:]}" - ) - torch_devices = torch_devices[:1] - - return torch_devices diff --git a/bioimageio/core/model_adapters/_tensorflow_model_adapter.py b/bioimageio/core/model_adapters/_tensorflow_model_adapter.py deleted file mode 100644 index b469cde7..00000000 --- a/bioimageio/core/model_adapters/_tensorflow_model_adapter.py +++ /dev/null @@ -1,281 +0,0 @@ -import zipfile -from io import TextIOWrapper -from pathlib import Path -from shutil import copyfileobj -from typing import List, Literal, Optional, Sequence, Union - -import numpy as np -import tensorflow as tf # pyright: ignore[reportMissingImports] -from loguru import logger - -from bioimageio.spec.common import FileSource, ZipPath -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..digest_spec import get_axes_infos -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - - -class TensorflowModelAdapterBase(ModelAdapter): - weight_format: Literal["keras_hdf5", "tensorflow_saved_model_bundle"] - - def __init__( - self, - *, - devices: Optional[Sequence[str]] = None, - weights: Union[ - v0_4.KerasHdf5WeightsDescr, - v0_4.TensorflowSavedModelBundleWeightsDescr, - v0_5.KerasHdf5WeightsDescr, - v0_5.TensorflowSavedModelBundleWeightsDescr, - ], - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - ): - super().__init__() - self.model_description = model_description - tf_version = v0_5.Version( - tf.__version__ # pyright: ignore[reportUnknownArgumentType] - ) - model_tf_version = weights.tensorflow_version - if model_tf_version is None: - logger.warning( - "The model does not specify the tensorflow version." 
- + f"Cannot check if it is compatible with intalled tensorflow {tf_version}." - ) - elif model_tf_version > tf_version: - logger.warning( - f"The model specifies a newer tensorflow version than installed: {model_tf_version} > {tf_version}." - ) - elif (model_tf_version.major, model_tf_version.minor) != ( - tf_version.major, - tf_version.minor, - ): - logger.warning( - "The tensorflow version specified by the model does not match the installed: " - + f"{model_tf_version} != {tf_version}." - ) - - self.use_keras_api = ( - tf_version.major > 1 - or self.weight_format == KerasModelAdapter.weight_format - ) - - # TODO tf device management - if devices is not None: - logger.warning( - f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" - ) - - weight_file = self.require_unzipped(weights.source) - self._network = self._get_network(weight_file) - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] - - # TODO: check how to load tf weights without unzipping - def require_unzipped(self, weight_file: FileSource): - local_weights_file = download(weight_file).path - if isinstance(local_weights_file, ZipPath): - # weights file is in a bioimageio zip package - out_path = ( - Path("bioimageio_unzipped_tf_weights") / local_weights_file.filename - ) - with local_weights_file.open("rb") as src, out_path.open("wb") as dst: - assert not isinstance(src, TextIOWrapper) - copyfileobj(src, dst) - - local_weights_file = out_path - - if zipfile.is_zipfile(local_weights_file): - # weights file itself is a zipfile - out_path = local_weights_file.with_suffix(".unzipped") - with zipfile.ZipFile(local_weights_file, "r") as f: - f.extractall(out_path) - - return out_path - else: - return local_weights_file - - def _get_network( # pyright: ignore[reportUnknownParameterType] - self, weight_file: FileSource - ): - weight_file = self.require_unzipped(weight_file) - assert tf is not None - if 
self.use_keras_api: - try: - return tf.keras.layers.TFSMLayer( - weight_file, call_endpoint="serve" - ) # pyright: ignore[reportUnknownVariableType] - except Exception as e: - try: - return tf.keras.layers.TFSMLayer( - weight_file, call_endpoint="serving_default" - ) # pyright: ignore[reportUnknownVariableType] - except Exception as ee: - logger.opt(exception=ee).info( - "keras.layers.TFSMLayer error for alternative call_endpoint='serving_default'" - ) - raise e - else: - # NOTE in tf1 the model needs to be loaded inside of the session, so we cannot preload the model - return str(weight_file) - - # TODO currently we relaod the model every time. it would be better to keep the graph and session - # alive in between of forward passes (but then the sessions need to be properly opened / closed) - def _forward_tf( # pyright: ignore[reportUnknownParameterType] - self, *input_tensors: Optional[Tensor] - ): - assert tf is not None - input_keys = [ - ipt.name if isinstance(ipt, v0_4.InputTensorDescr) else ipt.id - for ipt in self.model_description.inputs - ] - output_keys = [ - out.name if isinstance(out, v0_4.OutputTensorDescr) else out.id - for out in self.model_description.outputs - ] - # TODO read from spec - tag = ( # pyright: ignore[reportUnknownVariableType] - tf.saved_model.tag_constants.SERVING - ) - signature_key = ( # pyright: ignore[reportUnknownVariableType] - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - ) - - graph = tf.Graph() # pyright: ignore[reportUnknownVariableType] - with graph.as_default(): - with tf.Session( - graph=graph - ) as sess: # pyright: ignore[reportUnknownVariableType] - # load the model and the signature - graph_def = tf.saved_model.loader.load( # pyright: ignore[reportUnknownVariableType] - sess, [tag], self._network - ) - signature = ( # pyright: ignore[reportUnknownVariableType] - graph_def.signature_def - ) - - # get the tensors into the graph - in_names = [ # pyright: ignore[reportUnknownVariableType] - 
signature[signature_key].inputs[key].name for key in input_keys - ] - out_names = [ # pyright: ignore[reportUnknownVariableType] - signature[signature_key].outputs[key].name for key in output_keys - ] - in_tensors = [ # pyright: ignore[reportUnknownVariableType] - graph.get_tensor_by_name(name) - for name in in_names # pyright: ignore[reportUnknownVariableType] - ] - out_tensors = [ # pyright: ignore[reportUnknownVariableType] - graph.get_tensor_by_name(name) - for name in out_names # pyright: ignore[reportUnknownVariableType] - ] - - # run prediction - res = sess.run( # pyright: ignore[reportUnknownVariableType] - dict( - zip( - out_names, # pyright: ignore[reportUnknownArgumentType] - out_tensors, # pyright: ignore[reportUnknownArgumentType] - ) - ), - dict( - zip( - in_tensors, # pyright: ignore[reportUnknownArgumentType] - input_tensors, - ) - ), - ) - # from dict to list of tensors - res = [ # pyright: ignore[reportUnknownVariableType] - res[out] - for out in out_names # pyright: ignore[reportUnknownVariableType] - ] - - return res # pyright: ignore[reportUnknownVariableType] - - def _forward_keras( # pyright: ignore[reportUnknownParameterType] - self, *input_tensors: Optional[Tensor] - ): - assert self.use_keras_api - assert not isinstance(self._network, str) - assert tf is not None - tf_tensor = [ # pyright: ignore[reportUnknownVariableType] - None if ipt is None else tf.convert_to_tensor(ipt) for ipt in input_tensors - ] - - result = self._network(*tf_tensor) # pyright: ignore[reportUnknownVariableType] - - assert isinstance(result, dict) - - # TODO: Use RDF's `outputs[i].id` here - result = list(result.values()) - - return [ # pyright: ignore[reportUnknownVariableType] - (None if r is None else r if isinstance(r, np.ndarray) else r.numpy()) - for r in result # pyright: ignore[reportUnknownVariableType] - ] - - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - data = [None if ipt is None else ipt.data for ipt in input_tensors] 
- if self.use_keras_api: - result = self._forward_keras( # pyright: ignore[reportUnknownVariableType] - *data - ) - else: - result = self._forward_tf( # pyright: ignore[reportUnknownVariableType] - *data - ) - - return [ - None if r is None else Tensor(r, dims=axes) - for r, axes in zip( # pyright: ignore[reportUnknownVariableType] - result, # pyright: ignore[reportUnknownArgumentType] - self._internal_output_axes, - ) - ] - - def unload(self) -> None: - logger.warning( - "Device management is not implemented for keras yet, cannot unload model" - ) - - -class TensorflowModelAdapter(TensorflowModelAdapterBase): - weight_format = "tensorflow_saved_model_bundle" - - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - if model_description.weights.tensorflow_saved_model_bundle is None: - raise ValueError("missing tensorflow_saved_model_bundle weights") - - super().__init__( - devices=devices, - weights=model_description.weights.tensorflow_saved_model_bundle, - model_description=model_description, - ) - - -class KerasModelAdapter(TensorflowModelAdapterBase): - weight_format = "keras_hdf5" - - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - if model_description.weights.keras_hdf5 is None: - raise ValueError("missing keras_hdf5 weights") - - super().__init__( - model_description=model_description, - devices=devices, - weights=model_description.weights.keras_hdf5, - ) diff --git a/bioimageio/core/model_adapters/_torchscript_model_adapter.py b/bioimageio/core/model_adapters/_torchscript_model_adapter.py deleted file mode 100644 index 346718a9..00000000 --- a/bioimageio/core/model_adapters/_torchscript_model_adapter.py +++ /dev/null @@ -1,79 +0,0 @@ -import gc -import warnings -from typing import Any, List, Optional, Sequence, Union - -import torch - -from bioimageio.spec._internal.type_guards import 
is_list, is_ndarray, is_tuple -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..digest_spec import get_axes_infos -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - - -class TorchscriptModelAdapter(ModelAdapter): - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - super().__init__() - if model_description.weights.torchscript is None: - raise ValueError( - f"No torchscript weights found for model {model_description.name}" - ) - - weight_path = download(model_description.weights.torchscript.source).path - if devices is None: - self.devices = ["cuda" if torch.cuda.is_available() else "cpu"] - else: - self.devices = [torch.device(d) for d in devices] - - if len(self.devices) > 1: - warnings.warn( - "Multiple devices for single torchscript model not yet implemented" - ) - - self._model = torch.jit.load(weight_path) - self._model.to(self.devices[0]) - self._model = self._model.eval() - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] - - def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: - with torch.no_grad(): - torch_tensor = [ - None if b is None else torch.from_numpy(b.data.data).to(self.devices[0]) - for b in batch - ] - _result: Any = self._model.forward(*torch_tensor) - if is_list(_result) or is_tuple(_result): - result: Sequence[Any] = _result - else: - result = [_result] - - result = [ - ( - None - if r is None - else r.cpu().numpy() if isinstance(r, torch.Tensor) else r - ) - for r in result - ] - - assert len(result) == len(self._internal_output_axes) - return [ - None if r is None else Tensor(r, dims=axes) if is_ndarray(r) else r - for r, axes in zip(result, self._internal_output_axes) - ] - - def unload(self) -> None: - self._devices = None - del self._model - _ = gc.collect() # deallocate memory - 
torch.cuda.empty_cache() # release reserved memory diff --git a/bioimageio/core/weight_converters/__init__.py b/bioimageio/core/weight_converters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py new file mode 100644 index 00000000..76041550 --- /dev/null +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -0,0 +1,25 @@ +from abc import ABC +from typing import Optional, Sequence, Union, assert_never, final + +from bioimageio.spec.model import v0_4, v0_5 + + +def increase_available_weight_formats( + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + source_format: v0_5.WeightsFormat, + target_format: v0_5.WeightsFormat, + *, + devices: Optional[Sequence[str]] = None, +): + if not isinstance(model_description, (v0_4.ModelDescr, v0_5.ModelDescr)): + raise TypeError( + f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_description)}" + ) + + if (source_format, target_format) == ("pytorch_state_dict", "onnx"): + from .pytorch_to_onnx import convert_pytorch_to_onnx + + else: + raise NotImplementedError( + f"Converting from '{source_format}' to '{target_format}' is not yet implemented. 
Please create an issue at https://github.com/bioimage-io/core-bioimage-io-python/issues/new/choose" + ) diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py new file mode 100644 index 00000000..acb621e2 --- /dev/null +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -0,0 +1,124 @@ +import abc +import os +import shutil +from pathlib import Path +from typing import Any, List, Sequence, Union, cast, no_type_check +from zipfile import ZipFile + +import numpy as np +import torch +from numpy.testing import assert_array_almost_equal +from torch.jit import ScriptModule +from typing_extensions import assert_never + +from bioimageio.core.digest_spec import get_member_id, get_test_inputs +from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter +from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.version_type import Version +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model.v0_5 import WeightsEntryDescrBase + + +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + # output_path: Path, + use_tracing: bool = True, + test_decimal: int = 4, + verbose: bool = False, + opset_version: int = 15, +) -> v0_5.OnnxWeightsDescr: + """ + Convert model weights from the PyTorch state_dict format to the ONNX format. + + # TODO: update Args + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights. + output_path (Path): + The file path where the ONNX model will be saved. + use_tracing (bool, optional): + Whether to use tracing or scripting to export the ONNX format. Defaults to True. + test_decimal (int, optional): + The decimal precision for comparing the results between the original and converted models. + This is used in the `assert_array_almost_equal` function to check if the outputs match. + Defaults to 4. 
+ verbose (bool, optional): + If True, will print out detailed information during the ONNX export process. Defaults to False. + opset_version (int, optional): + The ONNX opset version to use for the export. Defaults to 15. + Raises: + ValueError: + If the provided model does not have weights in the PyTorch state_dict format. + ImportError: + If ONNX Runtime is not available for checking the exported ONNX model. + ValueError: + If the results before and after weights conversion do not agree. + Returns: + v0_5.OnnxWeightsDescr: + A descriptor object that contains information about the exported ONNX weights. + """ + + state_dict_weights_descr = model_descr.weights.pytorch_state_dict + if state_dict_weights_descr is None: + raise ValueError( + "The provided model does not have weights in the pytorch state dict format" + ) + + with torch.no_grad(): + sample = get_test_inputs(model_descr) + input_data = [ + sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs + ] + input_tensors = [torch.from_numpy(ipt) for ipt in input_data] + model = load_torch_model(state_dict_weights_descr) + + expected_tensors = model(*input_tensors) + if isinstance(expected_tensors, torch.Tensor): + expected_tensors = [expected_tensors] + expected_outputs: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in expected_tensors + ] + + if use_tracing: + torch.onnx.export( + model, + (tuple(input_tensors) if len(input_tensors) > 1 else input_tensors[0]), + str(output_path), + verbose=verbose, + opset_version=opset_version, + ) + else: + raise NotImplementedError + + try: + import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] + except ImportError: + raise ImportError( + "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." 
+ ) + + # check the onnx model + sess = rt.InferenceSession(str(output_path)) + onnx_input_node_args = cast( + List[Any], sess.get_inputs() + ) # fixme: remove cast, try using rt.NodeArg instead of Any + onnx_inputs = { + input_name.name: inp + for input_name, inp in zip(onnx_input_node_args, input_data) + } + outputs = cast( + Sequence[np.ndarray[Any, Any]], sess.run(None, onnx_inputs) + ) # FIXME: remove cast + + try: + for exp, out in zip(expected_outputs, outputs): + assert_array_almost_equal(exp, out, decimal=test_decimal) + except AssertionError as e: + raise ValueError( + f"Results before and after weights conversion do not agree:\n {str(e)}" + ) + + return v0_5.OnnxWeightsDescr( + source=output_path, parent="pytorch_state_dict", opset_version=opset_version + ) From 7fea808f0570cfec4b74f36efb2835ab18bc3e9c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 5 Dec 2024 11:39:24 +0100 Subject: [PATCH 017/187] add summary_path arg --- bioimageio/core/cli.py | 4 ++++ bioimageio/core/commands.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index fad44ab3..b81b1e5e 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -133,6 +133,9 @@ class TestCmd(CmdBase, WithSource): decimal: int = 4 """Precision for numerical comparisons""" + summary_path: Optional[Path] = None + """Path to save validation summary as JSON file.""" + def run(self): sys.exit( test( @@ -140,6 +143,7 @@ def run(self): weight_format=self.weight_format, devices=self.devices, decimal=self.decimal, + summary_path=self.summary_path, ) ) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index c71d495f..6ad54ab7 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -1,6 +1,7 @@ """These functions implement the logic of the bioimageio command line interface defined in `bioimageio.core.cli`.""" +import json from pathlib import Path from typing import Optional, Sequence, Union @@ -26,6 +27,7 
@@ def test( weight_format: WeightFormatArgAll = "all", devices: Optional[Union[str, Sequence[str]]] = None, decimal: int = 4, + summary_path: Optional[Path] = None, ) -> int: """test a bioimageio resource @@ -35,6 +37,7 @@ def test( weight_format: (model only) The weight format to use devices: Device(s) to use for testing decimal: Precision for numerical comparisons + summary_path: Path to save validation summary as JSON file. """ if isinstance(descr, InvalidDescr): descr.validation_summary.display() @@ -47,6 +50,9 @@ def test( decimal=decimal, ) summary.display() + if summary_path is not None: + _ = summary_path.write_text(summary.model_dump_json(indent=4)) + return 0 if summary.status == "passed" else 1 From 4564f7ca92085b81adbce41d94530ab95f9276da Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 5 Dec 2024 21:20:24 +0100 Subject: [PATCH 018/187] update annotation --- bioimageio/core/_resource_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 6ace6d5c..23a9b8f8 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -94,7 +94,7 @@ def enable_determinism(mode: Literal["seed_only", "full"]): def test_model( - source: Union[v0_5.ModelDescr, PermissiveFileSource], + source: Union[v0_4.ModelDescr, v0_5.ModelDescr, PermissiveFileSource], weight_format: Optional[WeightsFormat] = None, devices: Optional[List[str]] = None, absolute_tolerance: float = 1.5e-4, From 00e6ba15236cc2182a2dedd1c5cb783064661239 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 5 Dec 2024 21:20:53 +0100 Subject: [PATCH 019/187] fix tf seeding --- bioimageio/core/_resource_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 23a9b8f8..88323492 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -81,11 +81,11 @@ def 
enable_determinism(mode: Literal["seed_only", "full"]): try: try: - import tensorflow as tf # pyright: ignore[reportMissingImports] + import tensorflow as tf except ImportError: pass else: - tf.random.seed(0) + tf.random.set_seed(0) if mode == "full": tf.config.experimental.enable_op_determinism() # TODO: find possibility to switch it off again?? From 5d1e2ce1543762d4a0aa3d3bfc3a7512d039f63c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 5 Dec 2024 22:06:12 +0100 Subject: [PATCH 020/187] expose test_description_in_conda_env --- README.md | 5 + bioimageio/core/__init__.py | 2 + bioimageio/core/_dynamic_conda_env.py | 165 ++++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 bioimageio/core/_dynamic_conda_env.py diff --git a/README.md b/README.md index 49e6cbbd..2233ec97 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,11 @@ The model specification and its validation tools can be found at ValidationSummary: + """Run test_model in a dedicated conda env + + Args: + source: Path or URL to model description. + weight_format: Weight format to test. + Default: All weight formats present in **source**. + conda_env: conda environment including bioimageio.core dependency. + Default: Use `bioimageio.spec.get_conda_env` to obtain a model weight + specific conda environment. + devices: Devices to test with, e.g. 'cpu', 'cuda'. + Default (may be weight format dependent): ['cuda'] if available, ['cpu'] otherwise. + absolute_tolerance: Maximum absolute tolerance of reproduced output tensors. + relative_tolerance: Maximum relative tolerance of reproduced output tensors. + determinism: Modes to improve reproducibility of test outputs. + run_command: Function to execute terminal commands. 
+ """ + + try: + run_command(["which", "conda"]) + except Exception as e: + raise RuntimeError("Conda not available") from e + + descr = load_description(source) + if not isinstance(descr, (v0_4.ModelDescr, v0_5.ModelDescr)): + raise NotImplementedError("Not yet implemented for non-model resources") + + if weight_format is None: + all_present_wfs = [ + wf for wf in get_args(WeightsFormat) if getattr(descr.weights, wf) + ] + ignore_wfs = [wf for wf in all_present_wfs if wf in ["tensorflow_js"]] + logger.info( + "Found weight formats {}. Start testing all{}...", + all_present_wfs, + f" (except: {', '.join(ignore_wfs)}) " if ignore_wfs else "", + ) + summary = test_description_in_env( + source, + weight_format=all_present_wfs[0], + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + determinism=determinism, + ) + for wf in all_present_wfs[1:]: + additional_summary = test_description_in_env( + source, + weight_format=all_present_wfs[0], + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + determinism=determinism, + ) + for d in additional_summary.details: + # TODO: filter reduntant details; group details + summary.add_detail(d) + return summary + + if weight_format == "pytorch_state_dict": + wf = descr.weights.pytorch_state_dict + elif weight_format == "torchscript": + wf = descr.weights.torchscript + elif weight_format == "keras_hdf5": + wf = descr.weights.keras_hdf5 + elif weight_format == "onnx": + wf = descr.weights.onnx + elif weight_format == "tensorflow_saved_model_bundle": + wf = descr.weights.tensorflow_saved_model_bundle + elif weight_format == "tensorflow_js": + raise RuntimeError( + "testing 'tensorflow_js' is not supported by bioimageio.core" + ) + else: + assert_never(weight_format) + + assert wf is not None + if conda_env is None: + conda_env = get_conda_env(entry=wf) + + # remove name as we crate a name based on the env description hash value + 
conda_env.name = None + + dumped_env = conda_env.model_dump(mode="json", exclude_none=True) + if not is_yaml_value(dumped_env): + raise ValueError(f"Failed to dump conda env to valid YAML {conda_env}") + + env_io = StringIO() + write_yaml(dumped_env, file=env_io) + encoded_env = env_io.getvalue().encode() + env_name = sha256(encoded_env).hexdigest() + + with TemporaryDirectory() as _d: + folder = Path(_d) + try: + run_command(["conda", "activate", env_name]) + except Exception: + path = folder / "env.yaml" + _ = path.write_bytes(encoded_env) + + run_command( + ["conda", "env", "create", "--file", str(path), "--name", env_name] + ) + run_command(["conda", "activate", env_name]) + + summary_path = folder / "summary.json" + run_command( + [ + "conda", + "run", + "-n", + env_name, + "bioimageio", + "test", + str(source), + "--summary-path", + str(summary_path), + ] + ) + return ValidationSummary.model_validate_json(summary_path.read_bytes()) From df36d15e413cebe0be3b1b6919a4788b0a0a5ab2 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 5 Dec 2024 22:06:26 +0100 Subject: [PATCH 021/187] docstring formatting --- bioimageio/core/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index b81b1e5e..ad75a51f 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -113,14 +113,14 @@ def descr_id(self) -> str: class ValidateFormatCmd(CmdBase, WithSource): - """validate the meta data format of a bioimageio resource.""" + """Validate the meta data format of a bioimageio resource.""" def run(self): sys.exit(validate_format(self.descr)) class TestCmd(CmdBase, WithSource): - """Test a bioimageio resource (beyond meta data formatting)""" + """Test a bioimageio resource (beyond meta data formatting).""" weight_format: WeightFormatArgAll = "all" """The weight format to limit testing to. 
@@ -149,7 +149,7 @@ def run(self): class PackageCmd(CmdBase, WithSource): - """save a resource's metadata with its associated files.""" + """Save a resource's metadata with its associated files.""" path: CliPositionalArg[Path] """The path to write the (zipped) package to. From 376507f24972356258c65a9f2509d68e0fecd567 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 6 Dec 2024 10:54:51 +0100 Subject: [PATCH 022/187] absorb test_description_in_conda_env into test_description --- bioimageio/core/__init__.py | 1 - bioimageio/core/_dynamic_conda_env.py | 165 ------------------- bioimageio/core/_resource_tests.py | 228 ++++++++++++++++++++++++-- bioimageio/core/cli.py | 18 +- bioimageio/core/commands.py | 15 +- 5 files changed, 236 insertions(+), 191 deletions(-) delete mode 100644 bioimageio/core/_dynamic_conda_env.py diff --git a/bioimageio/core/__init__.py b/bioimageio/core/__init__.py index 9e4d83d9..f47f8f63 100644 --- a/bioimageio/core/__init__.py +++ b/bioimageio/core/__init__.py @@ -32,7 +32,6 @@ stat_measures, tensor, ) -from ._dynamic_conda_env import test_description_in_conda_env from ._prediction_pipeline import PredictionPipeline, create_prediction_pipeline from ._resource_tests import ( enable_determinism, diff --git a/bioimageio/core/_dynamic_conda_env.py b/bioimageio/core/_dynamic_conda_env.py deleted file mode 100644 index 26e3ba35..00000000 --- a/bioimageio/core/_dynamic_conda_env.py +++ /dev/null @@ -1,165 +0,0 @@ -import subprocess -from hashlib import sha256 -from io import StringIO -from pathlib import Path -from tempfile import TemporaryDirectory -from typing import ( - Callable, - List, - Literal, - Optional, - Sequence, - assert_never, -) - -from loguru import logger -from typing_extensions import get_args - -from bioimageio.spec import ( - BioimageioCondaEnv, - ValidationSummary, - get_conda_env, - load_description, -) -from bioimageio.spec._internal.io import is_yaml_value -from bioimageio.spec._internal.io_utils import write_yaml -from 
bioimageio.spec.common import PermissiveFileSource -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.model.v0_5 import WeightsFormat - - -def default_run_command(args: Sequence[str]): - logger.info("running '{}'...", " ".join(args)) - _ = subprocess.run(args, shell=True, text=True, check=True) - - -def test_description_in_conda_env( - source: PermissiveFileSource, - *, - weight_format: Optional[WeightsFormat] = None, - conda_env: Optional[BioimageioCondaEnv] = None, - devices: Optional[List[str]] = None, - absolute_tolerance: float = 1.5e-4, - relative_tolerance: float = 1e-4, - determinism: Literal["seed_only", "full"] = "seed_only", - run_command: Callable[[Sequence[str]], None] = default_run_command, -) -> ValidationSummary: - """Run test_model in a dedicated conda env - - Args: - source: Path or URL to model description. - weight_format: Weight format to test. - Default: All weight formats present in **source**. - conda_env: conda environment including bioimageio.core dependency. - Default: Use `bioimageio.spec.get_conda_env` to obtain a model weight - specific conda environment. - devices: Devices to test with, e.g. 'cpu', 'cuda'. - Default (may be weight format dependent): ['cuda'] if available, ['cpu'] otherwise. - absolute_tolerance: Maximum absolute tolerance of reproduced output tensors. - relative_tolerance: Maximum relative tolerance of reproduced output tensors. - determinism: Modes to improve reproducibility of test outputs. - run_command: Function to execute terminal commands. 
- """ - - try: - run_command(["which", "conda"]) - except Exception as e: - raise RuntimeError("Conda not available") from e - - descr = load_description(source) - if not isinstance(descr, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise NotImplementedError("Not yet implemented for non-model resources") - - if weight_format is None: - all_present_wfs = [ - wf for wf in get_args(WeightsFormat) if getattr(descr.weights, wf) - ] - ignore_wfs = [wf for wf in all_present_wfs if wf in ["tensorflow_js"]] - logger.info( - "Found weight formats {}. Start testing all{}...", - all_present_wfs, - f" (except: {', '.join(ignore_wfs)}) " if ignore_wfs else "", - ) - summary = test_description_in_env( - source, - weight_format=all_present_wfs[0], - devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, - determinism=determinism, - ) - for wf in all_present_wfs[1:]: - additional_summary = test_description_in_env( - source, - weight_format=all_present_wfs[0], - devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, - determinism=determinism, - ) - for d in additional_summary.details: - # TODO: filter reduntant details; group details - summary.add_detail(d) - return summary - - if weight_format == "pytorch_state_dict": - wf = descr.weights.pytorch_state_dict - elif weight_format == "torchscript": - wf = descr.weights.torchscript - elif weight_format == "keras_hdf5": - wf = descr.weights.keras_hdf5 - elif weight_format == "onnx": - wf = descr.weights.onnx - elif weight_format == "tensorflow_saved_model_bundle": - wf = descr.weights.tensorflow_saved_model_bundle - elif weight_format == "tensorflow_js": - raise RuntimeError( - "testing 'tensorflow_js' is not supported by bioimageio.core" - ) - else: - assert_never(weight_format) - - assert wf is not None - if conda_env is None: - conda_env = get_conda_env(entry=wf) - - # remove name as we crate a name based on the env description hash value - 
conda_env.name = None - - dumped_env = conda_env.model_dump(mode="json", exclude_none=True) - if not is_yaml_value(dumped_env): - raise ValueError(f"Failed to dump conda env to valid YAML {conda_env}") - - env_io = StringIO() - write_yaml(dumped_env, file=env_io) - encoded_env = env_io.getvalue().encode() - env_name = sha256(encoded_env).hexdigest() - - with TemporaryDirectory() as _d: - folder = Path(_d) - try: - run_command(["conda", "activate", env_name]) - except Exception: - path = folder / "env.yaml" - _ = path.write_bytes(encoded_env) - - run_command( - ["conda", "env", "create", "--file", str(path), "--name", env_name] - ) - run_command(["conda", "activate", env_name]) - - summary_path = folder / "summary.json" - run_command( - [ - "conda", - "run", - "-n", - env_name, - "bioimageio", - "test", - str(source), - "--summary-path", - str(summary_path), - ] - ) - return ValidationSummary.model_validate_json(summary_path.read_bytes()) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 88323492..bcffbc53 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -1,21 +1,43 @@ +import hashlib +import platform +import subprocess import traceback import warnings +from io import StringIO from itertools import product -from typing import Dict, Hashable, List, Literal, Optional, Sequence, Set, Tuple, Union +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import ( + Callable, + Dict, + Hashable, + List, + Literal, + Optional, + Sequence, + Set, + Tuple, + Union, +) import numpy as np from loguru import logger +from typing_extensions import assert_never, get_args from bioimageio.spec import ( + BioimageioCondaEnv, InvalidDescr, ResourceDescr, build_description, dump_description, + get_conda_env, load_description, + save_bioimageio_package, ) from bioimageio.spec._internal.common_nodes import ResourceDescrBase +from bioimageio.spec._internal.io import is_yaml_value 
+from bioimageio.spec._internal.io_utils import read_yaml, write_yaml from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource -from bioimageio.spec.get_conda_env import get_conda_env from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat from bioimageio.spec.summary import ( @@ -116,6 +138,11 @@ def test_model( ) +def default_run_command(args: Sequence[str]): + logger.info("running '{}'...", " ".join(args)) + _ = subprocess.run(args, shell=True, text=True, check=True) + + def test_description( source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], *, @@ -127,20 +154,193 @@ def test_description( decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, + runtime_env: Union[ + Literal["currently-active", "as-described"], Path, BioimageioCondaEnv + ] = ("currently-active"), + run_command: Callable[[Sequence[str]], None] = default_run_command, ) -> ValidationSummary: - """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models""" - rd = load_description_and_test( - source, - format_version=format_version, - weight_format=weight_format, - devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, - decimal=decimal, - determinism=determinism, - expected_type=expected_type, + """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models. + + Args: + source: model description source. + weight_format: Weight format to test. + Default: All weight formats present in **source**. + devices: Devices to test with, e.g. 'cpu', 'cuda'. + Default (may be weight format dependent): ['cuda'] if available, ['cpu'] otherwise. + absolute_tolerance: Maximum absolute tolerance of reproduced output tensors. + relative_tolerance: Maximum relative tolerance of reproduced output tensors. + determinism: Modes to improve reproducibility of test outputs. 
+ runtime_env: (Experimental feature!) The Python environment to run the tests in + - `"currently-active"`: Use active Python interpreter. + - `"as-described"`: Use `bioimageio.spec.get_conda_env` to generate a conda + environment YAML file based on the model weights description. + - A `BioimageioCondaEnv` or a path to a conda environment YAML file. + Note: The `bioimageio.core` dependency will be added automatically if not present. + run_command: (Experimental feature!) Function to execute (conda) terminal commands in a subprocess + (ignored if **runtime_env** is `"currently-active"`). + """ + if runtime_env == "currently-active": + rd = load_description_and_test( + source, + format_version=format_version, + weight_format=weight_format, + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + decimal=decimal, + determinism=determinism, + expected_type=expected_type, + ) + return rd.validation_summary + + if runtime_env == "as-described": + conda_env = None + elif isinstance(runtime_env, (str, Path)): + conda_env = BioimageioCondaEnv.model_validate(read_yaml(Path(runtime_env))) + elif isinstance(runtime_env, BioimageioCondaEnv): + conda_env = runtime_env + else: + assert_never(runtime_env) + + with TemporaryDirectory(ignore_cleanup_errors=True) as _d: + working_dir = Path(_d) + if isinstance(source, (dict, ResourceDescrBase)): + file_source = save_bioimageio_package( + source, output_path=working_dir / "package.zip" + ) + else: + file_source = source + + return _test_in_env( + file_source, + working_dir=working_dir, + weight_format=weight_format, + conda_env=conda_env, + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + determinism=determinism, + run_command=run_command, + ) + + +def _test_in_env( + source: PermissiveFileSource, + *, + working_dir: Path, + weight_format: Optional[WeightsFormat], + conda_env: Optional[BioimageioCondaEnv], + devices: 
Optional[Sequence[str]],
+    absolute_tolerance: float,
+    relative_tolerance: float,
+    determinism: Literal["seed_only", "full"],
+    run_command: Callable[[Sequence[str]], None],
+) -> ValidationSummary:
+    descr = load_description(source)
+
+    if not isinstance(descr, (v0_4.ModelDescr, v0_5.ModelDescr)):
+        raise NotImplementedError("Not yet implemented for non-model resources")
+
+    if weight_format is None:
+        all_present_wfs = [
+            wf for wf in get_args(WeightsFormat) if getattr(descr.weights, wf)
+        ]
+        ignore_wfs = [wf for wf in all_present_wfs if wf in ["tensorflow_js"]]
+        logger.info(
+            "Found weight formats {}. Start testing all{}...",
+            all_present_wfs,
+            f" (except: {', '.join(ignore_wfs)}) " if ignore_wfs else "",
+        )
+        summary = _test_in_env(
+            source,
+            working_dir=working_dir / all_present_wfs[0],
+            weight_format=all_present_wfs[0],
+            devices=devices,
+            absolute_tolerance=absolute_tolerance,
+            relative_tolerance=relative_tolerance,
+            determinism=determinism,
+            conda_env=conda_env,
+            run_command=run_command,
+        )
+        for wf in all_present_wfs[1:]:
+            additional_summary = _test_in_env(
+                source,
+                working_dir=working_dir / wf,
+                weight_format=wf,
+                devices=devices,
+                absolute_tolerance=absolute_tolerance,
+                relative_tolerance=relative_tolerance,
+                determinism=determinism,
+                conda_env=conda_env,
+                run_command=run_command,
+            )
+            for d in additional_summary.details:
+                # TODO: filter redundant details; group details
+                summary.add_detail(d)
+        return summary
+
+    if weight_format == "pytorch_state_dict":
+        wf = descr.weights.pytorch_state_dict
+    elif weight_format == "torchscript":
+        wf = descr.weights.torchscript
+    elif weight_format == "keras_hdf5":
+        wf = descr.weights.keras_hdf5
+    elif weight_format == "onnx":
+        wf = descr.weights.onnx
+    elif weight_format == "tensorflow_saved_model_bundle":
+        wf = descr.weights.tensorflow_saved_model_bundle
+    elif weight_format == "tensorflow_js":
+        raise RuntimeError(
+            "testing 'tensorflow_js' is not supported by bioimageio.core"
+
)
+    else:
+        assert_never(weight_format)
+
+    assert wf is not None
+    if conda_env is None:
+        conda_env = get_conda_env(entry=wf)
+
+    # remove name as we create a name based on the env description hash value
+    conda_env.name = None
+
+    dumped_env = conda_env.model_dump(mode="json", exclude_none=True)
+    if not is_yaml_value(dumped_env):
+        raise ValueError(f"Failed to dump conda env to valid YAML {conda_env}")
+
+    env_io = StringIO()
+    write_yaml(dumped_env, file=env_io)
+    encoded_env = env_io.getvalue().encode()
+    env_name = hashlib.sha256(encoded_env).hexdigest()
+
+    try:
+        run_command(["where" if platform.system() == "Windows" else "which", "conda"])
+    except Exception as e:
+        raise RuntimeError("Conda not available") from e
+
+    working_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        run_command(["conda", "activate", env_name])
+    except Exception:
+        path = working_dir / "env.yaml"
+        _ = path.write_bytes(encoded_env)
+        logger.debug("written conda env to {}", path)
+        run_command(["conda", "env", "create", f"--file={path}", f"--name={env_name}"])
+        run_command(["conda", "activate", env_name])
+
+    summary_path = working_dir / "summary.json"
+    run_command(
+        [
+            "conda",
+            "run",
+            "-n",
+            env_name,
+            "bioimageio",
+            "test",
+            str(source),
+            f"--summary-path={summary_path}",
+        ]
     )
-    return rd.validation_summary
+    return ValidationSummary.model_validate_json(summary_path.read_bytes())
 
 
 def load_description_and_test(
diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py
index ad75a51f..1fc95310 100644
--- a/bioimageio/core/cli.py
+++ b/bioimageio/core/cli.py
@@ -18,6 +18,7 @@
     Dict,
     Iterable,
     List,
+    Literal,
     Mapping,
     Optional,
     Sequence,
@@ -133,7 +134,19 @@ class TestCmd(CmdBase, WithSource):
     decimal: int = 4
     """Precision for numerical comparisons"""
 
-    summary_path: Optional[Path] = None
+    runtime_env: Union[Literal["currently-active", "as-described"], Path] = Field(
+        "currently-active", alias="runtime-env"
+    )
+    """The python environment to run the tests in
+
+    -
`"currently-active"`: use active Python interpreter + - `"as-described"`: generate a conda environment YAML file based on the model + weights description. + - A path to a conda environment YAML. + Note: The `bioimageio.core` dependency will be added automatically if not present. + """ + + summary_path: Optional[Path] = Field(None, alias="summary-path") """Path to save validation summary as JSON file.""" def run(self): @@ -144,6 +157,7 @@ def run(self): devices=self.devices, decimal=self.decimal, summary_path=self.summary_path, + runtime_env=self.runtime_env, ) ) @@ -555,10 +569,10 @@ def input_dataset(stat: Stat): class Bioimageio( BaseSettings, + cli_implicit_flags=True, cli_parse_args=True, cli_prog_name="bioimageio", cli_use_class_docs_for_groups=True, - cli_implicit_flags=True, use_attribute_docstrings=True, ): """bioimageio - CLI for bioimage.io resources 🦒""" diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 6ad54ab7..9804a93e 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -1,7 +1,6 @@ """These functions implement the logic of the bioimageio command line interface defined in `bioimageio.core.cli`.""" -import json from pathlib import Path from typing import Optional, Sequence, Union @@ -28,16 +27,13 @@ def test( devices: Optional[Union[str, Sequence[str]]] = None, decimal: int = 4, summary_path: Optional[Path] = None, + runtime_env: Union[ + Literal["currently-active", "as-described"], Path + ] = "currently-active", ) -> int: - """test a bioimageio resource + """Test a bioimageio resource. - Args: - source: Path or URL to the bioimageio resource description file - (bioimageio.yaml or rdf.yaml) or to a zipped resource - weight_format: (model only) The weight format to use - devices: Device(s) to use for testing - decimal: Precision for numerical comparisons - summary_path: Path to save validation summary as JSON file. 
+ Arguments as described in `bioimageio.core.cli.TestCmd` """ if isinstance(descr, InvalidDescr): descr.validation_summary.display() @@ -48,6 +44,7 @@ def test( weight_format=None if weight_format == "all" else weight_format, devices=[devices] if isinstance(devices, str) else devices, decimal=decimal, + runtime_env=runtime_env, ) summary.display() if summary_path is not None: From 96905740bf1fdacd4bcf4459d4aa9c067a76ad97 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 6 Dec 2024 15:56:45 +0100 Subject: [PATCH 023/187] all model adapters in backends --- bioimageio/core/__init__.py | 3 +- bioimageio/core/_create_model_adapter.py | 127 -------- bioimageio/core/_model_adapter.py | 93 ------ bioimageio/core/_resource_tests.py | 3 +- bioimageio/core/backend/__init__.py | 0 bioimageio/core/backends/__init__.py | 3 + .../_model_adapter.py | 36 +-- .../keras.py => backends/keras_backend.py} | 41 ++- bioimageio/core/backends/onnx_backend.py | 60 ++++ .../pytorch_backend.py} | 0 .../core/backends/tensorflow_backend.py | 289 ++++++++++++++++++ .../core/backends/torchscript_backend.py | 79 +++++ bioimageio/core/model_adapters.py | 18 +- .../model_adapters/_pytorch_model_adapter.py | 0 14 files changed, 482 insertions(+), 270 deletions(-) delete mode 100644 bioimageio/core/_create_model_adapter.py delete mode 100644 bioimageio/core/_model_adapter.py delete mode 100644 bioimageio/core/backend/__init__.py create mode 100644 bioimageio/core/backends/__init__.py rename bioimageio/core/{model_adapters => backends}/_model_adapter.py (84%) rename bioimageio/core/{backend/keras.py => backends/keras_backend.py} (74%) create mode 100644 bioimageio/core/backends/onnx_backend.py rename bioimageio/core/{backend/pytorch.py => backends/pytorch_backend.py} (100%) create mode 100644 bioimageio/core/backends/tensorflow_backend.py create mode 100644 bioimageio/core/backends/torchscript_backend.py delete mode 100644 bioimageio/core/model_adapters/_pytorch_model_adapter.py diff --git 
a/bioimageio/core/__init__.py b/bioimageio/core/__init__.py index f47f8f63..a8dd1043 100644 --- a/bioimageio/core/__init__.py +++ b/bioimageio/core/__init__.py @@ -41,6 +41,7 @@ ) from ._settings import settings from .axis import Axis, AxisId +from .backends import create_model_adapter from .block_meta import BlockMeta from .common import MemberId from .prediction import predict, predict_many @@ -73,6 +74,7 @@ "commands", "common", "compute_dataset_measures", + "create_model_adapter", "create_prediction_pipeline", "digest_spec", "dump_description", @@ -104,7 +106,6 @@ "Stat", "tensor", "Tensor", - "test_description_in_conda_env", "test_description", "test_model", "test_resource", diff --git a/bioimageio/core/_create_model_adapter.py b/bioimageio/core/_create_model_adapter.py deleted file mode 100644 index ee79f260..00000000 --- a/bioimageio/core/_create_model_adapter.py +++ /dev/null @@ -1,127 +0,0 @@ -import warnings -from abc import abstractmethod -from typing import List, Optional, Sequence, Tuple, Union, final - -from bioimageio.spec.model import v0_4, v0_5 - -from ._model_adapter import ( - DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER, - ModelAdapter, - WeightsFormat, -) -from .tensor import Tensor - - -def create_model_adapter( - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, - devices: Optional[Sequence[str]] = None, - weight_format_priority_order: Optional[Sequence[WeightsFormat]] = None, -): - """ - Creates model adapter based on the passed spec - Note: All specific adapters should happen inside this function to prevent different framework - initializations interfering with each other - """ - if not isinstance(model_description, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise TypeError( - f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_description)}" - ) - - weights = model_description.weights - errors: List[Tuple[WeightsFormat, Exception]] = [] - weight_format_priority_order = ( - DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER - if 
weight_format_priority_order is None - else weight_format_priority_order - ) - # limit weight formats to the ones present - weight_format_priority_order = [ - w for w in weight_format_priority_order if getattr(weights, w) is not None - ] - - for wf in weight_format_priority_order: - if wf == "pytorch_state_dict" and weights.pytorch_state_dict is not None: - try: - from .model_adapters_old._pytorch_model_adapter import ( - PytorchModelAdapter, - ) - - return PytorchModelAdapter( - outputs=model_description.outputs, - weights=weights.pytorch_state_dict, - devices=devices, - ) - except Exception as e: - errors.append((wf, e)) - elif ( - wf == "tensorflow_saved_model_bundle" - and weights.tensorflow_saved_model_bundle is not None - ): - try: - from .model_adapters_old._tensorflow_model_adapter import ( - TensorflowModelAdapter, - ) - - return TensorflowModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - elif wf == "onnx" and weights.onnx is not None: - try: - from .model_adapters_old._onnx_model_adapter import ONNXModelAdapter - - return ONNXModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - elif wf == "torchscript" and weights.torchscript is not None: - try: - from .model_adapters_old._torchscript_model_adapter import ( - TorchscriptModelAdapter, - ) - - return TorchscriptModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - elif wf == "keras_hdf5" and weights.keras_hdf5 is not None: - # keras can either be installed as a separate package or used as part of tensorflow - # we try to first import the keras model adapter using the separate package and, - # if it is not available, try to load the one using tf - try: - from .backend.keras import ( - KerasModelAdapter, - keras, # type: ignore - ) - - if keras is None: - from 
.model_adapters_old._tensorflow_model_adapter import ( - KerasModelAdapter, - ) - - return KerasModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - - assert errors - if len(weight_format_priority_order) == 1: - assert len(errors) == 1 - raise ValueError( - f"The '{weight_format_priority_order[0]}' model adapter could not be created" - + f" in this environment:\n{errors[0][1].__class__.__name__}({errors[0][1]}).\n\n" - ) from errors[0][1] - - else: - error_list = "\n - ".join( - f"{wf}: {e.__class__.__name__}({e})" for wf, e in errors - ) - raise ValueError( - "None of the weight format specific model adapters could be created" - + f" in this environment. Errors are:\n\n{error_list}.\n\n" - ) diff --git a/bioimageio/core/_model_adapter.py b/bioimageio/core/_model_adapter.py deleted file mode 100644 index 0438d35e..00000000 --- a/bioimageio/core/_model_adapter.py +++ /dev/null @@ -1,93 +0,0 @@ -import warnings -from abc import ABC, abstractmethod -from typing import List, Optional, Sequence, Tuple, Union, final - -from bioimageio.spec.model import v0_4, v0_5 - -from .tensor import Tensor - -WeightsFormat = Union[v0_4.WeightsFormat, v0_5.WeightsFormat] - -__all__ = [ - "ModelAdapter", - "create_model_adapter", - "get_weight_formats", -] - -# Known weight formats in order of priority -# First match wins -DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[WeightsFormat, ...] = ( - "pytorch_state_dict", - "tensorflow_saved_model_bundle", - "torchscript", - "onnx", - "keras_hdf5", -) - - -class ModelAdapter(ABC): - """ - Represents model *without* any preprocessing or postprocessing. - - ``` - from bioimageio.core import load_description - - model = load_description(...) - - # option 1: - adapter = ModelAdapter.create(model) - adapter.forward(...) - adapter.unload() - - # option 2: - with ModelAdapter.create(model) as adapter: - adapter.forward(...) 
- ``` - """ - - @final - @classmethod - def create( - cls, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, - devices: Optional[Sequence[str]] = None, - weight_format_priority_order: Optional[Sequence[WeightsFormat]] = None, - ): - """ - Creates model adapter based on the passed spec - Note: All specific adapters should happen inside this function to prevent different framework - initializations interfering with each other - """ - from ._create_model_adapter import create_model_adapter - - return create_model_adapter( - model_description, - devices=devices, - weight_format_priority_order=weight_format_priority_order, - ) - - @final - def load(self, *, devices: Optional[Sequence[str]] = None) -> None: - warnings.warn("Deprecated. ModelAdapter is loaded on initialization") - - @abstractmethod - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - """ - Run forward pass of model to get model predictions - """ - # TODO: handle tensor.transpose in here and make _forward_impl the abstract impl - - @abstractmethod - def unload(self): - """ - Unload model from any devices, freeing their memory. - The moder adapter should be considered unusable afterwards. 
- """ - - -def get_weight_formats() -> List[str]: - """ - Return list of supported weight types - """ - return list(DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 8f24d363..e6675b73 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -37,6 +37,7 @@ from bioimageio.spec._internal.common_nodes import ResourceDescrBase from bioimageio.spec._internal.io import is_yaml_value from bioimageio.spec._internal.io_utils import read_yaml, write_yaml +from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource, Sha256 from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat from bioimageio.spec.summary import ( @@ -192,7 +193,7 @@ def test_description( decimal=decimal, determinism=determinism, expected_type=expected_type, - sha256=sha256, + sha256=sha256, ) return rd.validation_summary diff --git a/bioimageio/core/backend/__init__.py b/bioimageio/core/backend/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/bioimageio/core/backends/__init__.py b/bioimageio/core/backends/__init__.py new file mode 100644 index 00000000..c39b58b5 --- /dev/null +++ b/bioimageio/core/backends/__init__.py @@ -0,0 +1,3 @@ +from ._model_adapter import create_model_adapter + +__all__ = ["create_model_adapter"] diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py similarity index 84% rename from bioimageio/core/model_adapters/_model_adapter.py rename to bioimageio/core/backends/_model_adapter.py index 3921f81b..66153f09 100644 --- a/bioimageio/core/model_adapters/_model_adapter.py +++ b/bioimageio/core/backends/_model_adapter.py @@ -73,7 +73,7 @@ def create( for wf in weight_format_priority_order: if wf == "pytorch_state_dict" and weights.pytorch_state_dict is not None: try: - from ._pytorch_model_adapter import PytorchModelAdapter + from 
.pytorch_backend import PytorchModelAdapter return PytorchModelAdapter( outputs=model_description.outputs, @@ -87,7 +87,7 @@ def create( and weights.tensorflow_saved_model_bundle is not None ): try: - from ._tensorflow_model_adapter import TensorflowModelAdapter + from .tensorflow_backend import TensorflowModelAdapter return TensorflowModelAdapter( model_description=model_description, devices=devices @@ -96,7 +96,7 @@ def create( errors.append((wf, e)) elif wf == "onnx" and weights.onnx is not None: try: - from ._onnx_model_adapter import ONNXModelAdapter + from .onnx_backend import ONNXModelAdapter return ONNXModelAdapter( model_description=model_description, devices=devices @@ -105,7 +105,7 @@ def create( errors.append((wf, e)) elif wf == "torchscript" and weights.torchscript is not None: try: - from ._torchscript_model_adapter import TorchscriptModelAdapter + from .torchscript_backend import TorchscriptModelAdapter return TorchscriptModelAdapter( model_description=model_description, devices=devices @@ -117,13 +117,10 @@ def create( # we try to first import the keras model adapter using the separate package and, # if it is not available, try to load the one using tf try: - from ._keras import ( - KerasModelAdapter, - keras, # type: ignore - ) - - if keras is None: - from ._tensorflow_model_adapter import KerasModelAdapter + try: + from .keras_backend import KerasModelAdapter + except Exception: + from .tensorflow_backend import KerasModelAdapter return KerasModelAdapter( model_description=model_description, devices=devices @@ -134,10 +131,11 @@ def create( assert errors if len(weight_format_priority_order) == 1: assert len(errors) == 1 + wf, e = errors[0] raise ValueError( - f"The '{weight_format_priority_order[0]}' model adapter could not be created" - + f" in this environment:\n{errors[0][1].__class__.__name__}({errors[0][1]}).\n\n" - ) from errors[0][1] + f"The '{wf}' model adapter could not be created" + + f" in this 
environment:\n{e.__class__.__name__}({e}).\n\n" + ) from e else: error_list = "\n - ".join( @@ -165,13 +163,3 @@ def unload(self): Unload model from any devices, freeing their memory. The moder adapter should be considered unusable afterwards. """ - - -def get_weight_formats() -> List[str]: - """ - Return list of supported weight types - """ - return list(DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER) - - -create_model_adapter = ModelAdapter.create diff --git a/bioimageio/core/backend/keras.py b/bioimageio/core/backends/keras_backend.py similarity index 74% rename from bioimageio/core/backend/keras.py rename to bioimageio/core/backends/keras_backend.py index 1d273cfc..35ee79fe 100644 --- a/bioimageio/core/backend/keras.py +++ b/bioimageio/core/backends/keras_backend.py @@ -10,30 +10,22 @@ from .._settings import settings from ..digest_spec import get_axes_infos -from ..model_adapters import ModelAdapter from ..tensor import Tensor +from ._model_adapter import ModelAdapter os.environ["KERAS_BACKEND"] = settings.keras_backend # by default, we use the keras integrated with tensorflow +# TODO: check if we should prefer keras try: - import tensorflow as tf # pyright: ignore[reportMissingImports] - from tensorflow import ( # pyright: ignore[reportMissingImports] - keras, # pyright: ignore[reportUnknownVariableType] + import tensorflow as tf + from tensorflow import ( + keras, # pyright: ignore[reportUnknownVariableType,reportAttributeAccessIssue] ) - tf_version = Version(tf.__version__) # pyright: ignore[reportUnknownArgumentType] + tf_version = Version(tf.__version__) except Exception: - try: - import keras # pyright: ignore[reportMissingImports] - except Exception as e: - keras = None - keras_error = str(e) - else: - keras_error = None - tf_version = None -else: - keras_error = None + import keras class KerasModelAdapter(ModelAdapter): @@ -43,9 +35,6 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ) -> None: 
- if keras is None: - raise ImportError(f"failed to import keras: {keras_error}") - super().__init__() if model_description.weights.keras_hdf5 is None: raise ValueError("model has not keras_hdf5 weights specified") @@ -86,18 +75,26 @@ def __init__( def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: _result: Union[Sequence[NDArray[Any]], NDArray[Any]] - _result = self._network.predict( # pyright: ignore[reportUnknownVariableType] + _result = self._network.predict( # type: ignore *[None if t is None else t.data.data for t in input_tensors] ) if isinstance(_result, (tuple, list)): - result: Sequence[NDArray[Any]] = _result + result = _result # pyright: ignore[reportUnknownVariableType] else: result = [_result] # type: ignore - assert len(result) == len(self._output_axes) + assert len(result) == len( # pyright: ignore[reportUnknownArgumentType] + self._output_axes + ) ret: List[Optional[Tensor]] = [] ret.extend( - [Tensor(r, dims=axes) for r, axes, in zip(result, self._output_axes)] + [ + Tensor(r, dims=axes) # pyright: ignore[reportArgumentType] + for r, axes, in zip( # pyright: ignore[reportUnknownVariableType] + result, # pyright: ignore[reportUnknownArgumentType] + self._output_axes, + ) + ] ) return ret diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py new file mode 100644 index 00000000..21bbcc09 --- /dev/null +++ b/bioimageio/core/backends/onnx_backend.py @@ -0,0 +1,60 @@ +import warnings +from typing import Any, List, Optional, Sequence, Union + +import onnxruntime as rt + +from bioimageio.spec._internal.type_guards import is_list, is_tuple +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.utils import download + +from ..digest_spec import get_axes_infos +from ..model_adapters import ModelAdapter +from ..tensor import Tensor + + +class ONNXModelAdapter(ModelAdapter): + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: 
Optional[Sequence[str]] = None, + ): + super().__init__() + self._internal_output_axes = [ + tuple(a.id for a in get_axes_infos(out)) + for out in model_description.outputs + ] + if model_description.weights.onnx is None: + raise ValueError("No ONNX weights specified for {model_description.name}") + + self._session = rt.InferenceSession( + str(download(model_description.weights.onnx.source).path) + ) + onnx_inputs = self._session.get_inputs() # type: ignore + self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] # type: ignore + + if devices is not None: + warnings.warn( + f"Device management is not implemented for onnx yet, ignoring the devices {devices}" + ) + + def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: + assert len(input_tensors) == len(self._input_names) + input_arrays = [None if ipt is None else ipt.data.data for ipt in input_tensors] + result: Any = self._session.run( + None, dict(zip(self._input_names, input_arrays)) + ) + if is_list(result) or is_tuple(result): + result_seq = result + else: + result_seq = [result] + + return [ + None if r is None else Tensor(r, dims=axes) + for r, axes in zip(result_seq, self._internal_output_axes) + ] + + def unload(self) -> None: + warnings.warn( + "Device management is not implemented for onnx yet, cannot unload model" + ) diff --git a/bioimageio/core/backend/pytorch.py b/bioimageio/core/backends/pytorch_backend.py similarity index 100% rename from bioimageio/core/backend/pytorch.py rename to bioimageio/core/backends/pytorch_backend.py diff --git a/bioimageio/core/backends/tensorflow_backend.py b/bioimageio/core/backends/tensorflow_backend.py new file mode 100644 index 00000000..3f9cee9d --- /dev/null +++ b/bioimageio/core/backends/tensorflow_backend.py @@ -0,0 +1,289 @@ +import zipfile +from io import TextIOWrapper +from pathlib import Path +from shutil import copyfileobj +from typing import List, Literal, Optional, Sequence, Union + +import numpy as np +import tensorflow 
as tf +from loguru import logger + +from bioimageio.spec.common import FileSource, ZipPath +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.utils import download + +from ..digest_spec import get_axes_infos +from ..tensor import Tensor +from ._model_adapter import ModelAdapter + + +class TensorflowModelAdapterBase(ModelAdapter): + weight_format: Literal["keras_hdf5", "tensorflow_saved_model_bundle"] + + def __init__( + self, + *, + devices: Optional[Sequence[str]] = None, + weights: Union[ + v0_4.KerasHdf5WeightsDescr, + v0_4.TensorflowSavedModelBundleWeightsDescr, + v0_5.KerasHdf5WeightsDescr, + v0_5.TensorflowSavedModelBundleWeightsDescr, + ], + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + ): + super().__init__() + self.model_description = model_description + tf_version = v0_5.Version(tf.__version__) + model_tf_version = weights.tensorflow_version + if model_tf_version is None: + logger.warning( + "The model does not specify the tensorflow version." + + f"Cannot check if it is compatible with intalled tensorflow {tf_version}." + ) + elif model_tf_version > tf_version: + logger.warning( + f"The model specifies a newer tensorflow version than installed: {model_tf_version} > {tf_version}." + ) + elif (model_tf_version.major, model_tf_version.minor) != ( + tf_version.major, + tf_version.minor, + ): + logger.warning( + "The tensorflow version specified by the model does not match the installed: " + + f"{model_tf_version} != {tf_version}." 
+ ) + + self.use_keras_api = ( + tf_version.major > 1 + or self.weight_format == KerasModelAdapter.weight_format + ) + + # TODO tf device management + if devices is not None: + logger.warning( + f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" + ) + + weight_file = self.require_unzipped(weights.source) + self._network = self._get_network(weight_file) + self._internal_output_axes = [ + tuple(a.id for a in get_axes_infos(out)) + for out in model_description.outputs + ] + + # TODO: check how to load tf weights without unzipping + def require_unzipped(self, weight_file: FileSource): + local_weights_file = download(weight_file).path + if isinstance(local_weights_file, ZipPath): + # weights file is in a bioimageio zip package + out_path = ( + Path("bioimageio_unzipped_tf_weights") / local_weights_file.filename + ) + with local_weights_file.open("rb") as src, out_path.open("wb") as dst: + assert not isinstance(src, TextIOWrapper) + copyfileobj(src, dst) + + local_weights_file = out_path + + if zipfile.is_zipfile(local_weights_file): + # weights file itself is a zipfile + out_path = local_weights_file.with_suffix(".unzipped") + with zipfile.ZipFile(local_weights_file, "r") as f: + f.extractall(out_path) + + return out_path + else: + return local_weights_file + + def _get_network( # pyright: ignore[reportUnknownParameterType] + self, weight_file: FileSource + ): + weight_file = self.require_unzipped(weight_file) + assert tf is not None + if self.use_keras_api: + try: + return tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue,reportUnknownVariableType] + weight_file, + call_endpoint="serve", + ) + except Exception as e: + try: + return tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue,reportUnknownVariableType] + weight_file, call_endpoint="serving_default" + ) + except Exception as ee: + logger.opt(exception=ee).info( + "keras.layers.TFSMLayer error for alternative 
call_endpoint='serving_default'" + ) + raise e + else: + # NOTE in tf1 the model needs to be loaded inside of the session, so we cannot preload the model + return str(weight_file) + + # TODO currently we relaod the model every time. it would be better to keep the graph and session + # alive in between of forward passes (but then the sessions need to be properly opened / closed) + def _forward_tf( # pyright: ignore[reportUnknownParameterType] + self, *input_tensors: Optional[Tensor] + ): + assert tf is not None + input_keys = [ + ipt.name if isinstance(ipt, v0_4.InputTensorDescr) else ipt.id + for ipt in self.model_description.inputs + ] + output_keys = [ + out.name if isinstance(out, v0_4.OutputTensorDescr) else out.id + for out in self.model_description.outputs + ] + # TODO read from spec + tag = ( # pyright: ignore[reportUnknownVariableType] + tf.saved_model.tag_constants.SERVING # pyright: ignore[reportAttributeAccessIssue] + ) + signature_key = ( # pyright: ignore[reportUnknownVariableType] + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY # pyright: ignore[reportAttributeAccessIssue] + ) + + graph = tf.Graph() + with graph.as_default(): + with tf.Session( # pyright: ignore[reportAttributeAccessIssue] + graph=graph + ) as sess: # pyright: ignore[reportUnknownVariableType] + # load the model and the signature + graph_def = tf.saved_model.loader.load( # pyright: ignore[reportUnknownVariableType,reportAttributeAccessIssue] + sess, [tag], self._network + ) + signature = ( # pyright: ignore[reportUnknownVariableType] + graph_def.signature_def + ) + + # get the tensors into the graph + in_names = [ # pyright: ignore[reportUnknownVariableType] + signature[signature_key].inputs[key].name for key in input_keys + ] + out_names = [ # pyright: ignore[reportUnknownVariableType] + signature[signature_key].outputs[key].name for key in output_keys + ] + in_tensors = [ + graph.get_tensor_by_name( + name # pyright: ignore[reportUnknownArgumentType] + ) + for 
name in in_names # pyright: ignore[reportUnknownVariableType] + ] + out_tensors = [ + graph.get_tensor_by_name( + name # pyright: ignore[reportUnknownArgumentType] + ) + for name in out_names # pyright: ignore[reportUnknownVariableType] + ] + + # run prediction + res = sess.run( # pyright: ignore[reportUnknownVariableType] + dict( + zip( + out_names, # pyright: ignore[reportUnknownArgumentType] + out_tensors, + ) + ), + dict( + zip( + in_tensors, + [None if t is None else t.data for t in input_tensors], + ) + ), + ) + # from dict to list of tensors + res = [ # pyright: ignore[reportUnknownVariableType] + res[out] + for out in out_names # pyright: ignore[reportUnknownVariableType] + ] + + return res # pyright: ignore[reportUnknownVariableType] + + def _forward_keras( # pyright: ignore[reportUnknownParameterType] + self, *input_tensors: Optional[Tensor] + ): + assert self.use_keras_api + assert not isinstance(self._network, str) + assert tf is not None + tf_tensor = [ + None if ipt is None else tf.convert_to_tensor(ipt) for ipt in input_tensors + ] + + result = self._network(*tf_tensor) # pyright: ignore[reportUnknownVariableType] + + assert isinstance(result, dict) + + # TODO: Use RDF's `outputs[i].id` here + result = list( # pyright: ignore[reportUnknownVariableType] + result.values() # pyright: ignore[reportUnknownArgumentType] + ) + + return [ # pyright: ignore[reportUnknownVariableType] + (None if r is None else r if isinstance(r, np.ndarray) else r.numpy()) + for r in result # pyright: ignore[reportUnknownVariableType] + ] + + def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: + if self.use_keras_api: + result = self._forward_keras( # pyright: ignore[reportUnknownVariableType] + *input_tensors + ) + else: + result = self._forward_tf( # pyright: ignore[reportUnknownVariableType] + *input_tensors + ) + + return [ + ( + None + if r is None + else Tensor(r, dims=axes) # pyright: ignore[reportUnknownArgumentType] + ) + for r, axes in zip( 
# pyright: ignore[reportUnknownVariableType] + result, # pyright: ignore[reportUnknownArgumentType] + self._internal_output_axes, + ) + ] + + def unload(self) -> None: + logger.warning( + "Device management is not implemented for keras yet, cannot unload model" + ) + + +class TensorflowModelAdapter(TensorflowModelAdapterBase): + weight_format = "tensorflow_saved_model_bundle" + + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + if model_description.weights.tensorflow_saved_model_bundle is None: + raise ValueError("missing tensorflow_saved_model_bundle weights") + + super().__init__( + devices=devices, + weights=model_description.weights.tensorflow_saved_model_bundle, + model_description=model_description, + ) + + +class KerasModelAdapter(TensorflowModelAdapterBase): + weight_format = "keras_hdf5" + + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + if model_description.weights.keras_hdf5 is None: + raise ValueError("missing keras_hdf5 weights") + + super().__init__( + model_description=model_description, + devices=devices, + weights=model_description.weights.keras_hdf5, + ) diff --git a/bioimageio/core/backends/torchscript_backend.py b/bioimageio/core/backends/torchscript_backend.py new file mode 100644 index 00000000..d1882180 --- /dev/null +++ b/bioimageio/core/backends/torchscript_backend.py @@ -0,0 +1,79 @@ +import gc +import warnings +from typing import Any, List, Optional, Sequence, Union + +import torch + +from bioimageio.spec._internal.type_guards import is_list, is_ndarray, is_tuple +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.utils import download + +from ..digest_spec import get_axes_infos +from ..model_adapters import ModelAdapter +from ..tensor import Tensor + + +class TorchscriptModelAdapter(ModelAdapter): + def __init__( + self, + *, + 
model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + super().__init__() + if model_description.weights.torchscript is None: + raise ValueError( + f"No torchscript weights found for model {model_description.name}" + ) + + weight_path = download(model_description.weights.torchscript.source).path + if devices is None: + self.devices = ["cuda" if torch.cuda.is_available() else "cpu"] + else: + self.devices = [torch.device(d) for d in devices] + + if len(self.devices) > 1: + warnings.warn( + "Multiple devices for single torchscript model not yet implemented" + ) + + self._model = torch.jit.load(weight_path) + self._model.to(self.devices[0]) + self._model = self._model.eval() + self._internal_output_axes = [ + tuple(a.id for a in get_axes_infos(out)) + for out in model_description.outputs + ] + + def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: + with torch.no_grad(): + torch_tensor = [ + None if b is None else torch.from_numpy(b.data.data).to(self.devices[0]) + for b in batch + ] + _result: Any = self._model.forward(*torch_tensor) + if is_list(_result) or is_tuple(_result): + result: Sequence[Any] = _result + else: + result = [_result] + + result = [ + ( + None + if r is None + else r.cpu().numpy() if isinstance(r, torch.Tensor) else r + ) + for r in result + ] + + assert len(result) == len(self._internal_output_axes) + return [ + None if r is None else Tensor(r, dims=axes) if is_ndarray(r) else r + for r, axes in zip(result, self._internal_output_axes) + ] + + def unload(self) -> None: + self._devices = None + del self._model + _ = gc.collect() # deallocate memory + torch.cuda.empty_cache() # release reserved memory diff --git a/bioimageio/core/model_adapters.py b/bioimageio/core/model_adapters.py index 86fcfe4b..db92d013 100644 --- a/bioimageio/core/model_adapters.py +++ b/bioimageio/core/model_adapters.py @@ -1,8 +1,22 @@ -from ._create_model_adapter import create_model_adapter -from 
._model_adapter import ModelAdapter, get_weight_formats +"""DEPRECATED""" + +from typing import List + +from .backends._model_adapter import ( + DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER, + ModelAdapter, + create_model_adapter, +) __all__ = [ "ModelAdapter", "create_model_adapter", "get_weight_formats", ] + + +def get_weight_formats() -> List[str]: + """ + Return list of supported weight types + """ + return list(DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER) diff --git a/bioimageio/core/model_adapters/_pytorch_model_adapter.py b/bioimageio/core/model_adapters/_pytorch_model_adapter.py deleted file mode 100644 index e69de29b..00000000 From f52a894231fc81792045033d726c6eda0db6e82b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 6 Dec 2024 16:42:16 +0100 Subject: [PATCH 024/187] sort tests --- tests/test_proc_ops.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index e408d220..0b93f08b 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -105,6 +105,22 @@ def test_zero_mean_unit_variance_fixed(tid: MemberId): xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) +def test_zero_mean_unit_variance_fixed2(tid: MemberId): + from bioimageio.core.proc_ops import FixedZeroMeanUnitVariance + + np_data = np.arange(9).reshape(3, 3) + mean = float(np_data.mean()) + std = float(np_data.mean()) + eps = 1.0e-7 + op = FixedZeroMeanUnitVariance(tid, tid, mean=mean, std=std, eps=eps) + + data = xr.DataArray(np_data, dims=("x", "y")) + sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) + expected = xr.DataArray((np_data - mean) / (std + eps), dims=("x", "y")) + op(sample) + xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) + + def test_zero_mean_unit_across_axes(tid: MemberId): from bioimageio.core.proc_ops import ZeroMeanUnitVariance @@ -126,22 +142,6 @@ def 
test_zero_mean_unit_across_axes(tid: MemberId): xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) -def test_zero_mean_unit_variance_fixed2(tid: MemberId): - from bioimageio.core.proc_ops import FixedZeroMeanUnitVariance - - np_data = np.arange(9).reshape(3, 3) - mean = float(np_data.mean()) - std = float(np_data.mean()) - eps = 1.0e-7 - op = FixedZeroMeanUnitVariance(tid, tid, mean=mean, std=std, eps=eps) - - data = xr.DataArray(np_data, dims=("x", "y")) - sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) - expected = xr.DataArray((np_data - mean) / (std + eps), dims=("x", "y")) - op(sample) - xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) - - def test_binarize(tid: MemberId): from bioimageio.core.proc_ops import Binarize From 523c54b4f60a3076df7ddd24ed4ac310a6b8a875 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 9 Dec 2024 13:43:39 +0100 Subject: [PATCH 025/187] add create_model_adapter --- bioimageio/core/backends/_model_adapter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bioimageio/core/backends/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py index 66153f09..99919aac 100644 --- a/bioimageio/core/backends/_model_adapter.py +++ b/bioimageio/core/backends/_model_adapter.py @@ -163,3 +163,6 @@ def unload(self): Unload model from any devices, freeing their memory. The moder adapter should be considered unusable afterwards. 
""" + + +create_model_adapter = ModelAdapter.create From d438a123597cc707124a3b6e1569d317fec1b3ef Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 9 Dec 2024 13:43:52 +0100 Subject: [PATCH 026/187] pin pyright --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index af913c1d..95414371 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ "pre-commit", "pdoc", "psutil", # parallel pytest with 'pytest -n auto' - "pyright", + "pyright==1.1.390", "pytest-cov", "pytest-xdist", # parallel pytest "pytest", From f9a1a67a7e84e17f6390350d712ae7075d70929d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 9 Dec 2024 14:43:02 +0100 Subject: [PATCH 027/187] continue refactor of weight converters and backends --- bioimageio/core/_resource_tests.py | 12 +- bioimageio/core/backends/pytorch_backend.py | 159 +++--- .../core/backends/tensorflow_backend.py | 40 +- bioimageio/core/io.py | 30 +- bioimageio/core/test_bioimageio_collection.py | 60 --- bioimageio/core/weight_converters.py | 492 ------------------ .../weight_converters/keras_to_tensorflow.py | 184 +++++++ .../core/weight_converters/pytorch_to_onnx.py | 17 +- .../pytorch_to_torchscript.py | 154 ++++++ tests/conftest.py | 4 +- tests/test_weight_converters.py | 19 +- tests/utils.py | 2 + 12 files changed, 476 insertions(+), 697 deletions(-) delete mode 100644 bioimageio/core/test_bioimageio_collection.py delete mode 100644 bioimageio/core/weight_converters.py create mode 100644 bioimageio/core/weight_converters/keras_to_tensorflow.py create mode 100644 bioimageio/core/weight_converters/pytorch_to_torchscript.py diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index e6675b73..0dae30ff 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -428,7 +428,7 @@ def _test_model_inference( rtol: float, ) -> None: test_name = f"Reproduce test outputs from test inputs ({weight_format})" - logger.info("starting '{}'", 
test_name) + logger.debug("starting '{}'", test_name) error: Optional[str] = None tb: List[str] = [] @@ -516,11 +516,13 @@ def _test_model_inference_parametrized( # no batch axis batch_sizes = {1} - test_cases: Set[Tuple[v0_5.ParameterizedSize_N, BatchSize]] = { - (n, b) for n, b in product(sorted(ns), sorted(batch_sizes)) + test_cases: Set[Tuple[BatchSize, v0_5.ParameterizedSize_N]] = { + (b, n) for b, n in product(sorted(batch_sizes), sorted(ns)) } logger.info( - "Testing inference with {} different input tensor sizes", len(test_cases) + "Testing inference with {} different inputs (B, N): {}", + len(test_cases), + test_cases, ) def generate_test_cases(): @@ -534,7 +536,7 @@ def get_ns(n: int): if isinstance(a.size, v0_5.ParameterizedSize) } - for n, batch_size in sorted(test_cases): + for batch_size, n in sorted(test_cases): input_target_sizes, expected_output_sizes = model.get_axis_sizes( get_ns(n), batch_size=batch_size ) diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index 1992f406..74e59f30 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -34,12 +34,12 @@ def __init__( ): super().__init__() self.output_dims = [tuple(a.id for a in get_axes_infos(out)) for out in outputs] - devices = self.get_devices(devices) - self._network = self.get_network(weights, load_state=True, devices=devices) + devices = get_devices(devices) + self._model = load_torch_model(weights, load_state=True, devices=devices) if mode == "eval": - self._network = self._network.eval() + self._model = self._model.eval() elif mode == "train": - self._network = self._network.train() + self._model = self._model.train() else: assert_never(mode) @@ -63,7 +63,7 @@ def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: (None if t is None else t.to(self._primary_device)) for t in tensors ] result: Union[Tuple[Any, ...], List[Any], Any] - result = 
self._network(*tensors) + result = self._model(*tensors) if not isinstance(result, (tuple, list)): result = [result] @@ -86,91 +86,84 @@ def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: ] def unload(self) -> None: - del self._network + del self._model _ = gc.collect() # deallocate memory assert torch is not None torch.cuda.empty_cache() # release reserved memory - @classmethod - def get_network( - cls, - weight_spec: Union[ - v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr - ], - *, - load_state: bool = False, - devices: Optional[Sequence[Union[str, torch.device]]] = None, - ) -> nn.Module: - arch = import_callable( - weight_spec.architecture, - sha256=( - weight_spec.architecture_sha256 - if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) - else weight_spec.sha256 - ), - ) - model_kwargs = ( - weight_spec.kwargs + +def load_torch_model( + weight_spec: Union[ + v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr + ], + *, + load_state: bool = False, + devices: Optional[Sequence[Union[str, torch.device]]] = None, +) -> nn.Module: + arch = import_callable( + weight_spec.architecture, + sha256=( + weight_spec.architecture_sha256 if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) - else weight_spec.architecture.kwargs + else weight_spec.sha256 + ), + ) + model_kwargs = ( + weight_spec.kwargs + if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) + else weight_spec.architecture.kwargs + ) + network = arch(**model_kwargs) + if not isinstance(network, nn.Module): + raise ValueError( + f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" ) - network = arch(**model_kwargs) - if not isinstance(network, nn.Module): - raise ValueError( - f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" - ) - if load_state or devices: - use_devices = cls.get_devices(devices) - network = network.to(use_devices[0]) - if load_state: - network 
= cls.load_state( - network, - path=download(weight_spec).path, - devices=use_devices, - ) - return network - - @staticmethod - def load_state( - network: nn.Module, - path: Union[Path, ZipPath], - devices: Sequence[torch.device], - ) -> nn.Module: - network = network.to(devices[0]) - with path.open("rb") as f: - assert not isinstance(f, TextIOWrapper) - state = torch.load(f, map_location=devices[0]) - - incompatible = network.load_state_dict(state) - if incompatible.missing_keys: - logger.warning("Missing state dict keys: {}", incompatible.missing_keys) - - if incompatible.unexpected_keys: - logger.warning( - "Unexpected state dict keys: {}", incompatible.unexpected_keys + if load_state or devices: + use_devices = get_devices(devices) + network = network.to(use_devices[0]) + if load_state: + network = load_torch_state_dict( + network, + path=download(weight_spec).path, + devices=use_devices, ) - return network - - @staticmethod - def get_devices( - devices: Optional[Sequence[Union[torch.device, str]]] = None, - ) -> List[torch.device]: - if not devices: - torch_devices = [ - ( - torch.device("cuda") - if torch.cuda.is_available() - else torch.device("cpu") - ) - ] - else: - torch_devices = [torch.device(d) for d in devices] + return network + + +def load_torch_state_dict( + model: nn.Module, + path: Union[Path, ZipPath], + devices: Sequence[torch.device], +) -> nn.Module: + model = model.to(devices[0]) + with path.open("rb") as f: + assert not isinstance(f, TextIOWrapper) + state = torch.load(f, map_location=devices[0]) + + incompatible = model.load_state_dict(state) + if incompatible.missing_keys: + logger.warning("Missing state dict keys: {}", incompatible.missing_keys) + + if incompatible.unexpected_keys: + logger.warning("Unexpected state dict keys: {}", incompatible.unexpected_keys) + return model + + +def get_devices( + devices: Optional[Sequence[Union[torch.device, str]]] = None, +) -> List[torch.device]: + if not devices: + torch_devices = [ + 
(torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")) + ] + else: + torch_devices = [torch.device(d) for d in devices] - if len(torch_devices) > 1: - warnings.warn( - f"Multiple devices for single pytorch model not yet implemented; ignoring {torch_devices[1:]}" - ) - torch_devices = torch_devices[:1] + if len(torch_devices) > 1: + warnings.warn( + f"Multiple devices for single pytorch model not yet implemented; ignoring {torch_devices[1:]}" + ) + torch_devices = torch_devices[:1] - return torch_devices + return torch_devices diff --git a/bioimageio/core/backends/tensorflow_backend.py b/bioimageio/core/backends/tensorflow_backend.py index 3f9cee9d..94a8165f 100644 --- a/bioimageio/core/backends/tensorflow_backend.py +++ b/bioimageio/core/backends/tensorflow_backend.py @@ -1,16 +1,13 @@ -import zipfile -from io import TextIOWrapper from pathlib import Path -from shutil import copyfileobj from typing import List, Literal, Optional, Sequence, Union import numpy as np import tensorflow as tf from loguru import logger -from bioimageio.spec.common import FileSource, ZipPath +from bioimageio.core.io import ensure_unzipped +from bioimageio.spec.common import FileSource from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download from ..digest_spec import get_axes_infos from ..tensor import Tensor @@ -65,41 +62,22 @@ def __init__( f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" ) - weight_file = self.require_unzipped(weights.source) + # TODO: check how to load tf weights without unzipping + weight_file = ensure_unzipped( + weights.source, Path("bioimageio_unzipped_tf_weights") + ) self._network = self._get_network(weight_file) self._internal_output_axes = [ tuple(a.id for a in get_axes_infos(out)) for out in model_description.outputs ] - # TODO: check how to load tf weights without unzipping - def require_unzipped(self, weight_file: FileSource): - local_weights_file = 
download(weight_file).path - if isinstance(local_weights_file, ZipPath): - # weights file is in a bioimageio zip package - out_path = ( - Path("bioimageio_unzipped_tf_weights") / local_weights_file.filename - ) - with local_weights_file.open("rb") as src, out_path.open("wb") as dst: - assert not isinstance(src, TextIOWrapper) - copyfileobj(src, dst) - - local_weights_file = out_path - - if zipfile.is_zipfile(local_weights_file): - # weights file itself is a zipfile - out_path = local_weights_file.with_suffix(".unzipped") - with zipfile.ZipFile(local_weights_file, "r") as f: - f.extractall(out_path) - - return out_path - else: - return local_weights_file - def _get_network( # pyright: ignore[reportUnknownParameterType] self, weight_file: FileSource ): - weight_file = self.require_unzipped(weight_file) + weight_file = ensure_unzipped( + weight_file, Path("bioimageio_unzipped_tf_weights") + ) assert tf is not None if self.use_keras_api: try: diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index ee60a67a..001db539 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -1,6 +1,9 @@ import collections.abc import warnings +import zipfile +from io import TextIOWrapper from pathlib import Path, PurePosixPath +from shutil import copyfileobj from typing import Any, Mapping, Optional, Sequence, Tuple, Union import h5py @@ -10,7 +13,8 @@ from numpy.typing import NDArray from pydantic import BaseModel, ConfigDict, TypeAdapter -from bioimageio.spec.utils import load_array, save_array +from bioimageio.spec.common import FileSource, ZipPath +from bioimageio.spec.utils import download, load_array, save_array from .axis import AxisLike from .common import PerMember @@ -176,3 +180,27 @@ def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path): def load_dataset_stat(path: Path): seq = _stat_adapter.validate_json(path.read_bytes()) return {e.measure: e.value for e in seq} + + +def ensure_unzipped(source: Union[FileSource, ZipPath], folder: 
Path): + """unzip a (downloaded) **source** to a file in **folder** if source is a zip archive. + Always returns the path to the unzipped source (maybe source itself)""" + local_weights_file = download(source).path + if isinstance(local_weights_file, ZipPath): + # source is inside a zip archive + out_path = folder / local_weights_file.filename + with local_weights_file.open("rb") as src, out_path.open("wb") as dst: + assert not isinstance(src, TextIOWrapper) + copyfileobj(src, dst) + + local_weights_file = out_path + + if zipfile.is_zipfile(local_weights_file): + # source itself is a zipfile + out_path = folder / local_weights_file.with_suffix(".unzipped").name + with zipfile.ZipFile(local_weights_file, "r") as f: + f.extractall(out_path) + + return out_path + else: + return local_weights_file diff --git a/bioimageio/core/test_bioimageio_collection.py b/bioimageio/core/test_bioimageio_collection.py deleted file mode 100644 index 2cf9ced0..00000000 --- a/bioimageio/core/test_bioimageio_collection.py +++ /dev/null @@ -1,60 +0,0 @@ -from typing import Any, Collection, Dict, Iterable, Mapping, Tuple - -import pytest -import requests -from pydantic import HttpUrl - -from bioimageio.spec import InvalidDescr -from bioimageio.spec.common import Sha256 -from tests.utils import ParameterSet, expensive_test - -BASE_URL = "https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/" - - -def _get_latest_rdf_sources(): - entries: Any = requests.get(BASE_URL + "all_versions.json").json()["entries"] - ret: Dict[str, Tuple[HttpUrl, Sha256]] = {} - for entry in entries: - version = entry["versions"][0] - ret[f"{entry['concept']}/{version['v']}"] = ( - HttpUrl(version["source"]), # pyright: ignore[reportCallIssue] - Sha256(version["sha256"]), - ) - - return ret - - -ALL_LATEST_RDF_SOURCES: Mapping[str, Tuple[HttpUrl, Sha256]] = _get_latest_rdf_sources() - - -def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: - for descr_url, sha in ALL_LATEST_RDF_SOURCES.values(): - key = 
( - str(descr_url) - .replace(BASE_URL, "") - .replace("/files/rdf.yaml", "") - .replace("/files/bioimageio.yaml", "") - ) - yield pytest.param(descr_url, sha, key, id=key) - - -KNOWN_INVALID: Collection[str] = set() - - -@expensive_test -@pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls())) -def test_rdf( - descr_url: HttpUrl, - sha: Sha256, - key: str, -): - if key in KNOWN_INVALID: - pytest.skip("known failure") - - from bioimageio.core import load_description_and_test - - descr = load_description_and_test(descr_url, sha256=sha) - assert not isinstance(descr, InvalidDescr) - assert ( - descr.validation_summary.status == "passed" - ), descr.validation_summary.format() diff --git a/bioimageio/core/weight_converters.py b/bioimageio/core/weight_converters.py deleted file mode 100644 index 6e0d06ec..00000000 --- a/bioimageio/core/weight_converters.py +++ /dev/null @@ -1,492 +0,0 @@ -# type: ignore # TODO: type -from __future__ import annotations - -import abc -from bioimageio.spec.model.v0_5 import WeightsEntryDescrBase -from typing import Any, List, Sequence, cast, Union -from typing_extensions import assert_never -import numpy as np -from numpy.testing import assert_array_almost_equal -from bioimageio.spec.model import v0_4, v0_5 -from torch.jit import ScriptModule -from bioimageio.core.digest_spec import get_test_inputs, get_member_id -from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter -import os -import shutil -from pathlib import Path -from typing import no_type_check -from zipfile import ZipFile -from bioimageio.spec._internal.version_type import Version -from bioimageio.spec._internal.io_utils import download - -try: - import torch -except ImportError: - torch = None - -try: - import tensorflow.saved_model -except Exception: - tensorflow = None - - -# additional convenience for pytorch state dict, eventually we want this in python-bioimageio too -# and for each weight format -def load_torch_model( 
# pyright: ignore[reportUnknownParameterType] - node: Union[v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr], -): - assert torch is not None - model = ( # pyright: ignore[reportUnknownVariableType] - PytorchModelAdapter.get_network(node) - ) - state = torch.load(download(node.source).path, map_location="cpu") - model.load_state_dict(state) # FIXME: check incompatible keys? - return model.eval() # pyright: ignore[reportUnknownVariableType] - - -class WeightConverter(abc.ABC): - @abc.abstractmethod - def convert( - self, model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], output_path: Path - ) -> WeightsEntryDescrBase: - raise NotImplementedError - - -class Pytorch2Onnx(WeightConverter): - def __init__(self): - super().__init__() - assert torch is not None - - def convert( - self, - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - output_path: Path, - use_tracing: bool = True, - test_decimal: int = 4, - verbose: bool = False, - opset_version: int = 15, - ) -> v0_5.OnnxWeightsDescr: - """ - Convert model weights from the PyTorch state_dict format to the ONNX format. - - Args: - model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): - The model description object that contains the model and its weights. - output_path (Path): - The file path where the ONNX model will be saved. - use_tracing (bool, optional): - Whether to use tracing or scripting to export the ONNX format. Defaults to True. - test_decimal (int, optional): - The decimal precision for comparing the results between the original and converted models. - This is used in the `assert_array_almost_equal` function to check if the outputs match. - Defaults to 4. - verbose (bool, optional): - If True, will print out detailed information during the ONNX export process. Defaults to False. - opset_version (int, optional): - The ONNX opset version to use for the export. Defaults to 15. - - Raises: - ValueError: - If the provided model does not have weights in the PyTorch state_dict format. 
- ImportError: - If ONNX Runtime is not available for checking the exported ONNX model. - ValueError: - If the results before and after weights conversion do not agree. - - Returns: - v0_5.OnnxWeightsDescr: - A descriptor object that contains information about the exported ONNX weights. - """ - - state_dict_weights_descr = model_descr.weights.pytorch_state_dict - if state_dict_weights_descr is None: - raise ValueError( - "The provided model does not have weights in the pytorch state dict format" - ) - - assert torch is not None - with torch.no_grad(): - sample = get_test_inputs(model_descr) - input_data = [ - sample.members[get_member_id(ipt)].data.data - for ipt in model_descr.inputs - ] - input_tensors = [torch.from_numpy(ipt) for ipt in input_data] - model = load_torch_model(state_dict_weights_descr) - - expected_tensors = model(*input_tensors) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors - ] - - if use_tracing: - torch.onnx.export( - model, - ( - tuple(input_tensors) - if len(input_tensors) > 1 - else input_tensors[0] - ), - str(output_path), - verbose=verbose, - opset_version=opset_version, - ) - else: - raise NotImplementedError - - try: - import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] - except ImportError: - raise ImportError( - "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." 
- ) - - # check the onnx model - sess = rt.InferenceSession(str(output_path)) - onnx_input_node_args = cast( - List[Any], sess.get_inputs() - ) # fixme: remove cast, try using rt.NodeArg instead of Any - onnx_inputs = { - input_name.name: inp - for input_name, inp in zip(onnx_input_node_args, input_data) - } - outputs = cast( - Sequence[np.ndarray[Any, Any]], sess.run(None, onnx_inputs) - ) # FIXME: remove cast - - try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=test_decimal) - except AssertionError as e: - raise ValueError( - f"Results before and after weights conversion do not agree:\n {str(e)}" - ) - - return v0_5.OnnxWeightsDescr( - source=output_path, parent="pytorch_state_dict", opset_version=opset_version - ) - - -class Pytorch2Torchscipt(WeightConverter): - def __init__(self): - super().__init__() - assert torch is not None - - def convert( - self, - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - output_path: Path, - use_tracing: bool = True, - ) -> v0_5.TorchscriptWeightsDescr: - """ - Convert model weights from the PyTorch `state_dict` format to TorchScript. - - Args: - model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): - The model description object that contains the model and its weights in the PyTorch `state_dict` format. - output_path (Path): - The file path where the TorchScript model will be saved. - use_tracing (bool): - Whether to use tracing or scripting to export the TorchScript format. - - `True`: Use tracing, which is recommended for models with straightforward control flow. - - `False`: Use scripting, which is better for models with dynamic control flow (e.g., loops, conditionals). - - Raises: - ValueError: - If the provided model does not have weights in the PyTorch `state_dict` format. - - Returns: - v0_5.TorchscriptWeightsDescr: - A descriptor object that contains information about the exported TorchScript weights. 
- """ - state_dict_weights_descr = model_descr.weights.pytorch_state_dict - if state_dict_weights_descr is None: - raise ValueError( - "The provided model does not have weights in the pytorch state dict format" - ) - - input_data = model_descr.get_input_test_arrays() - - with torch.no_grad(): - input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] - model = load_torch_model(state_dict_weights_descr) - scripted_module: ScriptModule = ( - torch.jit.trace(model, input_data) - if use_tracing - else torch.jit.script(model) - ) - self._check_predictions( - model=model, - scripted_model=scripted_module, - model_spec=model_descr, - input_data=input_data, - ) - - scripted_module.save(str(output_path)) - - return v0_5.TorchscriptWeightsDescr( - source=output_path, - pytorch_version=Version(torch.__version__), - parent="pytorch_state_dict", - ) - - def _check_predictions( - self, - model: Any, - scripted_model: Any, - model_spec: v0_4.ModelDescr | v0_5.ModelDescr, - input_data: Sequence[torch.Tensor], - ): - assert torch is not None - - def _check(input_: Sequence[torch.Tensor]) -> None: - expected_tensors = model(*input_) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors - ] - - output_tensors = scripted_model(*input_) - if isinstance(output_tensors, torch.Tensor): - output_tensors = [output_tensors] - outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in output_tensors - ] - - try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=4) - except AssertionError as e: - raise ValueError( - f"Results before and after weights conversion do not agree:\n {str(e)}" - ) - - _check(input_data) - - if len(model_spec.inputs) > 1: - return # FIXME: why don't we check multiple inputs? 
- - input_descr = model_spec.inputs[0] - if isinstance(input_descr, v0_4.InputTensorDescr): - if not isinstance(input_descr.shape, v0_4.ParameterizedInputShape): - return - min_shape = input_descr.shape.min - step = input_descr.shape.step - else: - min_shape: List[int] = [] - step: List[int] = [] - for axis in input_descr.axes: - if isinstance(axis.size, v0_5.ParameterizedSize): - min_shape.append(axis.size.min) - step.append(axis.size.step) - elif isinstance(axis.size, int): - min_shape.append(axis.size) - step.append(0) - elif axis.size is None: - raise NotImplementedError( - f"Can't verify inputs that don't specify their shape fully: {axis}" - ) - elif isinstance(axis.size, v0_5.SizeReference): - raise NotImplementedError(f"Can't handle axes like '{axis}' yet") - else: - assert_never(axis.size) - - input_data = input_data[0] - max_shape = input_data.shape - max_steps = 4 - - # check that input and output agree for decreasing input sizes - for step_factor in range(1, max_steps + 1): - slice_ = tuple( - ( - slice(None) - if step_dim == 0 - else slice(0, max_dim - step_factor * step_dim, 1) - ) - for max_dim, step_dim in zip(max_shape, step) - ) - sliced_input = input_data[slice_] - if any( - sliced_dim < min_dim - for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) - ): - return - _check([sliced_input]) - - -class Tensorflow2Bundled(WeightConverter): - def __init__(self): - super().__init__() - assert tensorflow is not None - - def convert( - self, model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], output_path: Path - ) -> v0_5.TensorflowSavedModelBundleWeightsDescr: - """ - Convert model weights from the 'keras_hdf5' format to the 'tensorflow_saved_model_bundle' format. - - This method handles the conversion of Keras HDF5 model weights into a TensorFlow SavedModel bundle, - which is the recommended format for deploying TensorFlow models. The method supports both TensorFlow 1.x - and 2.x versions, with appropriate checks to ensure compatibility. 
- - Adapted from: - https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py - - Args: - model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): - The bioimage.io model description containing the model's metadata and weights. - output_path (Path): - The directory where the TensorFlow SavedModel bundle will be saved. - This path must not already exist and, if necessary, will be zipped into a .zip file. - use_tracing (bool): - Placeholder argument; currently not used in this method but required to match the abstract method signature. - - Raises: - ValueError: - - If the specified `output_path` already exists. - - If the Keras HDF5 weights are missing in the model description. - RuntimeError: - If there is a mismatch between the TensorFlow version used by the model and the version installed. - NotImplementedError: - If the model has multiple inputs or outputs and TensorFlow 1.x is being used. - - Returns: - v0_5.TensorflowSavedModelBundleWeightsDescr: - A descriptor object containing information about the converted TensorFlow SavedModel bundle. 
- """ - assert tensorflow is not None - tf_major_ver = int(tensorflow.__version__.split(".")[0]) - - if output_path.suffix == ".zip": - output_path = output_path.with_suffix("") - zip_weights = True - else: - zip_weights = False - - if output_path.exists(): - raise ValueError(f"The ouptut directory at {output_path} must not exist.") - - if model_descr.weights.keras_hdf5 is None: - raise ValueError("Missing Keras Hdf5 weights to convert from.") - - weight_spec = model_descr.weights.keras_hdf5 - weight_path = download(weight_spec.source).path - - if weight_spec.tensorflow_version: - model_tf_major_ver = int(weight_spec.tensorflow_version.major) - if model_tf_major_ver != tf_major_ver: - raise RuntimeError( - f"Tensorflow major versions of model {model_tf_major_ver} is not {tf_major_ver}" - ) - - if tf_major_ver == 1: - if len(model_descr.inputs) != 1 or len(model_descr.outputs) != 1: - raise NotImplementedError( - "Weight conversion for models with multiple inputs or outputs is not yet implemented." 
- ) - return self._convert_tf1( - weight_path, - output_path, - model_descr.inputs[0].id, - model_descr.outputs[0].id, - zip_weights, - ) - else: - return self._convert_tf2(weight_path, output_path, zip_weights) - - def _convert_tf2( - self, keras_weight_path: Path, output_path: Path, zip_weights: bool - ) -> v0_5.TensorflowSavedModelBundleWeightsDescr: - try: - # try to build the tf model with the keras import from tensorflow - from tensorflow import keras - except Exception: - # if the above fails try to export with the standalone keras - import keras - - model = keras.models.load_model(keras_weight_path) - keras.models.save_model(model, output_path) - - if zip_weights: - output_path = self._zip_model_bundle(output_path) - print("TensorFlow model exported to", output_path) - - return v0_5.TensorflowSavedModelBundleWeightsDescr( - source=output_path, - parent="keras_hdf5", - tensorflow_version=Version(tensorflow.__version__), - ) - - # adapted from - # https://github.com/deepimagej/pydeepimagej/blob/master/pydeepimagej/yaml/create_config.py#L236 - def _convert_tf1( - self, - keras_weight_path: Path, - output_path: Path, - input_name: str, - output_name: str, - zip_weights: bool, - ) -> v0_5.TensorflowSavedModelBundleWeightsDescr: - try: - # try to build the tf model with the keras import from tensorflow - from tensorflow import ( - keras, # type: ignore - ) - - except Exception: - # if the above fails try to export with the standalone keras - import keras - - @no_type_check - def build_tf_model(): - keras_model = keras.models.load_model(keras_weight_path) - assert tensorflow is not None - builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) - signature = ( - tensorflow.saved_model.signature_def_utils.predict_signature_def( - inputs={input_name: keras_model.input}, - outputs={output_name: keras_model.output}, - ) - ) - - signature_def_map = { - tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature - } - - 
builder.add_meta_graph_and_variables( - keras.backend.get_session(), - [tensorflow.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map, - ) - builder.save() - - build_tf_model() - - if zip_weights: - output_path = self._zip_model_bundle(output_path) - print("TensorFlow model exported to", output_path) - - return v0_5.TensorflowSavedModelBundleWeightsDescr( - source=output_path, - parent="keras_hdf5", - tensorflow_version=Version(tensorflow.__version__), - ) - - def _zip_model_bundle(self, model_bundle_folder: Path): - zipped_model_bundle = model_bundle_folder.with_suffix(".zip") - - with ZipFile(zipped_model_bundle, "w") as zip_obj: - for root, _, files in os.walk(model_bundle_folder): - for filename in files: - src = os.path.join(root, filename) - zip_obj.write(src, os.path.relpath(src, model_bundle_folder)) - - try: - shutil.rmtree(model_bundle_folder) - except Exception: - print("TensorFlow bundled model was not removed after compression") - - return zipped_model_bundle diff --git a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py new file mode 100644 index 00000000..083bae5b --- /dev/null +++ b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -0,0 +1,184 @@ +import os +import shutil +from pathlib import Path +from typing import Union, no_type_check +from zipfile import ZipFile + +import tensorflow + +from bioimageio.core.io import ensure_unzipped +from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.version_type import Version +from bioimageio.spec.common import ZipPath +from bioimageio.spec.model import v0_4, v0_5 + +try: + # try to build the tf model with the keras import from tensorflow + from tensorflow import keras +except Exception: + # if the above fails try to export with the standalone keras + import keras + + +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, output_path: Path +) -> 
v0_5.TensorflowSavedModelBundleWeightsDescr: + """ + Convert model weights from the 'keras_hdf5' format to the 'tensorflow_saved_model_bundle' format. + + This method handles the conversion of Keras HDF5 model weights into a TensorFlow SavedModel bundle, + which is the recommended format for deploying TensorFlow models. The method supports both TensorFlow 1.x + and 2.x versions, with appropriate checks to ensure compatibility. + + Adapted from: + https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The bioimage.io model description containing the model's metadata and weights. + output_path (Path): + The directory where the TensorFlow SavedModel bundle will be saved. + This path must not already exist and, if necessary, will be zipped into a .zip file. + use_tracing (bool): + Placeholder argument; currently not used in this method but required to match the abstract method signature. + + Raises: + ValueError: + - If the specified `output_path` already exists. + - If the Keras HDF5 weights are missing in the model description. + RuntimeError: + If there is a mismatch between the TensorFlow version used by the model and the version installed. + NotImplementedError: + If the model has multiple inputs or outputs and TensorFlow 1.x is being used. + + Returns: + v0_5.TensorflowSavedModelBundleWeightsDescr: + A descriptor object containing information about the converted TensorFlow SavedModel bundle. 
+ """ + tf_major_ver = int(tensorflow.__version__.split(".")[0]) + + if output_path.suffix == ".zip": + output_path = output_path.with_suffix("") + zip_weights = True + else: + zip_weights = False + + if output_path.exists(): + raise ValueError(f"The ouptut directory at {output_path} must not exist.") + + if model_descr.weights.keras_hdf5 is None: + raise ValueError("Missing Keras Hdf5 weights to convert from.") + + weight_spec = model_descr.weights.keras_hdf5 + weight_path = download(weight_spec.source).path + + if weight_spec.tensorflow_version: + model_tf_major_ver = int(weight_spec.tensorflow_version.major) + if model_tf_major_ver != tf_major_ver: + raise RuntimeError( + f"Tensorflow major versions of model {model_tf_major_ver} is not {tf_major_ver}" + ) + + if tf_major_ver == 1: + if len(model_descr.inputs) != 1 or len(model_descr.outputs) != 1: + raise NotImplementedError( + "Weight conversion for models with multiple inputs or outputs is not yet implemented." + ) + + input_name = str( + d.id + if isinstance((d := model_descr.inputs[0]), v0_5.InputTensorDescr) + else d.name + ) + output_name = str( + d.id + if isinstance((d := model_descr.outputs[0]), v0_5.OutputTensorDescr) + else d.name + ) + return _convert_tf1( + ensure_unzipped(weight_path, Path("bioimageio_unzipped_tf_weights")), + output_path, + input_name, + output_name, + zip_weights, + ) + else: + return _convert_tf2(weight_path, output_path, zip_weights) + + +def _convert_tf2( + keras_weight_path: Union[Path, ZipPath], output_path: Path, zip_weights: bool +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + model = keras.models.load_model(keras_weight_path) # type: ignore + keras.models.save_model(model, output_path) # type: ignore + + if zip_weights: + output_path = _zip_model_bundle(output_path) + print("TensorFlow model exported to", output_path) + + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + 
tensorflow_version=Version(tensorflow.__version__), + ) + + +# adapted from +# https://github.com/deepimagej/pydeepimagej/blob/master/pydeepimagej/yaml/create_config.py#L236 +def _convert_tf1( + keras_weight_path: Path, + output_path: Path, + input_name: str, + output_name: str, + zip_weights: bool, +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + + @no_type_check + def build_tf_model(): + keras_model = keras.models.load_model(keras_weight_path) + assert tensorflow is not None + builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) + signature = tensorflow.saved_model.signature_def_utils.predict_signature_def( + inputs={input_name: keras_model.input}, + outputs={output_name: keras_model.output}, + ) + + signature_def_map = { + tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + } + + builder.add_meta_graph_and_variables( + keras.backend.get_session(), + [tensorflow.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map, + ) + builder.save() + + build_tf_model() + + if zip_weights: + output_path = _zip_model_bundle(output_path) + print("TensorFlow model exported to", output_path) + + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=Version(tensorflow.__version__), + ) + + +def _zip_model_bundle(model_bundle_folder: Path): + zipped_model_bundle = model_bundle_folder.with_suffix(".zip") + + with ZipFile(zipped_model_bundle, "w") as zip_obj: + for root, _, files in os.walk(model_bundle_folder): + for filename in files: + src = os.path.join(root, filename) + zip_obj.write(src, os.path.relpath(src, model_bundle_folder)) + + try: + shutil.rmtree(model_bundle_folder) + except Exception: + print("TensorFlow bundled model was not removed after compression") + + return zipped_model_bundle diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index 
acb621e2..9f2b2e6f 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -1,28 +1,19 @@ -import abc -import os -import shutil from pathlib import Path -from typing import Any, List, Sequence, Union, cast, no_type_check -from zipfile import ZipFile +from typing import Any, List, Sequence, Union, cast import numpy as np import torch from numpy.testing import assert_array_almost_equal -from torch.jit import ScriptModule -from typing_extensions import assert_never +from bioimageio.core.backends.pytorch_backend import load_torch_model from bioimageio.core.digest_spec import get_member_id, get_test_inputs -from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter -from bioimageio.spec._internal.io_utils import download -from bioimageio.spec._internal.version_type import Version from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.model.v0_5 import WeightsEntryDescrBase def convert( model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, - # output_path: Path, + output_path: Path, use_tracing: bool = True, test_decimal: int = 4, verbose: bool = False, @@ -81,7 +72,7 @@ def convert( ] if use_tracing: - torch.onnx.export( + _ = torch.onnx.export( model, (tuple(input_tensors) if len(input_tensors) > 1 else input_tensors[0]), str(output_path), diff --git a/bioimageio/core/weight_converters/pytorch_to_torchscript.py b/bioimageio/core/weight_converters/pytorch_to_torchscript.py new file mode 100644 index 00000000..a724e5f8 --- /dev/null +++ b/bioimageio/core/weight_converters/pytorch_to_torchscript.py @@ -0,0 +1,154 @@ +from pathlib import Path +from typing import Any, List, Sequence, Tuple, Union + +import numpy as np +import torch +from numpy.testing import assert_array_almost_equal +from torch.jit import ScriptModule +from typing_extensions import assert_never + +from bioimageio.core.backends.pytorch_backend import load_torch_model +from 
bioimageio.spec._internal.version_type import Version +from bioimageio.spec.model import v0_4, v0_5 + + +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + output_path: Path, + use_tracing: bool = True, +) -> v0_5.TorchscriptWeightsDescr: + """ + Convert model weights from the PyTorch `state_dict` format to TorchScript. + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights in the PyTorch `state_dict` format. + output_path (Path): + The file path where the TorchScript model will be saved. + use_tracing (bool): + Whether to use tracing or scripting to export the TorchScript format. + - `True`: Use tracing, which is recommended for models with straightforward control flow. + - `False`: Use scripting, which is better for models with dynamic control flow (e.g., loops, conditionals). + + Raises: + ValueError: + If the provided model does not have weights in the PyTorch `state_dict` format. + + Returns: + v0_5.TorchscriptWeightsDescr: + A descriptor object that contains information about the exported TorchScript weights. + """ + state_dict_weights_descr = model_descr.weights.pytorch_state_dict + if state_dict_weights_descr is None: + raise ValueError( + "The provided model does not have weights in the pytorch state dict format" + ) + + input_data = model_descr.get_input_test_arrays() + + with torch.no_grad(): + input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] + model = load_torch_model(state_dict_weights_descr) + scripted_module: Union[ # pyright: ignore[reportUnknownVariableType] + ScriptModule, Tuple[Any, ...] 
+ ] = ( + torch.jit.trace(model, input_data) + if use_tracing + else torch.jit.script(model) + ) + assert not isinstance(scripted_module, tuple), scripted_module + _check_predictions( + model=model, + scripted_model=scripted_module, + model_spec=model_descr, + input_data=input_data, + ) + + scripted_module.save(str(output_path)) + + return v0_5.TorchscriptWeightsDescr( + source=output_path, + pytorch_version=Version(torch.__version__), + parent="pytorch_state_dict", + ) + + +def _check_predictions( + model: Any, + scripted_model: Any, + model_spec: v0_4.ModelDescr | v0_5.ModelDescr, + input_data: Sequence[torch.Tensor], +): + def _check(input_: Sequence[torch.Tensor]) -> None: + expected_tensors = model(*input_) + if isinstance(expected_tensors, torch.Tensor): + expected_tensors = [expected_tensors] + expected_outputs: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in expected_tensors + ] + + output_tensors = scripted_model(*input_) + if isinstance(output_tensors, torch.Tensor): + output_tensors = [output_tensors] + outputs: List[np.ndarray[Any, Any]] = [out.numpy() for out in output_tensors] + + try: + for exp, out in zip(expected_outputs, outputs): + assert_array_almost_equal(exp, out, decimal=4) + except AssertionError as e: + raise ValueError( + f"Results before and after weights conversion do not agree:\n {str(e)}" + ) + + _check(input_data) + + if len(model_spec.inputs) > 1: + return # FIXME: why don't we check multiple inputs? 
+ + input_descr = model_spec.inputs[0] + if isinstance(input_descr, v0_4.InputTensorDescr): + if not isinstance(input_descr.shape, v0_4.ParameterizedInputShape): + return + min_shape = input_descr.shape.min + step = input_descr.shape.step + else: + min_shape: List[int] = [] + step: List[int] = [] + for axis in input_descr.axes: + if isinstance(axis.size, v0_5.ParameterizedSize): + min_shape.append(axis.size.min) + step.append(axis.size.step) + elif isinstance(axis.size, int): + min_shape.append(axis.size) + step.append(0) + elif axis.size is None: + raise NotImplementedError( + f"Can't verify inputs that don't specify their shape fully: {axis}" + ) + elif isinstance(axis.size, v0_5.SizeReference): + raise NotImplementedError(f"Can't handle axes like '{axis}' yet") + else: + assert_never(axis.size) + + input_tensor = input_data[0] + max_shape = input_tensor.shape + max_steps = 4 + + # check that input and output agree for decreasing input sizes + for step_factor in range(1, max_steps + 1): + slice_ = tuple( + ( + slice(None) + if step_dim == 0 + else slice(0, max_dim - step_factor * step_dim, 1) + ) + for max_dim, step_dim in zip(max_shape, step) + ) + sliced_input = input_tensor[slice_] + if any( + sliced_dim < min_dim + for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) + ): + return + _check([sliced_input]) diff --git a/tests/conftest.py b/tests/conftest.py index 253ade2f..32a2b6a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,7 @@ import torch torch_version = tuple(map(int, torch.__version__.split(".")[:2])) - logger.warning(f"detected torch version {torch_version}.x") + logger.warning(f"detected torch version {torch.__version__}") except ImportError: torch = None torch_version = None @@ -29,7 +29,7 @@ try: import tensorflow # type: ignore - tf_major_version = int(tensorflow.__version__.split(".")[0]) # type: ignore + tf_major_version = int(tensorflow.__version__.split(".")[0]) except ImportError: tensorflow = None 
tf_major_version = None diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 88010744..71f6ccfa 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -1,37 +1,36 @@ # type: ignore # TODO enable type checking +import os import zipfile from pathlib import Path import pytest -import os - -from bioimageio.spec import load_description -from bioimageio.spec.model import v0_5 - from bioimageio.core.weight_converters import ( - Pytorch2Torchscipt, Pytorch2Onnx, Tensorflow2Bundled, ) +from bioimageio.spec import load_description +from bioimageio.spec.model import v0_5 def test_torchscript_converter(any_torch_model, tmp_path): + from bioimageio.core.weight_converters.pytorch_to_torchscript import convert + bio_model = load_description(any_torch_model) out_path = tmp_path / "weights.pt" - util = Pytorch2Torchscipt() - ret_val = util.convert(bio_model, out_path) + ret_val = convert(bio_model, out_path) assert out_path.exists() assert isinstance(ret_val, v0_5.TorchscriptWeightsDescr) assert ret_val.source == out_path def test_onnx_converter(convert_to_onnx, tmp_path): + from bioimageio.core.weight_converters.pytorch_to_onnx import convert + bio_model = load_description(convert_to_onnx) out_path = tmp_path / "weights.onnx" opset_version = 15 - util = Pytorch2Onnx() - ret_val = util.convert( + ret_val = convert( model_descr=bio_model, output_path=out_path, test_decimal=3, diff --git a/tests/utils.py b/tests/utils.py index 3a8e695b..805eecfa 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,3 +1,5 @@ +"""utils to test bioimageio.core""" + import os from functools import wraps from typing import Any, Protocol, Sequence, Type From 80f9ed0286da086cc4ee10c893f01979c46ed13e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 9 Dec 2024 14:49:15 +0100 Subject: [PATCH 028/187] update test_weight_converters.py --- tests/test_weight_converters.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 71f6ccfa..57f40ce7 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -5,10 +5,7 @@ import pytest -from bioimageio.core.weight_converters import ( - Pytorch2Onnx, - Tensorflow2Bundled, -) +from bioimageio.core import test_model from bioimageio.spec import load_description from bioimageio.spec.model import v0_5 @@ -40,13 +37,17 @@ def test_onnx_converter(convert_to_onnx, tmp_path): assert isinstance(ret_val, v0_5.OnnxWeightsDescr) assert ret_val.opset_version == opset_version assert ret_val.source == out_path + bio_model.weights.onnx = ret_val + summary = test_model(bio_model, weights_format="onnx") + assert summary.status == "passed", summary.format() def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): + from bioimageio.core.weight_converters.keras_to_tensorflow import convert + model = load_description(any_keras_model) out_path = tmp_path / "weights.h5" - util = Tensorflow2Bundled() - ret_val = util.convert(model, out_path) + ret_val = convert(model, output_path=out_path) assert out_path.exists() assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) assert ret_val.source == out_path From dad81860c1f0d5f5b4304da96b773fd7c78924d8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 9 Dec 2024 14:49:28 +0100 Subject: [PATCH 029/187] add test_bioimageio_collection.py --- tests/test_bioimageio_collection.py | 60 +++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/test_bioimageio_collection.py diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py new file mode 100644 index 00000000..2cf9ced0 --- /dev/null +++ b/tests/test_bioimageio_collection.py @@ -0,0 +1,60 @@ +from typing import Any, Collection, Dict, Iterable, Mapping, Tuple + +import pytest +import requests +from pydantic import HttpUrl + +from bioimageio.spec import InvalidDescr +from bioimageio.spec.common 
import Sha256 +from tests.utils import ParameterSet, expensive_test + +BASE_URL = "https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/" + + +def _get_latest_rdf_sources(): + entries: Any = requests.get(BASE_URL + "all_versions.json").json()["entries"] + ret: Dict[str, Tuple[HttpUrl, Sha256]] = {} + for entry in entries: + version = entry["versions"][0] + ret[f"{entry['concept']}/{version['v']}"] = ( + HttpUrl(version["source"]), # pyright: ignore[reportCallIssue] + Sha256(version["sha256"]), + ) + + return ret + + +ALL_LATEST_RDF_SOURCES: Mapping[str, Tuple[HttpUrl, Sha256]] = _get_latest_rdf_sources() + + +def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: + for descr_url, sha in ALL_LATEST_RDF_SOURCES.values(): + key = ( + str(descr_url) + .replace(BASE_URL, "") + .replace("/files/rdf.yaml", "") + .replace("/files/bioimageio.yaml", "") + ) + yield pytest.param(descr_url, sha, key, id=key) + + +KNOWN_INVALID: Collection[str] = set() + + +@expensive_test +@pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls())) +def test_rdf( + descr_url: HttpUrl, + sha: Sha256, + key: str, +): + if key in KNOWN_INVALID: + pytest.skip("known failure") + + from bioimageio.core import load_description_and_test + + descr = load_description_and_test(descr_url, sha256=sha) + assert not isinstance(descr, InvalidDescr) + assert ( + descr.validation_summary.status == "passed" + ), descr.validation_summary.format() From e8354322f7ec6c29d89444a66128f1b35fae3dc7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 9 Dec 2024 14:53:34 +0100 Subject: [PATCH 030/187] add onnx as dev dep --- dev/env-py38.yaml | 1 + dev/env-tf.yaml | 1 + dev/env-wo-python.yaml | 1 + dev/env.yaml | 1 + setup.py | 1 + 5 files changed, 5 insertions(+) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 69030cc9..23286840 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -17,6 +17,7 @@ dependencies: - loguru - matplotlib - numpy + - onnx - onnxruntime - 
packaging>=17.0 - pdoc diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 799d2a59..ac443f65 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -17,6 +17,7 @@ dependencies: - loguru - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index a0b7c978..2a77d25b 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -17,6 +17,7 @@ dependencies: - loguru - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc diff --git a/dev/env.yaml b/dev/env.yaml index a65158d9..7ff6abed 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -16,6 +16,7 @@ dependencies: - loguru - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc diff --git a/setup.py b/setup.py index 95414371..c1a60e40 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,7 @@ "jupyter", "jupyter-black", "matplotlib", + "onnx", "onnxruntime", "packaging>=17.0", "pre-commit", From 459696dc50defa81966528b38c20a214a485cf60 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 10 Dec 2024 10:31:56 +0100 Subject: [PATCH 031/187] add get_pre_and_postprocessing --- bioimageio/core/proc_setup.py | 34 +++++++++++++++++++ .../core/weight_converters/pytorch_to_onnx.py | 7 +++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index b9afb711..89277da5 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -1,4 +1,5 @@ from typing import ( + Callable, Iterable, List, Mapping, @@ -45,6 +46,11 @@ class PreAndPostprocessing(NamedTuple): post: List[Processing] +class _ProcessingCallables(NamedTuple): + pre: Callable[[Sample], None] + post: Callable[[Sample], None] + + class _SetupProcessing(NamedTuple): pre: List[Processing] post: List[Processing] @@ -52,6 +58,34 @@ class _SetupProcessing(NamedTuple): post_measures: Set[Measure] +class _ApplyProcs: + def __init__(self, procs: Sequence[Processing]): + super().__init__() + 
self._procs = procs + + def __call__(self, sample: Sample) -> None: + for op in self._procs: + op(sample) + + +def get_pre_and_postprocessing( + model: AnyModelDescr, + *, + dataset_for_initial_statistics: Iterable[Sample], + keep_updating_initial_dataset_stats: bool = False, + fixed_dataset_stats: Optional[Mapping[DatasetMeasure, MeasureValue]] = None, +) -> _ProcessingCallables: + """Creates callables to apply pre- and postprocessing in-place to a sample""" + + setup = setup_pre_and_postprocessing( + model=model, + dataset_for_initial_statistics=dataset_for_initial_statistics, + keep_updating_initial_dataset_stats=keep_updating_initial_dataset_stats, + fixed_dataset_stats=fixed_dataset_stats, + ) + return _ProcessingCallables(_ApplyProcs(setup.pre), _ApplyProcs(setup.post)) + + def setup_pre_and_postprocessing( model: AnyModelDescr, dataset_for_initial_statistics: Iterable[Sample], diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index 9f2b2e6f..468e56ac 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -7,6 +7,7 @@ from bioimageio.core.backends.pytorch_backend import load_torch_model from bioimageio.core.digest_spec import get_member_id, get_test_inputs +from bioimageio.core.proc_setup import get_pre_and_postprocessing from bioimageio.spec.model import v0_4, v0_5 @@ -61,6 +62,10 @@ def convert( input_data = [ sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs ] + procs = get_pre_and_postprocessing( + model_descr, dataset_for_initial_statistics=[sample] + ) + procs.pre(sample) input_tensors = [torch.from_numpy(ipt) for ipt in input_data] model = load_torch_model(state_dict_weights_descr) @@ -74,7 +79,7 @@ def convert( if use_tracing: _ = torch.onnx.export( model, - (tuple(input_tensors) if len(input_tensors) > 1 else input_tensors[0]), + tuple(input_tensors), str(output_path), 
verbose=verbose, opset_version=opset_version, From 169cf1760447dfd0e10c79cae29cf0b4d72c391c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 10 Dec 2024 10:36:30 +0100 Subject: [PATCH 032/187] use dim instead of deprecated dims arg name --- bioimageio/core/stat_calculators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index 41233a5b..01d553e7 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -137,7 +137,7 @@ def compute( else: n = int(np.prod([tensor.sizes[d] for d in self._axes])) - var = xr.dot(c, c, dims=self._axes) / n + var = xr.dot(c, c, dim=self._axes) / n assert isinstance(var, xr.DataArray) std = np.sqrt(var) assert isinstance(std, xr.DataArray) From 7d8e7fc396e899ad693ffe8a7896fde626d41bfd Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 11 Dec 2024 10:56:43 +0100 Subject: [PATCH 033/187] update tests --- tests/test_weight_converters.py | 82 ++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 16 deletions(-) diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 57f40ce7..d9e95ea8 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -10,25 +10,28 @@ from bioimageio.spec.model import v0_5 -def test_torchscript_converter(any_torch_model, tmp_path): +def test_pytorch_to_torchscript(any_torch_model, tmp_path): from bioimageio.core.weight_converters.pytorch_to_torchscript import convert - bio_model = load_description(any_torch_model) + model_descr = load_description(any_torch_model) out_path = tmp_path / "weights.pt" - ret_val = convert(bio_model, out_path) + ret_val = convert(model_descr, out_path) assert out_path.exists() assert isinstance(ret_val, v0_5.TorchscriptWeightsDescr) assert ret_val.source == out_path + model_descr.weights.torchscript = ret_val + summary = test_model(model_descr, weight_format="torchscript") + assert summary.status 
== "passed", summary.format() -def test_onnx_converter(convert_to_onnx, tmp_path): +def test_pytorch_to_onnx(convert_to_onnx, tmp_path): from bioimageio.core.weight_converters.pytorch_to_onnx import convert - bio_model = load_description(convert_to_onnx) + model_descr = load_description(convert_to_onnx) out_path = tmp_path / "weights.onnx" opset_version = 15 ret_val = convert( - model_descr=bio_model, + model_descr=model_descr, output_path=out_path, test_decimal=3, opset_version=opset_version, @@ -37,28 +40,34 @@ def test_onnx_converter(convert_to_onnx, tmp_path): assert isinstance(ret_val, v0_5.OnnxWeightsDescr) assert ret_val.opset_version == opset_version assert ret_val.source == out_path - bio_model.weights.onnx = ret_val - summary = test_model(bio_model, weights_format="onnx") + + model_descr.weights.onnx = ret_val + summary = test_model(model_descr, weight_format="onnx") assert summary.status == "passed", summary.format() -def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): +def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): from bioimageio.core.weight_converters.keras_to_tensorflow import convert - model = load_description(any_keras_model) - out_path = tmp_path / "weights.h5" - ret_val = convert(model, output_path=out_path) + model_descr = load_description(any_keras_model) + out_path = tmp_path / "weights" + ret_val = convert(model_descr, output_path=out_path) assert out_path.exists() assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) assert ret_val.source == out_path + model_descr.weights.keras = ret_val + summary = test_model(model_descr, weight_format="keras_hdf5") + assert summary.status == "passed", summary.format() + @pytest.mark.skip() -def test_tensorflow_converter_zipped(any_keras_model: Path, tmp_path: Path): +def test_keras_to_tensorflow_zipped(any_keras_model: Path, tmp_path: Path): + from bioimageio.core.weight_converters.keras_to_tensorflow import convert + out_path = tmp_path / 
"weights.zip" - model = load_description(any_keras_model) - util = Tensorflow2Bundled() - ret_val = util.convert(model, out_path) + model_descr = load_description(any_keras_model) + ret_val = convert(model_descr, out_path) assert out_path.exists() assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) @@ -67,3 +76,44 @@ def test_tensorflow_converter_zipped(any_keras_model: Path, tmp_path: Path): with zipfile.ZipFile(out_path, "r") as f: names = set([name for name in f.namelist()]) assert len(expected_names - names) == 0 + + model_descr.weights.keras = ret_val + summary = test_model(model_descr, weight_format="keras_hdf5") + assert summary.status == "passed", summary.format() + + +# TODO: add tensorflow_to_keras converter +# def test_tensorflow_to_keras(any_tensorflow_model: Path, tmp_path: Path): +# from bioimageio.core.weight_converters.tensorflow_to_keras import convert + +# model_descr = load_description(any_tensorflow_model) +# out_path = tmp_path / "weights.h5" +# ret_val = convert(model_descr, output_path=out_path) +# assert out_path.exists() +# assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) +# assert ret_val.source == out_path + +# model_descr.weights.keras = ret_val +# summary = test_model(model_descr, weight_format="keras_hdf5") +# assert summary.status == "passed", summary.format() + + +# @pytest.mark.skip() +# def test_tensorflow_to_keras_zipped(any_tensorflow_model: Path, tmp_path: Path): +# from bioimageio.core.weight_converters.tensorflow_to_keras import convert + +# out_path = tmp_path / "weights.zip" +# model_descr = load_description(any_tensorflow_model) +# ret_val = convert(model_descr, out_path) + +# assert out_path.exists() +# assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + +# expected_names = {"saved_model.pb", "variables/variables.index"} +# with zipfile.ZipFile(out_path, "r") as f: +# names = set([name for name in f.namelist()]) +# assert len(expected_names - names) == 0 + +# 
model_descr.weights.keras = ret_val +# summary = test_model(model_descr, weight_format="keras_hdf5") +# assert summary.status == "passed", summary.format() From 8b2727e00a2a789da3c0e97676008e31263adfdd Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 11 Dec 2024 15:29:36 +0100 Subject: [PATCH 034/187] add todo --- bioimageio/core/digest_spec.py | 2 +- bioimageio/core/stat_calculators.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 854e6a7c..441243c6 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -339,7 +339,7 @@ def create_sample_for_model( sample_id: SampleId = None, inputs: Optional[ PerMember[Union[Tensor, xr.DataArray, NDArray[Any], Path]] - ] = None, # TODO: make non-optional + ] = None, # TODO: make non-optional # TODO: accept tuple of tensor sources **kwargs: NDArray[Any], # TODO: deprecate in favor of `inputs` ) -> Sample: """Create a sample from a single set of input(s) for a specific bioimage.io model diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index 01d553e7..f3aa8dcd 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -306,7 +306,8 @@ def _initialize(self, tensor_sizes: PerAxis[int]): out_sizes[d] = s self._dims, self._shape = zip(*out_sizes.items()) - d = int(np.prod(self._shape[1:])) # type: ignore + assert self._shape is not None + d = int(np.prod(self._shape[1:])) self._digest = [TDigest() for _ in range(d)] self._indices = product(*map(range, self._shape[1:])) From 02252acc542d8edc90ef4a08610eca14c13402ae Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 11 Dec 2024 15:30:33 +0100 Subject: [PATCH 035/187] udpate pytorch_to_onnx converter --- .../core/weight_converters/pytorch_to_onnx.py | 63 +++++++++---------- tests/test_weight_converters.py | 3 +- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git 
a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index 468e56ac..71ac17a6 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -3,7 +3,7 @@ import numpy as np import torch -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_allclose from bioimageio.core.backends.pytorch_backend import load_torch_model from bioimageio.core.digest_spec import get_member_id, get_test_inputs @@ -16,7 +16,8 @@ def convert( *, output_path: Path, use_tracing: bool = True, - test_decimal: int = 4, + relative_tolerance: float = 1e-07, + absolute_tolerance: float = 0, verbose: bool = False, opset_version: int = 15, ) -> v0_5.OnnxWeightsDescr: @@ -31,10 +32,6 @@ def convert( The file path where the ONNX model will be saved. use_tracing (bool, optional): Whether to use tracing or scripting to export the ONNX format. Defaults to True. - test_decimal (int, optional): - The decimal precision for comparing the results between the original and converted models. - This is used in the `assert_array_almost_equal` function to check if the outputs match. - Defaults to 4. verbose (bool, optional): If True, will print out detailed information during the ONNX export process. Defaults to False. 
opset_version (int, optional): @@ -57,29 +54,29 @@ def convert( "The provided model does not have weights in the pytorch state dict format" ) + sample = get_test_inputs(model_descr) + procs = get_pre_and_postprocessing( + model_descr, dataset_for_initial_statistics=[sample] + ) + procs.pre(sample) + inputs_numpy = [ + sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs + ] + inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] + model = load_torch_model(state_dict_weights_descr) with torch.no_grad(): - sample = get_test_inputs(model_descr) - input_data = [ - sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs - ] - procs = get_pre_and_postprocessing( - model_descr, dataset_for_initial_statistics=[sample] - ) - procs.pre(sample) - input_tensors = [torch.from_numpy(ipt) for ipt in input_data] - model = load_torch_model(state_dict_weights_descr) + outputs_original_torch = model(*inputs_torch) + if isinstance(outputs_original_torch, torch.Tensor): + outputs_original_torch = [outputs_original_torch] - expected_tensors = model(*input_tensors) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors + outputs_original: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in outputs_original_torch ] if use_tracing: _ = torch.onnx.export( model, - tuple(input_tensors), + tuple(inputs_torch), str(output_path), verbose=verbose, opset_version=opset_version, @@ -98,22 +95,24 @@ def convert( sess = rt.InferenceSession(str(output_path)) onnx_input_node_args = cast( List[Any], sess.get_inputs() - ) # fixme: remove cast, try using rt.NodeArg instead of Any - onnx_inputs = { + ) # FIXME: remove cast, try using rt.NodeArg instead of Any + inputs_onnx = { input_name.name: inp - for input_name, inp in zip(onnx_input_node_args, input_data) + for input_name, inp in zip(onnx_input_node_args, inputs_numpy) } - 
outputs = cast( - Sequence[np.ndarray[Any, Any]], sess.run(None, onnx_inputs) + outputs_onnx = cast( + Sequence[np.ndarray[Any, Any]], sess.run(None, inputs_onnx) ) # FIXME: remove cast try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=test_decimal) + for out_original, out_onnx in zip(outputs_original, outputs_onnx): + assert_allclose( + out_original, out_onnx, rtol=relative_tolerance, atol=absolute_tolerance + ) except AssertionError as e: - raise ValueError( - f"Results before and after weights conversion do not agree:\n {str(e)}" - ) + raise AssertionError( + "Inference results of using original and converted weights do not match" + ) from e return v0_5.OnnxWeightsDescr( source=output_path, parent="pytorch_state_dict", opset_version=opset_version diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index d9e95ea8..24d2b9cb 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -27,13 +27,12 @@ def test_pytorch_to_torchscript(any_torch_model, tmp_path): def test_pytorch_to_onnx(convert_to_onnx, tmp_path): from bioimageio.core.weight_converters.pytorch_to_onnx import convert - model_descr = load_description(convert_to_onnx) + model_descr = load_description(convert_to_onnx, format_version="latest") out_path = tmp_path / "weights.onnx" opset_version = 15 ret_val = convert( model_descr=model_descr, output_path=out_path, - test_decimal=3, opset_version=opset_version, ) assert os.path.exists(out_path) From ab8616f2db4244af9f55e48c0434157af901cc72 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 18 Dec 2024 16:45:40 +0100 Subject: [PATCH 036/187] expose determinism to cli test command --- bioimageio/core/cli.py | 4 ++++ bioimageio/core/commands.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 1fc95310..49700b43 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -149,6 +149,9 @@ class 
TestCmd(CmdBase, WithSource): summary_path: Optional[Path] = Field(None, alias="summary-path") """Path to save validation summary as JSON file.""" + determinism: Literal["seed_only", "full"] = "seed_only" + """Modes to improve reproducibility of test outputs.""" + def run(self): sys.exit( test( @@ -158,6 +161,7 @@ def run(self): decimal=self.decimal, summary_path=self.summary_path, runtime_env=self.runtime_env, + determinism=self.determinism, ) ) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 9804a93e..92d7ddbc 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -30,6 +30,7 @@ def test( runtime_env: Union[ Literal["currently-active", "as-described"], Path ] = "currently-active", + determinism: Literal["seed_only", "full"] = "seed_only", ) -> int: """Test a bioimageio resource. @@ -45,6 +46,7 @@ def test( devices=[devices] if isinstance(devices, str) else devices, decimal=decimal, runtime_env=runtime_env, + determinism=determinism, ) summary.display() if summary_path is not None: From f11b42883c8eba8c993134d3fdbae58d5ba9b78b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 18 Dec 2024 16:48:44 +0100 Subject: [PATCH 037/187] WIP unify model adapters --- bioimageio/core/_prediction_pipeline.py | 20 +- bioimageio/core/backends/_model_adapter.py | 129 +++++++++--- bioimageio/core/backends/onnx_backend.py | 7 +- bioimageio/core/backends/pytorch_backend.py | 81 ++++---- .../core/backends/tensorflow_backend.py | 189 +++++++++--------- .../core/backends/torchscript_backend.py | 4 - 6 files changed, 242 insertions(+), 188 deletions(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index 9f5ccc3e..33fd4f33 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -121,19 +121,9 @@ def predict_sample_block( self.apply_preprocessing(sample_block) output_meta = sample_block.get_transformed_meta(self._block_transform) - output = 
output_meta.with_data( - { - tid: out - for tid, out in zip( - self._output_ids, - self._adapter.forward( - *(sample_block.members.get(t) for t in self._input_ids) - ), - ) - if out is not None - }, - stat=sample_block.stat, - ) + local_output = self._adapter.forward(sample_block) + + output = output_meta.with_data(local_output.members, stat=local_output.stat) if not skip_postprocessing: self.apply_postprocessing(output) @@ -157,9 +147,7 @@ def predict_sample_without_blocking( out_id: out for out_id, out in zip( self._output_ids, - self._adapter.forward( - *(sample.members.get(in_id) for in_id in self._input_ids) - ), + self._adapter.forward(sample), ) if out is not None }, diff --git a/bioimageio/core/backends/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py index 99919aac..6321a8df 100644 --- a/bioimageio/core/backends/_model_adapter.py +++ b/bioimageio/core/backends/_model_adapter.py @@ -1,16 +1,36 @@ import warnings from abc import ABC, abstractmethod -from typing import List, Optional, Sequence, Tuple, Union, final +from typing import ( + Any, + List, + Literal, + Optional, + Sequence, + Tuple, + Union, + assert_never, + final, +) + +from numpy.typing import NDArray -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.core.digest_spec import get_axes_infos, get_member_ids +from bioimageio.core.sample import Sample +from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from ..tensor import Tensor -WeightsFormat = Union[v0_4.WeightsFormat, v0_5.WeightsFormat] +SupportedWeightsFormat = Literal[ + "keras_hdf5", + "onnx", + "pytorch_state_dict", + "tensorflow_saved_model_bundle", + "torchscript", +] # Known weight formats in order of priority # First match wins -DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[WeightsFormat, ...] = ( +DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[SupportedWeightsFormat, ...] 
= ( "pytorch_state_dict", "tensorflow_saved_model_bundle", "torchscript", @@ -39,6 +59,22 @@ class ModelAdapter(ABC): ``` """ + def __init__(self, model_description: AnyModelDescr): + super().__init__() + self._model_descr = model_description + self._input_ids = get_member_ids(model_description.inputs) + self._output_ids = get_member_ids(model_description.outputs) + self._input_axes = [ + tuple(a.id for a in get_axes_infos(t)) for t in model_description.inputs + ] + self._output_axes = [ + tuple(a.id for a in get_axes_infos(t)) for t in model_description.outputs + ] + if isinstance(model_description, v0_4.ModelDescr): + self._input_is_optional = [False] * len(model_description.inputs) + else: + self._input_is_optional = [ipt.optional for ipt in model_description.inputs] + @final @classmethod def create( @@ -46,7 +82,7 @@ def create( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, devices: Optional[Sequence[str]] = None, - weight_format_priority_order: Optional[Sequence[WeightsFormat]] = None, + weight_format_priority_order: Optional[Sequence[SupportedWeightsFormat]] = None, ): """ Creates model adapter based on the passed spec @@ -59,42 +95,44 @@ def create( ) weights = model_description.weights - errors: List[Tuple[WeightsFormat, Exception]] = [] + errors: List[Tuple[SupportedWeightsFormat, Exception]] = [] weight_format_priority_order = ( DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER if weight_format_priority_order is None else weight_format_priority_order ) # limit weight formats to the ones present - weight_format_priority_order = [ + weight_format_priority_order_present: Sequence[SupportedWeightsFormat] = [ w for w in weight_format_priority_order if getattr(weights, w) is not None ] + if not weight_format_priority_order_present: + raise ValueError( + f"None of the specified weight formats ({weight_format_priority_order}) is present ({weight_format_priority_order_present})" + ) - for wf in weight_format_priority_order: - if wf == "pytorch_state_dict" and 
weights.pytorch_state_dict is not None: + for wf in weight_format_priority_order_present: + if wf == "pytorch_state_dict": + assert weights.pytorch_state_dict is not None try: from .pytorch_backend import PytorchModelAdapter return PytorchModelAdapter( - outputs=model_description.outputs, - weights=weights.pytorch_state_dict, - devices=devices, + model_description=model_description, devices=devices ) except Exception as e: errors.append((wf, e)) - elif ( - wf == "tensorflow_saved_model_bundle" - and weights.tensorflow_saved_model_bundle is not None - ): + elif wf == "tensorflow_saved_model_bundle": + assert weights.tensorflow_saved_model_bundle is not None try: - from .tensorflow_backend import TensorflowModelAdapter + from .tensorflow_backend import create_tf_model_adapter - return TensorflowModelAdapter( + return create_tf_model_adapter( model_description=model_description, devices=devices ) except Exception as e: errors.append((wf, e)) - elif wf == "onnx" and weights.onnx is not None: + elif wf == "onnx": + assert weights.onnx is not None try: from .onnx_backend import ONNXModelAdapter @@ -103,7 +141,8 @@ def create( ) except Exception as e: errors.append((wf, e)) - elif wf == "torchscript" and weights.torchscript is not None: + elif wf == "torchscript": + assert weights.torchscript is not None try: from .torchscript_backend import TorchscriptModelAdapter @@ -112,7 +151,8 @@ def create( ) except Exception as e: errors.append((wf, e)) - elif wf == "keras_hdf5" and weights.keras_hdf5 is not None: + elif wf == "keras_hdf5": + assert weights.keras_hdf5 is not None # keras can either be installed as a separate package or used as part of tensorflow # we try to first import the keras model adapter using the separate package and, # if it is not available, try to load the one using tf @@ -127,6 +167,8 @@ def create( ) except Exception as e: errors.append((wf, e)) + else: + assert_never(wf) assert errors if len(weight_format_priority_order) == 1: @@ -150,12 +192,48 @@ def 
create( def load(self, *, devices: Optional[Sequence[str]] = None) -> None: warnings.warn("Deprecated. ModelAdapter is loaded on initialization") - @abstractmethod - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: + def forward(self, input_sample: Sample) -> Sample: """ Run forward pass of model to get model predictions + + Note: sample id and stample stat attributes are passed through """ - # TODO: handle tensor.transpose in here and make _forward_impl the abstract impl + unexpected = [mid for mid in input_sample.members if mid not in self._input_ids] + if unexpected: + warnings.warn(f"Got unexpected input tensor IDs: {unexpected}") + + input_arrays = [ + ( + None + if (a := input_sample.members.get(in_id)) is None + else a.transpose(in_order).data.data + ) + for in_id, in_order in zip(self._input_ids, self._input_axes) + ] + output_arrays = self._forward_impl(input_arrays) + assert len(output_arrays) <= len(self._output_ids) + output_tensors = [ + None if a is None else Tensor(a, dims=d) + for a, d in zip(output_arrays, self._output_axes) + ] + return Sample( + members={ + tid: out + for tid, out in zip( + self._output_ids, + output_tensors, + ) + if out is not None + }, + stat=input_sample.stat, + id=input_sample.id, + ) + + @abstractmethod + def _forward_impl( + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ) -> List[Optional[NDArray[Any]]]: + """framework specific forward implementation""" @abstractmethod def unload(self): @@ -164,5 +242,8 @@ def unload(self): The moder adapter should be considered unusable afterwards. 
""" + def _get_input_args_numpy(self, input_sample: Sample): + """helper to extract tensor args as transposed numpy arrays""" + create_model_adapter = ModelAdapter.create diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index 21bbcc09..8e983475 100644 --- a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -19,11 +19,8 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ): - super().__init__() - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] + super().__init__(model_description=model_description) + if model_description.weights.onnx is None: raise ValueError("No ONNX weights specified for {model_description.name}") diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index 74e59f30..d054ad95 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -3,19 +3,20 @@ from contextlib import nullcontext from io import TextIOWrapper from pathlib import Path -from typing import Any, List, Literal, Optional, Sequence, Tuple, Union +from typing import Any, List, Literal, Optional, Sequence, Union import torch from loguru import logger +from numpy.typing import NDArray from torch import nn from typing_extensions import assert_never +from bioimageio.spec._internal.type_guards import is_list, is_ndarray, is_tuple from bioimageio.spec.common import ZipPath -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.utils import download -from ..digest_spec import get_axes_infos, import_callable -from ..tensor import Tensor +from ..digest_spec import import_callable from ._model_adapter import ModelAdapter @@ -23,17 +24,15 @@ class PytorchModelAdapter(ModelAdapter): def __init__( self, *, - 
outputs: Union[ - Sequence[v0_4.OutputTensorDescr], Sequence[v0_5.OutputTensorDescr] - ], - weights: Union[ - v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr - ], + model_description: AnyModelDescr, devices: Optional[Sequence[Union[str, torch.device]]] = None, mode: Literal["eval", "train"] = "eval", ): - super().__init__() - self.output_dims = [tuple(a.id for a in get_axes_infos(out)) for out in outputs] + super().__init__(model_description=model_description) + weights = model_description.weights.pytorch_state_dict + if weights is None: + raise ValueError("No `pytorch_state_dict` weights found") + devices = get_devices(devices) self._model = load_torch_model(weights, load_state=True, devices=devices) if mode == "eval": @@ -46,7 +45,14 @@ def __init__( self._mode: Literal["eval", "train"] = mode self._primary_device = devices[0] - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: + def _forward_impl( + self, input_arrays: Sequence[NDArray[Any] | None] + ) -> List[Optional[NDArray[Any]]]: + tensors = [ + None if a is None else torch.from_numpy(a).to(self._primary_device) + for a in input_arrays + ] + if self._mode == "eval": ctxt = torch.no_grad elif self._mode == "train": @@ -55,35 +61,26 @@ def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: assert_never(self._mode) with ctxt(): - tensors = [ - None if ipt is None else torch.from_numpy(ipt.data.data) - for ipt in input_tensors - ] - tensors = [ - (None if t is None else t.to(self._primary_device)) for t in tensors - ] - result: Union[Tuple[Any, ...], List[Any], Any] - result = self._model(*tensors) - if not isinstance(result, (tuple, list)): - result = [result] - - result = [ - ( - None - if r is None - else r.detach().cpu().numpy() if isinstance(r, torch.Tensor) else r - ) - for r in result # pyright: ignore[reportUnknownVariableType] - ] - if len(result) > len(self.output_dims): - raise ValueError( - f"Expected at most 
{len(self.output_dims)} outputs, but got {len(result)}" - ) - - return [ - None if r is None else Tensor(r, dims=out) - for r, out in zip(result, self.output_dims) - ] + model_out = self._model(*tensors) + + if is_tuple(model_out) or is_list(model_out): + model_out_seq = model_out + else: + model_out_seq = model_out = [model_out] + + result: List[Optional[NDArray[Any]]] = [] + for i, r in enumerate(model_out_seq): + if r is None: + result.append(None) + elif isinstance(r, torch.Tensor): + r_np: NDArray[Any] = r.detach().cpu().numpy() + result.append(r_np) + elif is_ndarray(r): + result.append(r) + else: + raise TypeError(f"Model output[{i}] has unexpected type {type(r)}.") + + return result def unload(self) -> None: del self._model diff --git a/bioimageio/core/backends/tensorflow_backend.py b/bioimageio/core/backends/tensorflow_backend.py index 94a8165f..37d85812 100644 --- a/bioimageio/core/backends/tensorflow_backend.py +++ b/bioimageio/core/backends/tensorflow_backend.py @@ -1,16 +1,15 @@ from pathlib import Path -from typing import List, Literal, Optional, Sequence, Union +from typing import Any, List, Literal, Optional, Sequence, Union import numpy as np import tensorflow as tf from loguru import logger +from numpy.typing import NDArray from bioimageio.core.io import ensure_unzipped from bioimageio.spec.common import FileSource -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 -from ..digest_spec import get_axes_infos -from ..tensor import Tensor from ._model_adapter import ModelAdapter @@ -29,8 +28,7 @@ def __init__( ], model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], ): - super().__init__() - self.model_description = model_description + super().__init__(model_description=model_description) tf_version = v0_5.Version(tf.__version__) model_tf_version = weights.tensorflow_version if model_tf_version is None: @@ -66,18 +64,37 @@ def __init__( weight_file = ensure_unzipped( weights.source, 
Path("bioimageio_unzipped_tf_weights") ) - self._network = self._get_network(weight_file) - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] - def _get_network( # pyright: ignore[reportUnknownParameterType] - self, weight_file: FileSource + def unload(self) -> None: + logger.warning( + "Device management is not implemented for keras yet, cannot unload model" + ) + + +class TensorflowModelAdapter(ModelAdapter): + weight_format = "tensorflow_saved_model_bundle" + + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, ): + super().__init__(model_description=model_description) + if devices is not None: + logger.warning( + f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" + ) + weight_file = ensure_unzipped( - weight_file, Path("bioimageio_unzipped_tf_weights") + weights.source, Path("bioimageio_unzipped_tf_weights") ) + self._network = str(weight_file) + + def _get_network( # pyright: ignore[reportUnknownParameterType] + self, weight_file: FileSource + ): + assert tf is not None if self.use_keras_api: try: @@ -97,22 +114,14 @@ def _get_network( # pyright: ignore[reportUnknownParameterType] raise e else: # NOTE in tf1 the model needs to be loaded inside of the session, so we cannot preload the model - return str(weight_file) + return # TODO currently we relaod the model every time. 
it would be better to keep the graph and session # alive in between of forward passes (but then the sessions need to be properly opened / closed) - def _forward_tf( # pyright: ignore[reportUnknownParameterType] - self, *input_tensors: Optional[Tensor] + def _forward_impl( # pyright: ignore[reportUnknownParameterType] + self, input_arrays: Sequence[Optional[NDArray[Any]]] ): assert tf is not None - input_keys = [ - ipt.name if isinstance(ipt, v0_4.InputTensorDescr) else ipt.id - for ipt in self.model_description.inputs - ] - output_keys = [ - out.name if isinstance(out, v0_4.OutputTensorDescr) else out.id - for out in self.model_description.outputs - ] # TODO read from spec tag = ( # pyright: ignore[reportUnknownVariableType] tf.saved_model.tag_constants.SERVING # pyright: ignore[reportAttributeAccessIssue] @@ -136,18 +145,19 @@ def _forward_tf( # pyright: ignore[reportUnknownParameterType] # get the tensors into the graph in_names = [ # pyright: ignore[reportUnknownVariableType] - signature[signature_key].inputs[key].name for key in input_keys + signature[signature_key].inputs[key].name for key in self._input_ids ] out_names = [ # pyright: ignore[reportUnknownVariableType] - signature[signature_key].outputs[key].name for key in output_keys + signature[signature_key].outputs[key].name + for key in self._output_ids ] - in_tensors = [ + in_tf_tensors = [ graph.get_tensor_by_name( name # pyright: ignore[reportUnknownArgumentType] ) for name in in_names # pyright: ignore[reportUnknownVariableType] ] - out_tensors = [ + out_tf_tensors = [ graph.get_tensor_by_name( name # pyright: ignore[reportUnknownArgumentType] ) @@ -159,15 +169,10 @@ def _forward_tf( # pyright: ignore[reportUnknownParameterType] dict( zip( out_names, # pyright: ignore[reportUnknownArgumentType] - out_tensors, - ) - ), - dict( - zip( - in_tensors, - [None if t is None else t.data for t in input_tensors], + out_tf_tensors, ) ), + dict(zip(in_tf_tensors, input_arrays)), ) # from dict to list of tensors 
res = [ # pyright: ignore[reportUnknownVariableType] @@ -177,14 +182,29 @@ def _forward_tf( # pyright: ignore[reportUnknownParameterType] return res # pyright: ignore[reportUnknownVariableType] - def _forward_keras( # pyright: ignore[reportUnknownParameterType] - self, *input_tensors: Optional[Tensor] + +class KerasModelAdapter(ModelAdapter): + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + if model_description.weights.tensorflow_saved_model_bundle is None: + raise ValueError("No `tensorflow_saved_model_bundle` weights found") + + super().__init__(model_description=model_description) + if devices is not None: + logger.warning( + f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" + ) + + def _forward_impl( # pyright: ignore[reportUnknownParameterType] + self, input_arrays: Sequence[Optional[NDArray[Any]]] ): - assert self.use_keras_api - assert not isinstance(self._network, str) assert tf is not None tf_tensor = [ - None if ipt is None else tf.convert_to_tensor(ipt) for ipt in input_tensors + None if ipt is None else tf.convert_to_tensor(ipt) for ipt in input_arrays ] result = self._network(*tf_tensor) # pyright: ignore[reportUnknownVariableType] @@ -201,67 +221,42 @@ def _forward_keras( # pyright: ignore[reportUnknownParameterType] for r in result # pyright: ignore[reportUnknownVariableType] ] - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - if self.use_keras_api: - result = self._forward_keras( # pyright: ignore[reportUnknownVariableType] - *input_tensors - ) - else: - result = self._forward_tf( # pyright: ignore[reportUnknownVariableType] - *input_tensors - ) - return [ - ( - None - if r is None - else Tensor(r, dims=axes) # pyright: ignore[reportUnknownArgumentType] - ) - for r, axes in zip( # pyright: ignore[reportUnknownVariableType] - result, # pyright: ignore[reportUnknownArgumentType] - 
self._internal_output_axes, - ) - ] +def create_tf_model_adapter( + model_description: AnyModelDescr, devices: Optional[Sequence[str]] +): + tf_version = v0_5.Version(tf.__version__) + weights = model_description.weights.tensorflow_saved_model_bundle + if weights is None: + raise ValueError("No `tensorflow_saved_model_bundle` weights found") - def unload(self) -> None: + model_tf_version = weights.tensorflow_version + if model_tf_version is None: logger.warning( - "Device management is not implemented for keras yet, cannot unload model" + "The model does not specify the tensorflow version." + + f"Cannot check if it is compatible with intalled tensorflow {tf_version}." ) - - -class TensorflowModelAdapter(TensorflowModelAdapterBase): - weight_format = "tensorflow_saved_model_bundle" - - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - if model_description.weights.tensorflow_saved_model_bundle is None: - raise ValueError("missing tensorflow_saved_model_bundle weights") - - super().__init__( - devices=devices, - weights=model_description.weights.tensorflow_saved_model_bundle, - model_description=model_description, + elif model_tf_version > tf_version: + logger.warning( + f"The model specifies a newer tensorflow version than installed: {model_tf_version} > {tf_version}." ) - - -class KerasModelAdapter(TensorflowModelAdapterBase): - weight_format = "keras_hdf5" - - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, + elif (model_tf_version.major, model_tf_version.minor) != ( + tf_version.major, + tf_version.minor, ): - if model_description.weights.keras_hdf5 is None: - raise ValueError("missing keras_hdf5 weights") + logger.warning( + "The tensorflow version specified by the model does not match the installed: " + + f"{model_tf_version} != {tf_version}." 
+ ) - super().__init__( - model_description=model_description, - devices=devices, - weights=model_description.weights.keras_hdf5, + if tf_version.major <= 1: + return TensorflowModelAdapter( + model_description=model_description, devices=devices ) + else: + return KerasModelAdapter(model_description=model_description, devices=devices) + + # TODO: check how to load tf weights without unzipping + weight_file = ensure_unzipped( + weights.source, Path("bioimageio_unzipped_tf_weights") + ) diff --git a/bioimageio/core/backends/torchscript_backend.py b/bioimageio/core/backends/torchscript_backend.py index d1882180..b0419813 100644 --- a/bioimageio/core/backends/torchscript_backend.py +++ b/bioimageio/core/backends/torchscript_backend.py @@ -40,10 +40,6 @@ def __init__( self._model = torch.jit.load(weight_path) self._model.to(self.devices[0]) self._model = self._model.eval() - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: with torch.no_grad(): From e5bbe7a76273c595ee6a726b8595b5148e3ac2ef Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 19 Dec 2024 09:19:58 +0100 Subject: [PATCH 038/187] fix TorchscriptModelAdapter --- bioimageio/core/backends/pytorch_backend.py | 5 +-- .../core/backends/torchscript_backend.py | 34 ++++++++----------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index d054ad95..a7fecfb7 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -139,11 +139,12 @@ def load_torch_state_dict( state = torch.load(f, map_location=devices[0]) incompatible = model.load_state_dict(state) - if incompatible.missing_keys: + if incompatible is not None and incompatible.missing_keys: logger.warning("Missing state dict keys: {}", incompatible.missing_keys) - if 
incompatible.unexpected_keys: + if incompatible is not None and incompatible.unexpected_keys: logger.warning("Unexpected state dict keys: {}", incompatible.unexpected_keys) + return model diff --git a/bioimageio/core/backends/torchscript_backend.py b/bioimageio/core/backends/torchscript_backend.py index b0419813..26924e3c 100644 --- a/bioimageio/core/backends/torchscript_backend.py +++ b/bioimageio/core/backends/torchscript_backend.py @@ -3,14 +3,13 @@ from typing import Any, List, Optional, Sequence, Union import torch +from numpy.typing import NDArray -from bioimageio.spec._internal.type_guards import is_list, is_ndarray, is_tuple +from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download -from ..digest_spec import get_axes_infos from ..model_adapters import ModelAdapter -from ..tensor import Tensor class TorchscriptModelAdapter(ModelAdapter): @@ -20,7 +19,7 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ): - super().__init__() + super().__init__(model_description=model_description) if model_description.weights.torchscript is None: raise ValueError( f"No torchscript weights found for model {model_description.name}" @@ -41,33 +40,30 @@ def __init__( self._model.to(self.devices[0]) self._model = self._model.eval() - def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: + def _forward_impl( + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ) -> List[Optional[NDArray[Any]]]: + with torch.no_grad(): torch_tensor = [ - None if b is None else torch.from_numpy(b.data.data).to(self.devices[0]) - for b in batch + None if a is None else torch.from_numpy(a).to(self.devices[0]) + for a in input_arrays ] - _result: Any = self._model.forward(*torch_tensor) - if is_list(_result) or is_tuple(_result): - result: Sequence[Any] = _result + output: Any = self._model.forward(*torch_tensor) + if 
is_list(output) or is_tuple(output): + output_seq: Sequence[Any] = output else: - result = [_result] + output_seq = [output] - result = [ + return [ ( None if r is None else r.cpu().numpy() if isinstance(r, torch.Tensor) else r ) - for r in result + for r in output_seq ] - assert len(result) == len(self._internal_output_axes) - return [ - None if r is None else Tensor(r, dims=axes) if is_ndarray(r) else r - for r, axes in zip(result, self._internal_output_axes) - ] - def unload(self) -> None: self._devices = None del self._model From 3720e85059ff1c3ac9117f49f74d03eeec3f9958 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 19 Dec 2024 10:09:12 +0100 Subject: [PATCH 039/187] update predict_sample_without_blocking --- bioimageio/core/_prediction_pipeline.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index 33fd4f33..5ef337a0 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -142,18 +142,7 @@ def predict_sample_without_blocking( if not skip_preprocessing: self.apply_preprocessing(sample) - output = Sample( - members={ - out_id: out - for out_id, out in zip( - self._output_ids, - self._adapter.forward(sample), - ) - if out is not None - }, - stat=sample.stat, - id=sample.id, - ) + output = self._adapter.forward(sample) if not skip_postprocessing: self.apply_postprocessing(output) From 76c27e9cb3dbbcf49376d153ecc13229a113aa9e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 19 Dec 2024 10:23:41 +0100 Subject: [PATCH 040/187] ensure batch and channel axes have standardized id --- bioimageio/core/axis.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index 34dfa3e1..14557b80 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -42,6 +42,12 @@ class Axis: id: AxisId type: Literal["batch", "channel", "index", "space", "time"] + def 
__post_init__(self): + if self.type == "batch": + self.id = AxisId("batch") + elif self.type == "channel": + self.id = AxisId("channel") + @classmethod def create(cls, axis: AxisLike) -> Axis: if isinstance(axis, cls): From 4cbfc5a71a9f87ebc74df213e2e245e5410adefd Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 20 Dec 2024 16:00:21 +0100 Subject: [PATCH 041/187] support validation context 'raise_errors' --- README.md | 5 +++-- bioimageio/core/_resource_tests.py | 7 +++++++ tests/test_resource_tests.py | 11 +++-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2233ec97..9985207c 100644 --- a/README.md +++ b/README.md @@ -377,8 +377,9 @@ The model specification and its validation tools can be found at Date: Fri, 20 Dec 2024 16:25:16 +0100 Subject: [PATCH 042/187] fix ONNXModelAdapter --- bioimageio/core/backends/onnx_backend.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index 8e983475..858b4cc1 100644 --- a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -2,6 +2,7 @@ from typing import Any, List, Optional, Sequence, Union import onnxruntime as rt +from numpy.typing import NDArray from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 @@ -35,9 +36,9 @@ def __init__( f"Device management is not implemented for onnx yet, ignoring the devices {devices}" ) - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - assert len(input_tensors) == len(self._input_names) - input_arrays = [None if ipt is None else ipt.data.data for ipt in input_tensors] + def _forward_impl( + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ) -> List[Optional[NDArray[Any]]]: result: Any = self._session.run( None, dict(zip(self._input_names, input_arrays)) ) @@ -46,10 +47,7 @@ def forward(self, 
*input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: else: result_seq = [result] - return [ - None if r is None else Tensor(r, dims=axes) - for r, axes in zip(result_seq, self._internal_output_axes) - ] + return result_seq # pyright: ignore[reportReturnType] def unload(self) -> None: warnings.warn( From 3b514f881d4e49209fb6098095286e215b74e632 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 20 Dec 2024 16:34:10 +0100 Subject: [PATCH 043/187] _get_axis_type ->_guess_axis_type user provided AxisIds might be uninterpretable --- bioimageio/core/axis.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index 14557b80..30c0d281 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -8,19 +8,26 @@ from bioimageio.spec.model import v0_5 -def _get_axis_type(a: Literal["b", "t", "i", "c", "x", "y", "z"]): - if a == "b": +def _guess_axis_type(a: str): + if a in ("b", "batch"): return "batch" - elif a == "t": + elif a in ("t", "time"): return "time" - elif a == "i": + elif a in ("i", "index"): return "index" - elif a == "c": + elif a in ("c", "channel"): return "channel" elif a in ("x", "y", "z"): return "space" else: - return "index" # return most unspecific axis + raise ValueError( + f"Failed to infer axis type for axis id '{a}'." + + " Consider using one of: '" + + "', '".join( + ["b", "batch", "t", "time", "i", "index", "c", "channel", "x", "y", "z"] + ) + + "'. Or creating an `Axis` object instead." 
+ ) S = TypeVar("S", bound=str) @@ -54,10 +61,10 @@ def create(cls, axis: AxisLike) -> Axis: return axis elif isinstance(axis, Axis): return Axis(id=axis.id, type=axis.type) - elif isinstance(axis, str): - return Axis(id=AxisId(axis), type=_get_axis_type(axis)) elif isinstance(axis, v0_5.AxisBase): return Axis(id=AxisId(axis.id), type=axis.type) + elif isinstance(axis, str): + return Axis(id=AxisId(axis), type=_guess_axis_type(axis)) else: assert_never(axis) From b26433112cc171ff7277d427e3a32d07560d1547 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 20 Dec 2024 16:34:28 +0100 Subject: [PATCH 044/187] fix get_axes_infos --- bioimageio/core/digest_spec.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 441243c6..0d4800fb 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -35,7 +35,7 @@ ) from bioimageio.spec.utils import load_array -from .axis import AxisId, AxisInfo, AxisLike, PerAxis +from .axis import Axis, AxisId, AxisInfo, AxisLike, PerAxis from .block_meta import split_multiple_shapes_into_blocks from .common import Halo, MemberId, PerMember, SampleId, TotalNumberOfBlocks from .io import load_tensor @@ -104,14 +104,15 @@ def get_axes_infos( ], ) -> List[AxisInfo]: """get a unified, simplified axis representation from spec axes""" - return [ - ( - AxisInfo.create("i") - if isinstance(a, str) and a not in ("b", "i", "t", "c", "z", "y", "x") - else AxisInfo.create(a) - ) - for a in io_descr.axes - ] + ret: List[AxisInfo] = [] + for a in io_descr.axes: + if isinstance(a, v0_5.ANY_AXIS_TYPES): + ret.append(AxisInfo.create(Axis(id=a.id, type=a.type))) + else: + assert a in ("b", "i", "t", "c", "z", "y", "x") + ret.append(AxisInfo.create(a)) + + return ret def get_member_id( From 84f24fed86ba82ac4d3e61113ee7ae88c981a35d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 20 Dec 2024 16:34:39 +0100 Subject: [PATCH 045/187] bump 
pyright version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c1a60e40..d740a8e3 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ "pre-commit", "pdoc", "psutil", # parallel pytest with 'pytest -n auto' - "pyright==1.1.390", + "pyright==1.1.391", "pytest-cov", "pytest-xdist", # parallel pytest "pytest", From 27ea9aa9aa483624ad1ebc28ec74c27e92081fa1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 20 Dec 2024 16:34:52 +0100 Subject: [PATCH 046/187] add test cases --- tests/test_tensor.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tests/test_tensor.py b/tests/test_tensor.py index 33163077..e00efe04 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -1,3 +1,5 @@ +from typing import Sequence + import numpy as np import pytest import xarray as xr @@ -8,9 +10,19 @@ @pytest.mark.parametrize( "axes", - ["yx", "xy", "cyx", "yxc", "bczyx", "xyz", "xyzc", "bzyxc"], + [ + "yx", + "xy", + "cyx", + "yxc", + "bczyx", + "xyz", + "xyzc", + "bzyxc", + ("batch", "channel", "x", "y"), + ], ) -def test_transpose_tensor_2d(axes: str): +def test_transpose_tensor_2d(axes: Sequence[str]): tensor = Tensor.from_numpy(np.random.rand(256, 256), dims=None) transposed = tensor.transpose([AxisId(a) for a in axes]) @@ -19,9 +31,18 @@ def test_transpose_tensor_2d(axes: str): @pytest.mark.parametrize( "axes", - ["zyx", "cyzx", "yzixc", "bczyx", "xyz", "xyzc", "bzyxtc"], + [ + "zyx", + "cyzx", + "yzixc", + "bczyx", + "xyz", + "xyzc", + "bzyxtc", + ("batch", "channel", "x", "y", "z"), + ], ) -def test_transpose_tensor_3d(axes: str): +def test_transpose_tensor_3d(axes: Sequence[str]): tensor = Tensor.from_numpy(np.random.rand(64, 64, 64), dims=None) transposed = tensor.transpose([AxisId(a) for a in axes]) assert transposed.ndim == len(axes) From de759d52bd9bdfe3772f6913fdf6404ef62aae57 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 7 Jan 2025 11:33:27 +0100 Subject: 
[PATCH 047/187] fix pip install with no-deps --- .github/workflows/build.yaml | 12 ++++++++++++ dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 2 +- dev/env-wo-python.yaml | 2 +- dev/env.yaml | 2 +- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 634820ad..3ed0f5df 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -42,6 +42,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: Install py3.8 environment if: matrix.python-version == '3.8' uses: mamba-org/setup-micromamba@v1 @@ -50,6 +52,8 @@ jobs: cache-environment: true environment-file: dev/env-py38.yaml post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup run: pip install --no-deps -e . - name: Get Date @@ -90,6 +94,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: Install py3.8 environment if: matrix.python-version == '3.8' uses: mamba-org/setup-micromamba@v1 @@ -98,6 +104,8 @@ jobs: cache-environment: true environment-file: dev/env-py38.yaml post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup spec run: | conda remove --yes --force bioimageio.spec || true # allow failure for cached env @@ -154,6 +162,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup spec run: | conda remove --yes --force bioimageio.spec || true # allow failure for cached env @@ -191,6 +201,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup run: pip install --no-deps -e . - name: Get Date diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 23286840..8d7e7ecf 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -42,4 +42,4 @@ dependencies: - typing-extensions - xarray - pip: - - -e --no-deps .. 
+ - -e .. diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index ac443f65..c28c01aa 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -42,4 +42,4 @@ dependencies: - typing-extensions - xarray - pip: - - -e --no-deps .. + - -e .. diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index 2a77d25b..08c8968e 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -42,4 +42,4 @@ dependencies: - typing-extensions - xarray - pip: - - -e --no-deps .. + - -e .. diff --git a/dev/env.yaml b/dev/env.yaml index 7ff6abed..ef715090 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -41,4 +41,4 @@ dependencies: - typing-extensions - xarray - pip: - - -e --no-deps .. + - -e .. From d5abcfce17265f25b0131da923996191cfd4d53a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 10 Jan 2025 10:08:52 +0100 Subject: [PATCH 048/187] don't ignore model.v0_4.ScaleLinearKwargs with axes not implemented --- bioimageio/core/proc_ops.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index eecf47b1..d6d59092 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -16,10 +16,11 @@ import xarray as xr from typing_extensions import Self, assert_never +from bioimageio.core.digest_spec import get_axes_infos from bioimageio.spec.model import v0_4, v0_5 from ._op_base import BlockedOperator, Operator -from .axis import AxisId, PerAxis +from .axis import AxisId, AxisInfo, PerAxis from .block import Block from .common import DTypeStr, MemberId from .sample import Sample, SampleBlock, SampleBlockWithOrigin @@ -299,9 +300,15 @@ def from_proc_descr( member_id: MemberId, ) -> Self: kwargs = descr.kwargs - if isinstance(kwargs, v0_5.ScaleLinearAlongAxisKwargs): + if isinstance(kwargs, v0_5.ScaleLinearKwargs): + axis = None + elif isinstance(kwargs, v0_5.ScaleLinearAlongAxisKwargs): axis = kwargs.axis - elif isinstance(kwargs, (v0_4.ScaleLinearKwargs, v0_5.ScaleLinearKwargs)): + elif 
isinstance(kwargs, v0_4.ScaleLinearKwargs): + if kwargs.axes is not None: + raise NotImplementedError( + "model.v0_4.ScaleLinearKwargs with axes not implemented, please consider updating the model to v0_5." + ) axis = None else: assert_never(kwargs) From c64ff1b8f9f4261d9fa8e2da67b6e08e542641e5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 13 Jan 2025 15:06:57 +0100 Subject: [PATCH 049/187] fix keras and tf backends --- bioimageio/core/backends/_model_adapter.py | 2 +- bioimageio/core/backends/keras_backend.py | 36 ++--- .../core/backends/tensorflow_backend.py | 134 ++++++------------ pyproject.toml | 2 +- 4 files changed, 54 insertions(+), 120 deletions(-) diff --git a/bioimageio/core/backends/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py index 6321a8df..677a88f7 100644 --- a/bioimageio/core/backends/_model_adapter.py +++ b/bioimageio/core/backends/_model_adapter.py @@ -232,7 +232,7 @@ def forward(self, input_sample: Sample) -> Sample: @abstractmethod def _forward_impl( self, input_arrays: Sequence[Optional[NDArray[Any]]] - ) -> List[Optional[NDArray[Any]]]: + ) -> Union[List[Optional[NDArray[Any]]], Tuple[Optional[NDArray[Any]]]]: """framework specific forward implementation""" @abstractmethod diff --git a/bioimageio/core/backends/keras_backend.py b/bioimageio/core/backends/keras_backend.py index 35ee79fe..6ca603ad 100644 --- a/bioimageio/core/backends/keras_backend.py +++ b/bioimageio/core/backends/keras_backend.py @@ -1,16 +1,16 @@ import os -from typing import Any, List, Optional, Sequence, Union +from typing import Any, Optional, Sequence, Union from loguru import logger from numpy.typing import NDArray from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import Version from .._settings import settings from ..digest_spec import get_axes_infos -from ..tensor import Tensor from ._model_adapter 
import ModelAdapter os.environ["KERAS_BACKEND"] = settings.keras_backend @@ -35,7 +35,7 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ) -> None: - super().__init__() + super().__init__(model_description=model_description) if model_description.weights.keras_hdf5 is None: raise ValueError("model has not keras_hdf5 weights specified") model_tf_version = model_description.weights.keras_hdf5.tensorflow_version @@ -73,30 +73,14 @@ def __init__( for out in model_description.outputs ] - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - _result: Union[Sequence[NDArray[Any]], NDArray[Any]] - _result = self._network.predict( # type: ignore - *[None if t is None else t.data.data for t in input_tensors] - ) - if isinstance(_result, (tuple, list)): - result = _result # pyright: ignore[reportUnknownVariableType] + def _forward_impl( # pyright: ignore[reportUnknownParameterType] + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ): + network_output = self._network.predict(*input_arrays) # type: ignore + if is_list(network_output) or is_tuple(network_output): + return network_output else: - result = [_result] # type: ignore - - assert len(result) == len( # pyright: ignore[reportUnknownArgumentType] - self._output_axes - ) - ret: List[Optional[Tensor]] = [] - ret.extend( - [ - Tensor(r, dims=axes) # pyright: ignore[reportArgumentType] - for r, axes, in zip( # pyright: ignore[reportUnknownVariableType] - result, # pyright: ignore[reportUnknownArgumentType] - self._output_axes, - ) - ] - ) - return ret + return [network_output] # pyright: ignore[reportUnknownVariableType] def unload(self) -> None: logger.warning( diff --git a/bioimageio/core/backends/tensorflow_backend.py b/bioimageio/core/backends/tensorflow_backend.py index 37d85812..83fa4813 100644 --- a/bioimageio/core/backends/tensorflow_backend.py +++ b/bioimageio/core/backends/tensorflow_backend.py @@ -1,5 +1,5 @@ from 
pathlib import Path -from typing import Any, List, Literal, Optional, Sequence, Union +from typing import Any, Optional, Sequence, Union import numpy as np import tensorflow as tf @@ -7,70 +7,11 @@ from numpy.typing import NDArray from bioimageio.core.io import ensure_unzipped -from bioimageio.spec.common import FileSource from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from ._model_adapter import ModelAdapter -class TensorflowModelAdapterBase(ModelAdapter): - weight_format: Literal["keras_hdf5", "tensorflow_saved_model_bundle"] - - def __init__( - self, - *, - devices: Optional[Sequence[str]] = None, - weights: Union[ - v0_4.KerasHdf5WeightsDescr, - v0_4.TensorflowSavedModelBundleWeightsDescr, - v0_5.KerasHdf5WeightsDescr, - v0_5.TensorflowSavedModelBundleWeightsDescr, - ], - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - ): - super().__init__(model_description=model_description) - tf_version = v0_5.Version(tf.__version__) - model_tf_version = weights.tensorflow_version - if model_tf_version is None: - logger.warning( - "The model does not specify the tensorflow version." - + f"Cannot check if it is compatible with intalled tensorflow {tf_version}." - ) - elif model_tf_version > tf_version: - logger.warning( - f"The model specifies a newer tensorflow version than installed: {model_tf_version} > {tf_version}." - ) - elif (model_tf_version.major, model_tf_version.minor) != ( - tf_version.major, - tf_version.minor, - ): - logger.warning( - "The tensorflow version specified by the model does not match the installed: " - + f"{model_tf_version} != {tf_version}." 
- ) - - self.use_keras_api = ( - tf_version.major > 1 - or self.weight_format == KerasModelAdapter.weight_format - ) - - # TODO tf device management - if devices is not None: - logger.warning( - f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" - ) - - # TODO: check how to load tf weights without unzipping - weight_file = ensure_unzipped( - weights.source, Path("bioimageio_unzipped_tf_weights") - ) - - def unload(self) -> None: - logger.warning( - "Device management is not implemented for keras yet, cannot unload model" - ) - - class TensorflowModelAdapter(ModelAdapter): weight_format = "tensorflow_saved_model_bundle" @@ -81,47 +22,28 @@ def __init__( devices: Optional[Sequence[str]] = None, ): super().__init__(model_description=model_description) + + weight_file = model_description.weights.tensorflow_saved_model_bundle + if model_description.weights.tensorflow_saved_model_bundle is None: + raise ValueError("No `tensorflow_saved_model_bundle` weights found") + if devices is not None: logger.warning( f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" ) + # TODO: check how to load tf weights without unzipping weight_file = ensure_unzipped( - weights.source, Path("bioimageio_unzipped_tf_weights") + model_description.weights.tensorflow_saved_model_bundle.source, + Path("bioimageio_unzipped_tf_weights"), ) self._network = str(weight_file) - def _get_network( # pyright: ignore[reportUnknownParameterType] - self, weight_file: FileSource - ): - - assert tf is not None - if self.use_keras_api: - try: - return tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue,reportUnknownVariableType] - weight_file, - call_endpoint="serve", - ) - except Exception as e: - try: - return tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue,reportUnknownVariableType] - weight_file, call_endpoint="serving_default" - ) - except Exception as ee: - logger.opt(exception=ee).info( 
- "keras.layers.TFSMLayer error for alternative call_endpoint='serving_default'" - ) - raise e - else: - # NOTE in tf1 the model needs to be loaded inside of the session, so we cannot preload the model - return - # TODO currently we relaod the model every time. it would be better to keep the graph and session # alive in between of forward passes (but then the sessions need to be properly opened / closed) def _forward_impl( # pyright: ignore[reportUnknownParameterType] self, input_arrays: Sequence[Optional[NDArray[Any]]] ): - assert tf is not None # TODO read from spec tag = ( # pyright: ignore[reportUnknownVariableType] tf.saved_model.tag_constants.SERVING # pyright: ignore[reportAttributeAccessIssue] @@ -182,6 +104,11 @@ def _forward_impl( # pyright: ignore[reportUnknownParameterType] return res # pyright: ignore[reportUnknownVariableType] + def unload(self) -> None: + logger.warning( + "Device management is not implemented for tensorflow 1, cannot unload model" + ) + class KerasModelAdapter(ModelAdapter): def __init__( @@ -199,6 +126,28 @@ def __init__( f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" ) + # TODO: check how to load tf weights without unzipping + weight_file = ensure_unzipped( + model_description.weights.tensorflow_saved_model_bundle.source, + Path("bioimageio_unzipped_tf_weights"), + ) + + try: + self._network = tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue] + weight_file, + call_endpoint="serve", + ) + except Exception as e: + try: + self._network = tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue] + weight_file, call_endpoint="serving_default" + ) + except Exception as ee: + logger.opt(exception=ee).info( + "keras.layers.TFSMLayer error for alternative call_endpoint='serving_default'" + ) + raise e + def _forward_impl( # pyright: ignore[reportUnknownParameterType] self, input_arrays: Sequence[Optional[NDArray[Any]]] ): @@ -221,6 +170,12 @@ def 
_forward_impl( # pyright: ignore[reportUnknownParameterType] for r in result # pyright: ignore[reportUnknownVariableType] ] + def unload(self) -> None: + logger.warning( + "Device management is not implemented for tensorflow>=2 models" + + f" using `{self.__class__.__name__}`, cannot unload model" + ) + def create_tf_model_adapter( model_description: AnyModelDescr, devices: Optional[Sequence[str]] @@ -255,8 +210,3 @@ def create_tf_model_adapter( ) else: return KerasModelAdapter(model_description=model_description, devices=devices) - - # TODO: check how to load tf weights without unzipping - weight_file = ensure_unzipped( - weights.source, Path("bioimageio_unzipped_tf_weights") - ) diff --git a/pyproject.toml b/pyproject.toml index 91cd2cbc..968444a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.black] line-length = 88 -extend_exclude = "/presentations/" +extend-exclude = "/presentations/" target-version = ["py38", "py39", "py310", "py311", "py312"] preview = true From 852e8b10e5c3ad8b970d9d4fbc2b55388a459e98 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 14 Jan 2025 01:00:01 +0100 Subject: [PATCH 050/187] use AxisBase instead --- bioimageio/core/digest_spec.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 0d4800fb..5789f377 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -23,8 +23,7 @@ from numpy.typing import NDArray from typing_extensions import Unpack, assert_never -from bioimageio.spec._internal.io import resolve_and_extract -from bioimageio.spec._internal.io_utils import HashKwargs +from bioimageio.spec._internal.io import HashKwargs, resolve_and_extract from bioimageio.spec.common import FileSource from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_4 import CallableFromDepencency, CallableFromFile @@ -106,7 +105,7 @@ def get_axes_infos( """get a unified, simplified 
axis representation from spec axes""" ret: List[AxisInfo] = [] for a in io_descr.axes: - if isinstance(a, v0_5.ANY_AXIS_TYPES): + if isinstance(a, v0_5.AxisBase): ret.append(AxisInfo.create(Axis(id=a.id, type=a.type))) else: assert a in ("b", "i", "t", "c", "z", "y", "x") From 4299a8cb58661c4f570c844bf6d29a689f9a852d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 15 Jan 2025 13:16:44 +0100 Subject: [PATCH 051/187] remove TooFewDevicesException exception --- ...t_prediction_pipeline_device_management.py | 13 ++-------- tests/utils.py | 26 ++----------------- 2 files changed, 4 insertions(+), 35 deletions(-) diff --git a/tests/test_prediction_pipeline_device_management.py b/tests/test_prediction_pipeline_device_management.py index aa692356..2dde4120 100644 --- a/tests/test_prediction_pipeline_device_management.py +++ b/tests/test_prediction_pipeline_device_management.py @@ -1,14 +1,10 @@ from pathlib import Path +import pytest from numpy.testing import assert_array_almost_equal from bioimageio.spec.model.v0_4 import ModelDescr as ModelDescr04 from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat -from tests.utils import skip_on - - -class TooFewDevicesException(Exception): - pass def _test_device_management(model_package: Path, weight_format: WeightsFormat): @@ -19,7 +15,7 @@ def _test_device_management(model_package: Path, weight_format: WeightsFormat): from bioimageio.core.digest_spec import get_test_inputs, get_test_outputs if not hasattr(torch, "cuda") or torch.cuda.device_count() == 0: - raise TooFewDevicesException("Need at least one cuda device for this test") + pytest.skip("Need at least one cuda device for this test") bio_model = load_description(model_package) assert isinstance(bio_model, (ModelDescr, ModelDescr04)) @@ -52,26 +48,21 @@ def _test_device_management(model_package: Path, weight_format: WeightsFormat): assert_array_almost_equal(out, exp, decimal=4) -@skip_on(TooFewDevicesException, reason="Too few devices") def 
test_device_management_torch(any_torch_model: Path): _test_device_management(any_torch_model, "pytorch_state_dict") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_torchscript(any_torchscript_model: Path): _test_device_management(any_torchscript_model, "torchscript") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_onnx(any_onnx_model: Path): _test_device_management(any_onnx_model, "onnx") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_tensorflow(any_tensorflow_model: Path): _test_device_management(any_tensorflow_model, "tensorflow_saved_model_bundle") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_keras(any_keras_model: Path): _test_device_management(any_keras_model, "keras_hdf5") diff --git a/tests/utils.py b/tests/utils.py index 805eecfa..f9116fa5 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,8 +1,7 @@ """utils to test bioimageio.core""" import os -from functools import wraps -from typing import Any, Protocol, Sequence, Type +from typing import Any, Protocol, Sequence import pytest @@ -16,28 +15,7 @@ class test_func(Protocol): def __call__(*args: Any, **kwargs: Any): ... 
-def skip_on(exception: Type[Exception], reason: str): - """adapted from https://stackoverflow.com/a/63522579""" - import pytest - - # Func below is the real decorator and will receive the test function as param - def decorator_func(f: test_func): - @wraps(f) - def wrapper(*args: Any, **kwargs: Any): - try: - # Try to run the test - return f(*args, **kwargs) - except exception: - # If exception of given type happens - # just swallow it and raise pytest.Skip with given reason - pytest.skip(reason) - - return wrapper - - return decorator_func - - expensive_test = pytest.mark.skipif( - (run := os.getenv("RUN_EXPENSIVE_TESTS")) != "true", + os.getenv("RUN_EXPENSIVE_TESTS") != "true", reason="Skipping expensive test (enable by RUN_EXPENSIVE_TESTS='true')", ) From 3009f5bc20ed2108bb41e95649f7d309f3d16eb6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 15 Jan 2025 14:47:28 +0100 Subject: [PATCH 052/187] improve test_mean_var_std_calculator --- tests/test_stat_calculators.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 57e86c5a..0efe02d9 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -1,7 +1,8 @@ -from typing import Tuple, Union +from typing import Tuple import numpy as np import pytest +from git import Optional from xarray.testing import assert_allclose # pyright: ignore[reportUnknownVariableType] from bioimageio.core.axis import AxisId @@ -31,14 +32,15 @@ def create_random_dataset(tid: MemberId, axes: Tuple[AxisId, ...]): "axes", [ None, - ("x", "y"), - ("channel", "y"), + (AxisId("x"), AxisId("y")), + (AxisId("channel"), AxisId("y")), + (AxisId("batch"), AxisId("channel"), AxisId("x"), AxisId("y")), ], ) -def test_mean_var_std_calculator(axes: Union[None, str, Tuple[str, ...]]): +def test_mean_var_std_calculator(axes: Optional[Tuple[AxisId, ...]]): tid = MemberId("tensor") - axes = tuple(map(AxisId, ("batch", "channel", "x", 
"y"))) - data, ds = create_random_dataset(tid, axes) + d_axes = tuple(map(AxisId, ("batch", "channel", "x", "y"))) + data, ds = create_random_dataset(tid, d_axes) expected_mean = data.mean(axes) expected_var = data.var(axes) expected_std = data.std(axes) From b6f84f9fc3825303915efb7d194fb6620d507696 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 24 Jan 2025 15:47:15 +0100 Subject: [PATCH 053/187] annotate relative and absolute tolerance --- bioimageio/core/_resource_tests.py | 17 +++++----- .../core/weight_converters/_add_weights.py | 32 +++++++++++++------ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 4f49fd75..9e9c0937 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -37,6 +37,7 @@ from bioimageio.spec._internal.common_nodes import ResourceDescrBase from bioimageio.spec._internal.io import is_yaml_value from bioimageio.spec._internal.io_utils import read_yaml, write_yaml +from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance from bioimageio.spec._internal.validation_context import validation_context_var from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource, Sha256 from bioimageio.spec.model import v0_4, v0_5 @@ -120,8 +121,8 @@ def test_model( source: Union[v0_4.ModelDescr, v0_5.ModelDescr, PermissiveFileSource], weight_format: Optional[WeightsFormat] = None, devices: Optional[List[str]] = None, - absolute_tolerance: float = 1.5e-4, - relative_tolerance: float = 1e-4, + absolute_tolerance: AbsoluteTolerance = 1.5e-4, + relative_tolerance: RelativeTolerance = 1e-4, decimal: Optional[int] = None, *, determinism: Literal["seed_only", "full"] = "seed_only", @@ -152,8 +153,8 @@ def test_description( format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, devices: Optional[Sequence[str]] = None, - absolute_tolerance: 
float = 1.5e-4, - relative_tolerance: float = 1e-4, + absolute_tolerance: AbsoluteTolerance = 1.5e-4, + relative_tolerance: RelativeTolerance = 1e-4, decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, @@ -236,8 +237,8 @@ def _test_in_env( weight_format: Optional[WeightsFormat], conda_env: Optional[BioimageioCondaEnv], devices: Optional[Sequence[str]], - absolute_tolerance: float, - relative_tolerance: float, + absolute_tolerance: AbsoluteTolerance, + relative_tolerance: RelativeTolerance, determinism: Literal["seed_only", "full"], run_command: Callable[[Sequence[str]], None], ) -> ValidationSummary: @@ -354,8 +355,8 @@ def load_description_and_test( format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, devices: Optional[Sequence[str]] = None, - absolute_tolerance: float = 1.5e-4, - relative_tolerance: float = 1e-4, + absolute_tolerance: AbsoluteTolerance = 1.5e-4, + relative_tolerance: RelativeTolerance = 1e-4, decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index 76041550..c0aa3bfb 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -1,23 +1,35 @@ -from abc import ABC -from typing import Optional, Sequence, Union, assert_never, final +from copy import deepcopy +from pathlib import Path +from typing import List, Optional, Sequence, Union from bioimageio.spec.model import v0_4, v0_5 def increase_available_weight_formats( - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - source_format: v0_5.WeightsFormat, - target_format: v0_5.WeightsFormat, + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, + source_format: Optional[v0_5.WeightsFormat] = None, + 
target_format: Optional[v0_5.WeightsFormat] = None, + output_path: Path, devices: Optional[Sequence[str]] = None, -): - if not isinstance(model_description, (v0_4.ModelDescr, v0_5.ModelDescr)): +) -> Union[v0_4.ModelDescr, v0_5.ModelDescr]: + """Convert neural network weights to other formats and add them to the model description""" + if not isinstance(model_descr, (v0_4.ModelDescr, v0_5.ModelDescr)): raise TypeError( - f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_description)}" + f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_descr)}" ) - if (source_format, target_format) == ("pytorch_state_dict", "onnx"): - from .pytorch_to_onnx import convert_pytorch_to_onnx + if source_format is None: + available = [wf for wf, w in model_descr.weights if w is not None] + missing = [wf for wf, w in model_descr.weights if w is None] + else: + available = [source_format] + missing = [target_format] + + if "pytorch_state_dict" in available and "onnx" in missing: + from .pytorch_to_onnx import convert + + onnx = convert(model_descr) else: raise NotImplementedError( From b09d3890551be62ad9fe4c6a17e1689627af2bbe Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 24 Jan 2025 16:29:02 +0100 Subject: [PATCH 054/187] add check_reproducibility --- .../core/weight_converters/pytorch_to_onnx.py | 66 ++++++++++--------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index 71ac17a6..a104cb2d 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -8,6 +8,7 @@ from bioimageio.core.backends.pytorch_backend import load_torch_model from bioimageio.core.digest_spec import get_member_id, get_test_inputs from bioimageio.core.proc_setup import get_pre_and_postprocessing +from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance from 
bioimageio.spec.model import v0_4, v0_5 @@ -16,10 +17,11 @@ def convert( *, output_path: Path, use_tracing: bool = True, - relative_tolerance: float = 1e-07, - absolute_tolerance: float = 0, verbose: bool = False, opset_version: int = 15, + check_reproducibility: bool = True, + relative_tolerance: RelativeTolerance = 1e-07, + absolute_tolerance: AbsoluteTolerance = 0, ) -> v0_5.OnnxWeightsDescr: """ Convert model weights from the PyTorch state_dict format to the ONNX format. @@ -72,7 +74,6 @@ def convert( outputs_original: List[np.ndarray[Any, Any]] = [ out.numpy() for out in outputs_original_torch ] - if use_tracing: _ = torch.onnx.export( model, @@ -84,35 +85,40 @@ def convert( else: raise NotImplementedError - try: - import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] - except ImportError: - raise ImportError( - "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." - ) + if check_reproducibility: + try: + import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] + except ImportError as e: + raise ImportError( + "The onnx weights were exported, but onnx rt is not available" + + " and weights cannot be checked." 
+ ) from e - # check the onnx model - sess = rt.InferenceSession(str(output_path)) - onnx_input_node_args = cast( - List[Any], sess.get_inputs() - ) # FIXME: remove cast, try using rt.NodeArg instead of Any - inputs_onnx = { - input_name.name: inp - for input_name, inp in zip(onnx_input_node_args, inputs_numpy) - } - outputs_onnx = cast( - Sequence[np.ndarray[Any, Any]], sess.run(None, inputs_onnx) - ) # FIXME: remove cast + # check the onnx model + sess = rt.InferenceSession(str(output_path)) + onnx_input_node_args = cast( + List[Any], sess.get_inputs() + ) # FIXME: remove cast, try using rt.NodeArg instead of Any + inputs_onnx = { + input_name.name: inp + for input_name, inp in zip(onnx_input_node_args, inputs_numpy) + } + outputs_onnx = cast( + Sequence[np.ndarray[Any, Any]], sess.run(None, inputs_onnx) + ) # FIXME: remove cast - try: - for out_original, out_onnx in zip(outputs_original, outputs_onnx): - assert_allclose( - out_original, out_onnx, rtol=relative_tolerance, atol=absolute_tolerance - ) - except AssertionError as e: - raise AssertionError( - "Inference results of using original and converted weights do not match" - ) from e + try: + for out_original, out_onnx in zip(outputs_original, outputs_onnx): + assert_allclose( + out_original, + out_onnx, + rtol=relative_tolerance, + atol=absolute_tolerance, + ) + except AssertionError as e: + raise AssertionError( + "Inference results of original and converted weights do not match." 
+ ) from e return v0_5.OnnxWeightsDescr( source=output_path, parent="pytorch_state_dict", opset_version=opset_version From e35735d10740f281e0fb73c26a49a52666ce1cec Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 24 Jan 2025 16:31:51 +0100 Subject: [PATCH 055/187] add imagecodecs dependency --- dev/env-py38.yaml | 1 + dev/env-tf.yaml | 1 + dev/env-wo-python.yaml | 1 + dev/env.yaml | 1 + setup.py | 1 + 5 files changed, 5 insertions(+) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 8d7e7ecf..148c9668 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -10,6 +10,7 @@ dependencies: - crick # uncommented - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index c28c01aa..61e00c41 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -10,6 +10,7 @@ dependencies: # - crick # currently requires python<=3.9 - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index 08c8968e..cd72571e 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -10,6 +10,7 @@ dependencies: # - crick # currently requires python<=3.9 - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black diff --git a/dev/env.yaml b/dev/env.yaml index ef715090..8fed3b25 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -9,6 +9,7 @@ dependencies: # - crick # currently requires python<=3.9 - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black diff --git a/setup.py b/setup.py index d740a8e3..02f39f8d 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ install_requires=[ "bioimageio.spec ==0.5.3.6", "h5py", + "imagecodecs", "imageio>=2.10", "loguru", "numpy", From 34d547815d94fdd2e9653a712a0015a8446d73e3 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 24 Jan 2025 16:32:03 +0100 Subject: [PATCH 056/187] improve increase_available_weight_formats --- .../core/weight_converters/_add_weights.py | 101 
++++++++++++++---- 1 file changed, 78 insertions(+), 23 deletions(-) diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index c0aa3bfb..ed541385 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -1,37 +1,92 @@ -from copy import deepcopy -from pathlib import Path -from typing import List, Optional, Sequence, Union +from typing import Optional, Sequence -from bioimageio.spec.model import v0_4, v0_5 +from loguru import logger +from pydantic import DirectoryPath + +from bioimageio.core._resource_tests import test_model +from bioimageio.spec import load_model_description, save_bioimageio_package_as_folder +from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance +from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat def increase_available_weight_formats( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + model_descr: ModelDescr, *, - source_format: Optional[v0_5.WeightsFormat] = None, - target_format: Optional[v0_5.WeightsFormat] = None, - output_path: Path, - devices: Optional[Sequence[str]] = None, -) -> Union[v0_4.ModelDescr, v0_5.ModelDescr]: - """Convert neural network weights to other formats and add them to the model description""" - if not isinstance(model_descr, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise TypeError( - f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_descr)}" - ) + output_path: DirectoryPath, + source_format: Optional[WeightsFormat] = None, + target_format: Optional[WeightsFormat] = None, + devices: Sequence[str] = ("cpu",), +) -> ModelDescr: + """Convert model weights to other formats and add them to the model description + + Args: + output_path: Path to save updated model package to. + source_format: convert from a specific weights format. + Default: choose automatically from any available. + target_format: convert to a specific weights format. 
+ Default: attempt to convert to any missing format. + devices: Devices that may be used during conversion. + """ + if not isinstance(model_descr, ModelDescr): + raise TypeError(type(model_descr)) + + # save model to local folder + output_path = save_bioimageio_package_as_folder( + model_descr, output_path=output_path + ) + # reload from local folder to make sure we do not edit the given model + _model_descr = load_model_description(output_path) + assert isinstance(_model_descr, ModelDescr) + model_descr = _model_descr + del _model_descr if source_format is None: - available = [wf for wf, w in model_descr.weights if w is not None] - missing = [wf for wf, w in model_descr.weights if w is None] + available = set(model_descr.weights.available_formats) + else: + available = {source_format} + + if target_format is None: + missing = set(model_descr.weights.missing_formats) else: - available = [source_format] - missing = [target_format] + missing = {target_format} if "pytorch_state_dict" in available and "onnx" in missing: from .pytorch_to_onnx import convert - onnx = convert(model_descr) + try: + model_descr.weights.onnx = convert( + model_descr, + output_path=output_path, + use_tracing=False, + ) + except Exception as e: + logger.error(e) + else: + available.add("onnx") + missing.discard("onnx") - else: - raise NotImplementedError( - f"Converting from '{source_format}' to '{target_format}' is not yet implemented. 
Please create an issue at https://github.com/bioimage-io/core-bioimage-io-python/issues/new/choose" + if "pytorch_state_dict" in available and "torchscript" in missing: + from .pytorch_to_torchscript import convert + + try: + model_descr.weights.torchscript = convert( + model_descr, + output_path=output_path, + use_tracing=False, + ) + except Exception as e: + logger.error(e) + else: + available.add("torchscript") + missing.discard("torchscript") + + if missing: + logger.warning( + f"Converting from any of the available weights formats {available} to any" + + f" of {missing} is not yet implemented. Please create an issue at" + + " https://github.com/bioimage-io/core-bioimage-io-python/issues/new/choose" + + " if you would like bioimageio.core to support a particular conversion." ) + + test_model(model_descr).display() + return model_descr From cfa3713a7abfa14746aad86b1e60f8df3611e9b9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 27 Jan 2025 12:55:56 +0100 Subject: [PATCH 057/187] make docstring test os independent --- bioimageio/core/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 001db539..d80c6870 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -81,11 +81,11 @@ def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]: Examples: >>> _split_dataset_path(Path("my_file.h5/dataset")) - (PosixPath('my_file.h5'), PurePosixPath('dataset')) + (...Path('my_file.h5'), PurePosixPath('dataset')) If no suffix is detected the path is returned with >>> _split_dataset_path(Path("my_plain_file")) - (PosixPath('my_plain_file'), None) + (...Path('my_plain_file'), None) """ if path.suffix: From b4e76d1fc341331db22de6d1a71fd4539f26e7ce Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 27 Jan 2025 12:56:40 +0100 Subject: [PATCH 058/187] return Tensor instead of np.ndarray --- bioimageio/core/stat_calculators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index f3aa8dcd..6279289b 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -183,7 +183,7 @@ def finalize( else: assert self._m2 is not None var = self._m2 / self._n - sqrt = np.sqrt(var) + sqrt = var**0.5 if isinstance(sqrt, (int, float)): # var and mean are scalar tensors, let's keep it consistent sqrt = Tensor.from_xarray(xr.DataArray(sqrt)) From 43438d9a1cc385c68dfa29cc1cafe0d03d39d24b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 27 Jan 2025 12:57:03 +0100 Subject: [PATCH 059/187] append to conv report --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 968444a0..98bf386e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml -n auto --capture=no --doctest-modules --failed-first" +addopts = "--cov=bioimageio --cov-report=xml --cov-append -n 0 --capture=no --doctest-modules --failed-first" [tool.ruff] line-length = 88 From bf7494e945fe661096cc570dbccc32e72aa6a981 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 27 Jan 2025 12:58:12 +0100 Subject: [PATCH 060/187] avoid loguru logger in conftest.py to avoid error messages by pytest-xdist --- tests/conftest.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 32a2b6a8..cb78bfe9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,6 @@ from itertools import chain from typing import Dict, List -from loguru import logger from pytest import FixtureRequest, fixture from bioimageio.spec import __version__ as bioimageio_spec_version @@ -14,7 +13,7 @@ import torch torch_version = tuple(map(int, torch.__version__.split(".")[:2])) - logger.warning(f"detected torch version {torch.__version__}") + 
warnings.warn(f"detected torch version {torch.__version__}") except ImportError: torch = None torch_version = None @@ -45,9 +44,7 @@ # TODO: use models from new collection on S3 MODEL_SOURCES: Dict[str, str] = { - "hpa_densenet": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/hpa-densenet/rdf.yaml" - ), + "hpa_densenet": "polite-pig/1", "stardist": ( "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models" "/stardist_example_model/v0_4.bioimageio.yaml" From c99c89a05cac2c788cdefc754635776ff5530b38 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Feb 2025 13:28:54 +0100 Subject: [PATCH 061/187] add todo and docstrings --- bioimageio/core/block_meta.py | 1 + bioimageio/core/common.py | 3 +++ bioimageio/core/prediction.py | 14 +++++++------- bioimageio/core/sample.py | 20 ++++++++++++++++---- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/bioimageio/core/block_meta.py b/bioimageio/core/block_meta.py index f7740092..4e40c1cf 100644 --- a/bioimageio/core/block_meta.py +++ b/bioimageio/core/block_meta.py @@ -258,6 +258,7 @@ def split_shape_into_blocks( set(block_shape), ) if any(shape[a] < block_shape[a] for a in block_shape): + # TODO: allow larger blockshape raise ValueError(f"shape {shape} is smaller than block shape {block_shape}") assert all(a in shape for a in halo), (tuple(shape), set(halo)) diff --git a/bioimageio/core/common.py b/bioimageio/core/common.py index 78a85886..5ce70c27 100644 --- a/bioimageio/core/common.py +++ b/bioimageio/core/common.py @@ -87,7 +87,10 @@ class SliceInfo(NamedTuple): SampleId = Hashable +"""ID of a sample, see `bioimageio.core.sample.Sample`""" MemberId = v0_5.TensorId +"""ID of a `Sample` member, see `bioimageio.core.sample.Sample`""" + T = TypeVar("T") PerMember = Mapping[MemberId, T] diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 27a4129c..3ffec57f 100644 --- 
a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -50,18 +50,18 @@ def predict( """Run prediction for a single set of input(s) with a bioimage.io model Args: - model: model to predict with. + model: Model to predict with. May be given as RDF source, model description or prediction pipeline. inputs: the input sample or the named input(s) for this model as a dictionary sample_id: the sample id. - blocksize_parameter: (optional) tile the input into blocks parametrized by + blocksize_parameter: (optional) Tile the input into blocks parametrized by blocksize according to any parametrized axis sizes defined in the model RDF. Note: For a predetermined, fixed block shape use `input_block_shape` - input_block_shape: (optional) tile the input sample tensors into blocks. - Note: For a parameterized block shape, not dealing with the exact block shape, - use `blocksize_parameter`. - skip_preprocessing: flag to skip the model's preprocessing - skip_postprocessing: flag to skip the model's postprocessing + input_block_shape: (optional) Tile the input sample tensors into blocks. + Note: Use `blocksize_parameter` for a parameterized block shape to + run prediction independent of the exact block shape. + skip_preprocessing: Flag to skip the model's preprocessing. + skip_postprocessing: Flag to skip the model's postprocessing. save_output_path: A path with `{member_id}` `{sample_id}` in it to save the output to. """ diff --git a/bioimageio/core/sample.py b/bioimageio/core/sample.py index 0620282d..0d4c3724 100644 --- a/bioimageio/core/sample.py +++ b/bioimageio/core/sample.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from math import ceil, floor from typing import ( + Any, Callable, Dict, Generic, @@ -14,6 +15,7 @@ ) import numpy as np +from numpy.typing import NDArray from typing_extensions import Self from .axis import AxisId, PerAxis @@ -42,21 +44,31 @@ @dataclass class Sample: - """A dataset sample""" + """A dataset sample. 
+ + A `Sample` has `members`, which allows to combine multiple tensors into a single + sample. + For example a `Sample` from a dataset with masked images may contain a + `MemberId("raw")` and `MemberId("mask")` image. + """ members: Dict[MemberId, Tensor] - """the sample's tensors""" + """The sample's tensors""" stat: Stat - """sample and dataset statistics""" + """Sample and dataset statistics""" id: SampleId - """identifier within the sample's dataset""" + """Identifies the `Sample` within the dataset -- typically a number or a string.""" @property def shape(self) -> PerMember[PerAxis[int]]: return {tid: t.sizes for tid, t in self.members.items()} + def as_arrays(self) -> Dict[str, NDArray[Any]]: + """Return sample as dictionary of arrays.""" + return {str(m): t.data.to_numpy() for m, t in self.members.items()} + def split_into_blocks( self, block_shapes: PerMember[PerAxis[int]], From 8c61d693815a695b0b05ede70b8d213069bdd7cb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Feb 2025 15:46:55 +0100 Subject: [PATCH 062/187] update typing --- bioimageio/core/_prediction_pipeline.py | 7 +++--- bioimageio/core/_resource_tests.py | 15 ++++++------ bioimageio/core/backends/_model_adapter.py | 24 +++++++++---------- bioimageio/core/backends/keras_backend.py | 2 +- bioimageio/core/backends/onnx_backend.py | 2 -- bioimageio/core/cli.py | 20 ++++++++-------- bioimageio/core/commands.py | 6 ++--- bioimageio/core/common.py | 9 +++++++ bioimageio/core/io.py | 2 +- bioimageio/core/proc_ops.py | 3 +-- bioimageio/core/stat_calculators.py | 2 +- .../core/weight_converters/_add_weights.py | 4 +--- .../weight_converters/keras_to_tensorflow.py | 4 ++-- scripts/show_diff.py | 2 +- setup.py | 2 +- tests/test_prediction_pipeline.py | 7 ++++-- ...t_prediction_pipeline_device_management.py | 5 ++-- 17 files changed, 61 insertions(+), 55 deletions(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index 5ef337a0..4ff79d8d 100644 --- 
a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -15,11 +15,10 @@ from tqdm import tqdm from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 -from bioimageio.spec.model.v0_5 import WeightsFormat from ._op_base import BlockedOperator from .axis import AxisId, PerAxis -from .common import Halo, MemberId, PerMember, SampleId +from .common import Halo, MemberId, PerMember, SampleId, SupportedWeightsFormat from .digest_spec import ( get_block_transform, get_input_halo, @@ -296,8 +295,8 @@ def create_prediction_pipeline( bioimageio_model: AnyModelDescr, *, devices: Optional[Sequence[str]] = None, - weight_format: Optional[WeightsFormat] = None, - weights_format: Optional[WeightsFormat] = None, + weight_format: Optional[SupportedWeightsFormat] = None, + weights_format: Optional[SupportedWeightsFormat] = None, dataset_for_initial_statistics: Iterable[Union[Sample, Sequence[Tensor]]] = tuple(), keep_updating_initial_dataset_statistics: bool = False, fixed_dataset_statistics: Mapping[DatasetMeasure, MeasureValue] = MappingProxyType( diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 9e9c0937..7a8b9183 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -51,6 +51,7 @@ from ._prediction_pipeline import create_prediction_pipeline from .axis import AxisId, BatchSize +from .common import SupportedWeightsFormat from .digest_spec import get_test_inputs, get_test_outputs from .sample import Sample from .utils import VERSION @@ -119,7 +120,7 @@ def enable_determinism(mode: Literal["seed_only", "full"]): def test_model( source: Union[v0_4.ModelDescr, v0_5.ModelDescr, PermissiveFileSource], - weight_format: Optional[WeightsFormat] = None, + weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[List[str]] = None, absolute_tolerance: AbsoluteTolerance = 1.5e-4, relative_tolerance: RelativeTolerance = 1e-4, @@ -151,7 +152,7 @@ def 
test_description( source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], *, format_version: Union[Literal["discover", "latest"], str] = "discover", - weight_format: Optional[WeightsFormat] = None, + weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[Sequence[str]] = None, absolute_tolerance: AbsoluteTolerance = 1.5e-4, relative_tolerance: RelativeTolerance = 1e-4, @@ -234,7 +235,7 @@ def _test_in_env( source: PermissiveFileSource, *, working_dir: Path, - weight_format: Optional[WeightsFormat], + weight_format: Optional[SupportedWeightsFormat], conda_env: Optional[BioimageioCondaEnv], devices: Optional[Sequence[str]], absolute_tolerance: AbsoluteTolerance, @@ -353,7 +354,7 @@ def load_description_and_test( source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], *, format_version: Union[Literal["discover", "latest"], str] = "discover", - weight_format: Optional[WeightsFormat] = None, + weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[Sequence[str]] = None, absolute_tolerance: AbsoluteTolerance = 1.5e-4, relative_tolerance: RelativeTolerance = 1e-4, @@ -389,7 +390,7 @@ def load_description_and_test( if isinstance(rd, (v0_4.ModelDescr, v0_5.ModelDescr)): if weight_format is None: - weight_formats: List[WeightsFormat] = [ + weight_formats: List[SupportedWeightsFormat] = [ w for w, we in rd.weights if we is not None ] # pyright: ignore[reportAssignmentType] else: @@ -424,7 +425,7 @@ def load_description_and_test( def _test_model_inference( model: Union[v0_4.ModelDescr, v0_5.ModelDescr], - weight_format: WeightsFormat, + weight_format: SupportedWeightsFormat, devices: Optional[Sequence[str]], atol: float, rtol: float, @@ -493,7 +494,7 @@ def _test_model_inference( def _test_model_inference_parametrized( model: v0_5.ModelDescr, - weight_format: WeightsFormat, + weight_format: SupportedWeightsFormat, devices: Optional[Sequence[str]], ) -> None: if not any( diff --git 
a/bioimageio/core/backends/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py index 677a88f7..1ac7bc9f 100644 --- a/bioimageio/core/backends/_model_adapter.py +++ b/bioimageio/core/backends/_model_adapter.py @@ -3,7 +3,6 @@ from typing import ( Any, List, - Literal, Optional, Sequence, Tuple, @@ -14,20 +13,13 @@ from numpy.typing import NDArray -from bioimageio.core.digest_spec import get_axes_infos, get_member_ids -from bioimageio.core.sample import Sample from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 +from ..common import SupportedWeightsFormat +from ..digest_spec import get_axes_infos, get_member_ids +from ..sample import Sample, SampleBlock, SampleBlockWithOrigin from ..tensor import Tensor -SupportedWeightsFormat = Literal[ - "keras_hdf5", - "onnx", - "pytorch_state_dict", - "tensorflow_saved_model_bundle", - "torchscript", -] - # Known weight formats in order of priority # First match wins DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[SupportedWeightsFormat, ...] = ( @@ -192,7 +184,9 @@ def create( def load(self, *, devices: Optional[Sequence[str]] = None) -> None: warnings.warn("Deprecated. 
ModelAdapter is loaded on initialization") - def forward(self, input_sample: Sample) -> Sample: + def forward( + self, input_sample: Union[Sample, SampleBlock, SampleBlockWithOrigin] + ) -> Sample: """ Run forward pass of model to get model predictions @@ -226,7 +220,11 @@ def forward(self, input_sample: Sample) -> Sample: if out is not None }, stat=input_sample.stat, - id=input_sample.id, + id=( + input_sample.id + if isinstance(input_sample, Sample) + else input_sample.sample_id + ), ) @abstractmethod diff --git a/bioimageio/core/backends/keras_backend.py b/bioimageio/core/backends/keras_backend.py index 6ca603ad..ef65b8ea 100644 --- a/bioimageio/core/backends/keras_backend.py +++ b/bioimageio/core/backends/keras_backend.py @@ -4,7 +4,7 @@ from loguru import logger from numpy.typing import NDArray -from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.io import download from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import Version diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index 858b4cc1..901deb5e 100644 --- a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -8,9 +8,7 @@ from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download -from ..digest_spec import get_axes_infos from ..model_adapters import ModelAdapter -from ..tensor import Tensor class ONNXModelAdapter(ModelAdapter): diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 49700b43..b225fe8c 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -101,16 +101,16 @@ def descr_id(self) -> str: """ if isinstance(self.descr, InvalidDescr): return str(getattr(self.descr, "id", getattr(self.descr, "name"))) - else: - return str( - ( - (bio_config := self.descr.config.get("bioimageio", {})) - and isinstance(bio_config, dict) - and 
bio_config.get("nickname") - ) - or self.descr.id - or self.descr.name - ) + + nickname = None + if ( + isinstance(self.descr.config, v0_5.Config) + and (bio_config := self.descr.config.bioimageio) + and bio_config.model_extra is not None + ): + nickname = bio_config.model_extra.get("nickname") + + return str(nickname or self.descr.id or self.descr.name) class ValidateFormatCmd(CmdBase, WithSource): diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 92d7ddbc..b4668bf8 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -6,18 +6,18 @@ from typing_extensions import Literal +from bioimageio.core.common import SupportedWeightsFormat from bioimageio.spec import ( InvalidDescr, ResourceDescr, save_bioimageio_package, save_bioimageio_package_as_folder, ) -from bioimageio.spec.model.v0_5 import WeightsFormat from ._resource_tests import test_description -WeightFormatArgAll = Literal[WeightsFormat, "all"] -WeightFormatArgAny = Literal[WeightsFormat, "any"] +WeightFormatArgAll = Literal[SupportedWeightsFormat, "all"] +WeightFormatArgAny = Literal[SupportedWeightsFormat, "any"] def test( diff --git a/bioimageio/core/common.py b/bioimageio/core/common.py index 5ce70c27..d36eb9c6 100644 --- a/bioimageio/core/common.py +++ b/bioimageio/core/common.py @@ -15,6 +15,15 @@ from bioimageio.spec.model import v0_5 +SupportedWeightsFormat = Literal[ + "keras_hdf5", + "onnx", + "pytorch_state_dict", + "tensorflow_saved_model_bundle", + "torchscript", +] + + DTypeStr = Literal[ "bool", "float32", diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index d80c6870..44044a92 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -8,7 +8,7 @@ import h5py import numpy as np -from imageio.v3 import imread, imwrite +from imageio.v3 import imread, imwrite # type: ignore from loguru import logger from numpy.typing import NDArray from pydantic import BaseModel, ConfigDict, TypeAdapter diff --git a/bioimageio/core/proc_ops.py 
b/bioimageio/core/proc_ops.py index d6d59092..7cc1574c 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -16,11 +16,10 @@ import xarray as xr from typing_extensions import Self, assert_never -from bioimageio.core.digest_spec import get_axes_infos from bioimageio.spec.model import v0_4, v0_5 from ._op_base import BlockedOperator, Operator -from .axis import AxisId, AxisInfo, PerAxis +from .axis import AxisId, PerAxis from .block import Block from .common import DTypeStr, MemberId from .sample import Sample, SampleBlock, SampleBlockWithOrigin diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index 6279289b..d969ae01 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -47,7 +47,7 @@ from .tensor import Tensor try: - import crick + import crick # pyright: ignore[reportMissingImports] except Exception: crick = None diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index ed541385..a651f39e 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -1,11 +1,10 @@ -from typing import Optional, Sequence +from typing import Optional from loguru import logger from pydantic import DirectoryPath from bioimageio.core._resource_tests import test_model from bioimageio.spec import load_model_description, save_bioimageio_package_as_folder -from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat @@ -15,7 +14,6 @@ def increase_available_weight_formats( output_path: DirectoryPath, source_format: Optional[WeightsFormat] = None, target_format: Optional[WeightsFormat] = None, - devices: Sequence[str] = ("cpu",), ) -> ModelDescr: """Convert model weights to other formats and add them to the model description diff --git 
a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py index 083bae5b..9670d2c2 100644 --- a/bioimageio/core/weight_converters/keras_to_tensorflow.py +++ b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -7,14 +7,14 @@ import tensorflow from bioimageio.core.io import ensure_unzipped -from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.io import download from bioimageio.spec._internal.version_type import Version from bioimageio.spec.common import ZipPath from bioimageio.spec.model import v0_4, v0_5 try: # try to build the tf model with the keras import from tensorflow - from tensorflow import keras + from tensorflow import keras # type: ignore except Exception: # if the above fails try to export with the standalone keras import keras diff --git a/scripts/show_diff.py b/scripts/show_diff.py index 1b0163bb..8889437c 100644 --- a/scripts/show_diff.py +++ b/scripts/show_diff.py @@ -9,7 +9,7 @@ if __name__ == "__main__": rdf_source = "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/v0_4_9.bioimageio.yaml" - local_source = Path(pooch.retrieve(rdf_source, None)) # type: ignore + local_source = Path(pooch.retrieve(rdf_source, None)) model_as_is = load_description(rdf_source, format_version="discover") model_latest = load_description(rdf_source, format_version="latest") print(model_latest.validation_summary) diff --git a/setup.py b/setup.py index 02f39f8d..ebaee737 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ "pre-commit", "pdoc", "psutil", # parallel pytest with 'pytest -n auto' - "pyright==1.1.391", + "pyright==1.1.393", "pytest-cov", "pytest-xdist", # parallel pytest "pytest", diff --git a/tests/test_prediction_pipeline.py b/tests/test_prediction_pipeline.py index a0a85f5d..08e9f094 100644 --- a/tests/test_prediction_pipeline.py +++ b/tests/test_prediction_pipeline.py @@ -2,12 +2,15 @@ 
from numpy.testing import assert_array_almost_equal +from bioimageio.core.common import SupportedWeightsFormat from bioimageio.spec import load_description from bioimageio.spec.model.v0_4 import ModelDescr as ModelDescr04 -from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat +from bioimageio.spec.model.v0_5 import ModelDescr -def _test_prediction_pipeline(model_package: Path, weights_format: WeightsFormat): +def _test_prediction_pipeline( + model_package: Path, weights_format: SupportedWeightsFormat +): from bioimageio.core._prediction_pipeline import create_prediction_pipeline from bioimageio.core.digest_spec import get_test_inputs, get_test_outputs diff --git a/tests/test_prediction_pipeline_device_management.py b/tests/test_prediction_pipeline_device_management.py index 2dde4120..0d2ff9b7 100644 --- a/tests/test_prediction_pipeline_device_management.py +++ b/tests/test_prediction_pipeline_device_management.py @@ -3,11 +3,12 @@ import pytest from numpy.testing import assert_array_almost_equal +from bioimageio.core.common import SupportedWeightsFormat from bioimageio.spec.model.v0_4 import ModelDescr as ModelDescr04 -from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat +from bioimageio.spec.model.v0_5 import ModelDescr -def _test_device_management(model_package: Path, weight_format: WeightsFormat): +def _test_device_management(model_package: Path, weight_format: SupportedWeightsFormat): import torch from bioimageio.core import load_description From 666e80cf72e570584274f2e54c4f03f71184facb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Feb 2025 16:36:13 +0100 Subject: [PATCH 063/187] fix stats for absence of batch axis --- bioimageio/core/stat_calculators.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index d969ae01..efff5b63 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -26,6 
+26,8 @@ from numpy.typing import NDArray from typing_extensions import assert_never +from bioimageio.spec.model.v0_5 import BATCH_AXIS_ID + from .axis import AxisId, PerAxis from .common import MemberId from .sample import Sample @@ -120,7 +122,7 @@ class MeanVarStdCalculator: def __init__(self, member_id: MemberId, axes: Optional[Sequence[AxisId]]): super().__init__() - self._axes = None if axes is None else tuple(axes) + self._axes = None if axes is None else tuple(map(AxisId, axes)) self._member_id = member_id self._n: int = 0 self._mean: Optional[Tensor] = None @@ -152,6 +154,9 @@ def compute( } def update(self, sample: Sample): + if self._axes is not None and BATCH_AXIS_ID not in self._axes: + return + tensor = sample.members[self._member_id].astype("float64", copy=False) mean_b = tensor.mean(dim=self._axes) assert mean_b.dtype == "float64" @@ -178,7 +183,11 @@ def update(self, sample: Sample): def finalize( self, ) -> Dict[Union[DatasetMean, DatasetVar, DatasetStd], MeasureValue]: - if self._mean is None: + if ( + self._axes is not None + and BATCH_AXIS_ID not in self._axes + or self._mean is None + ): return {} else: assert self._m2 is not None From 0d0127133d3ccd12fd740ad77bcf02e782d9e487 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Feb 2025 16:36:49 +0100 Subject: [PATCH 064/187] split test_mean_var_std_calculator --- tests/test_stat_calculators.py | 41 ++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 0efe02d9..64141370 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -13,6 +13,9 @@ DatasetMean, DatasetStd, DatasetVar, + SampleMean, + SampleStd, + SampleVar, ) from bioimageio.core.tensor import Tensor @@ -31,13 +34,47 @@ def create_random_dataset(tid: MemberId, axes: Tuple[AxisId, ...]): @pytest.mark.parametrize( "axes", [ - None, (AxisId("x"), AxisId("y")), (AxisId("channel"), AxisId("y")), + ], +) 
+def test_sample_mean_var_std_calculator(axes: Optional[Tuple[AxisId, ...]]): + tid = MemberId("tensor") + d_axes = tuple(map(AxisId, ("batch", "channel", "x", "y"))) + data, ds = create_random_dataset(tid, d_axes) + expected_mean = data[0].mean(axes) + expected_var = data[0].var(axes) + expected_std = data[0].std(axes) + + calc = MeanVarStdCalculator(tid, axes=axes) + + actual = calc.compute(ds[0]) + actual_mean = actual[SampleMean(member_id=tid, axes=axes)] + actual_var = actual[SampleVar(member_id=tid, axes=axes)] + actual_std = actual[SampleStd(member_id=tid, axes=axes)] + + assert_allclose( + actual_mean if isinstance(actual_mean, (int, float)) else actual_mean.data, + expected_mean.data, + ) + assert_allclose( + actual_var if isinstance(actual_var, (int, float)) else actual_var.data, + expected_var.data, + ) + assert_allclose( + actual_std if isinstance(actual_std, (int, float)) else actual_std.data, + expected_std.data, + ) + + +@pytest.mark.parametrize( + "axes", + [ + None, (AxisId("batch"), AxisId("channel"), AxisId("x"), AxisId("y")), ], ) -def test_mean_var_std_calculator(axes: Optional[Tuple[AxisId, ...]]): +def test_dataset_mean_var_std_calculator(axes: Optional[Tuple[AxisId, ...]]): tid = MemberId("tensor") d_axes = tuple(map(AxisId, ("batch", "channel", "x", "y"))) data, ds = create_random_dataset(tid, d_axes) From 489d04e0524f38406156887f9ab6ce1167b00359 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 5 Feb 2025 19:11:17 +0100 Subject: [PATCH 065/187] default_ns -> blocksize_paramter and update docstrings --- bioimageio/core/_prediction_pipeline.py | 63 ++++++++++++++++++++----- bioimageio/core/axis.py | 1 + bioimageio/core/common.py | 11 +++++ bioimageio/core/prediction.py | 17 +++---- bioimageio/core/proc_setup.py | 2 +- 5 files changed, 70 insertions(+), 24 deletions(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index 4ff79d8d..51afcf2a 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ 
b/bioimageio/core/_prediction_pipeline.py @@ -18,7 +18,14 @@ from ._op_base import BlockedOperator from .axis import AxisId, PerAxis -from .common import Halo, MemberId, PerMember, SampleId, SupportedWeightsFormat +from .common import ( + BlocksizeParameter, + Halo, + MemberId, + PerMember, + SampleId, + SupportedWeightsFormat, +) from .digest_spec import ( get_block_transform, get_input_halo, @@ -42,7 +49,8 @@ class PredictionPipeline: """ Represents model computation including preprocessing and postprocessing - Note: Ideally use the PredictionPipeline as a context manager + Note: Ideally use the `PredictionPipeline` in a with statement + (as a context manager). """ def __init__( @@ -53,13 +61,20 @@ def __init__( preprocessing: List[Processing], postprocessing: List[Processing], model_adapter: ModelAdapter, - default_ns: Union[ - v0_5.ParameterizedSize_N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], - ] = 10, + default_ns: Optional[BlocksizeParameter] = None, + default_blocksize_parameter: BlocksizeParameter = 10, default_batch_size: int = 1, ) -> None: + """Use `create_prediction_pipeline` to create a `PredictionPipeline`""" super().__init__() + default_blocksize_parameter = default_ns or default_blocksize_parameter + if default_ns is not None: + warnings.warn( + "Argument `default_ns` is deprecated in favor of" + + " `default_blocksize_paramter` and will be removed soon." 
+ ) + del default_ns + if model_description.run_mode: warnings.warn( f"Not yet implemented inference for run mode '{model_description.run_mode.name}'" @@ -87,7 +102,7 @@ def __init__( ) self._block_transform = get_block_transform(model_description) - self._default_ns = default_ns + self._default_blocksize_parameter = default_blocksize_parameter self._default_batch_size = default_batch_size self._input_ids = get_member_ids(model_description.inputs) @@ -214,7 +229,7 @@ def predict_sample_with_blocking( + " Consider using `predict_sample_with_fixed_blocking`" ) - ns = ns or self._default_ns + ns = ns or self._default_blocksize_parameter if isinstance(ns, int): ns = { (ipt.id, a.id): ns @@ -303,10 +318,8 @@ def create_prediction_pipeline( {} ), model_adapter: Optional[ModelAdapter] = None, - ns: Union[ - v0_5.ParameterizedSize_N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], - ] = 10, + ns: Optional[BlocksizeParameter] = None, + default_blocksize_parameter: BlocksizeParameter = 10, **deprecated_kwargs: Any, ) -> PredictionPipeline: """ @@ -316,9 +329,33 @@ def create_prediction_pipeline( * model prediction * computation of output statistics * postprocessing + + Args: + bioimageio_model: A bioimageio model description. + devices: (optional) + weight_format: deprecated in favor of **weights_format** + weights_format: (optional) Use a specific **weights_format** rather than + choosing one automatically. + A corresponding `bioimageio.core.model_adapters.ModelAdapter` will be + created to run inference with the **bioimageio_model**. + dataset_for_initial_statistics: (optional) If preprocessing steps require input + dataset statistics, **dataset_for_initial_statistics** allows you to + specifcy a dataset from which these statistics are computed. + keep_updating_initial_dataset_statistics: (optional) Set to `True` if you want + to update dataset statistics with each processed sample. 
+ fixed_dataset_statistics: (optional) Allows you to specify a mapping of + `DatasetMeasure`s to precomputed `MeasureValue`s. + model_adapter: (optional) Allows you to use a custom **model_adapter** instead + of creating one according to the present/selected **weights_format**. + ns: deprecated in favor of **default_blocksize_parameter** + default_blocksize_parameter: Allows to control the default block size for + blockwise predictions, see `BlocksizeParameter`. + """ weights_format = weight_format or weights_format del weight_format + default_blocksize_parameter = ns or default_blocksize_parameter + del ns if deprecated_kwargs: warnings.warn( f"deprecated create_prediction_pipeline kwargs: {set(deprecated_kwargs)}" @@ -353,5 +390,5 @@ def dataset(): model_adapter=model_adapter, preprocessing=preprocessing, postprocessing=postprocessing, - default_ns=ns, + default_blocksize_parameter=default_blocksize_parameter, ) diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index 30c0d281..0b39045e 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -34,6 +34,7 @@ def _guess_axis_type(a: str): AxisId = v0_5.AxisId +"""An axis identifier, e.g. 'batch', 'channel', 'z', 'y', 'x'""" T = TypeVar("T") PerAxis = Mapping[AxisId, T] diff --git a/bioimageio/core/common.py b/bioimageio/core/common.py index d36eb9c6..9f939061 100644 --- a/bioimageio/core/common.py +++ b/bioimageio/core/common.py @@ -15,6 +15,8 @@ from bioimageio.spec.model import v0_5 +from .axis import AxisId + SupportedWeightsFormat = Literal[ "keras_hdf5", "onnx", @@ -100,6 +102,15 @@ class SliceInfo(NamedTuple): MemberId = v0_5.TensorId """ID of a `Sample` member, see `bioimageio.core.sample.Sample`""" +BlocksizeParameter = Union[ + v0_5.ParameterizedSize_N, + Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], +] +""" +Parameter to determine a concrete size for paramtrized axis sizes defined by +`bioimageio.spec.model.v0_5.ParameterizedSize`. 
+""" + T = TypeVar("T") PerMember = Mapping[MemberId, T] diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 3ffec57f..a27451e7 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -22,7 +22,7 @@ from ._prediction_pipeline import PredictionPipeline, create_prediction_pipeline from .axis import AxisId -from .common import MemberId, PerMember +from .common import BlocksizeParameter, MemberId, PerMember from .digest_spec import create_sample_for_model from .io import save_sample from .sample import Sample @@ -36,12 +36,7 @@ def predict( ], inputs: Union[Sample, PerMember[Union[Tensor, xr.DataArray, NDArray[Any], Path]]], sample_id: Hashable = "sample", - blocksize_parameter: Optional[ - Union[ - v0_5.ParameterizedSize_N, - Mapping[Tuple[MemberId, AxisId], v0_5.ParameterizedSize_N], - ] - ] = None, + blocksize_parameter: Optional[BlocksizeParameter] = None, input_block_shape: Optional[Mapping[MemberId, Mapping[AxisId, int]]] = None, skip_preprocessing: bool = False, skip_postprocessing: bool = False, @@ -55,10 +50,12 @@ def predict( inputs: the input sample or the named input(s) for this model as a dictionary sample_id: the sample id. blocksize_parameter: (optional) Tile the input into blocks parametrized by - blocksize according to any parametrized axis sizes defined in the model RDF. - Note: For a predetermined, fixed block shape use `input_block_shape` + **blocksize_parameter** according to any parametrized axis sizes defined + by the **model**. + See `bioimageio.spec.model.v0_5.ParameterizedSize` for details. + Note: For a predetermined, fixed block shape use **input_block_shape**. input_block_shape: (optional) Tile the input sample tensors into blocks. - Note: Use `blocksize_parameter` for a parameterized block shape to + Note: Use **blocksize_parameter** for a parameterized block shape to run prediction independent of the exact block shape. skip_preprocessing: Flag to skip the model's preprocessing. 
skip_postprocessing: Flag to skip the model's postprocessing. diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index 89277da5..ab24752f 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -94,7 +94,7 @@ def setup_pre_and_postprocessing( ) -> PreAndPostprocessing: """ Get pre- and postprocessing operators for a `model` description. - userd in `bioimageio.core.create_prediction_pipeline""" + Used in `bioimageio.core.create_prediction_pipeline""" prep, post, prep_meas, post_meas = _prepare_setup_pre_and_postprocessing(model) missing_dataset_stats = { From f970c6b4cc3174272db369c6701f9295f1578f84 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 5 Feb 2025 19:12:33 +0100 Subject: [PATCH 066/187] deprecate tolerance arguments --- bioimageio/core/_resource_tests.py | 148 ++++++++++++++++++----------- tests/test_stat_calculators.py | 6 +- 2 files changed, 93 insertions(+), 61 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 7a8b9183..3a41593c 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -20,9 +20,8 @@ Union, ) -import numpy as np from loguru import logger -from typing_extensions import assert_never, get_args +from typing_extensions import NotRequired, TypedDict, Unpack, assert_never, get_args from bioimageio.spec import ( BioimageioCondaEnv, @@ -37,7 +36,11 @@ from bioimageio.spec._internal.common_nodes import ResourceDescrBase from bioimageio.spec._internal.io import is_yaml_value from bioimageio.spec._internal.io_utils import read_yaml, write_yaml -from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance +from bioimageio.spec._internal.types import ( + AbsoluteTolerance, + MismatchedElementsPerMillion, + RelativeTolerance, +) from bioimageio.spec._internal.validation_context import validation_context_var from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource, Sha256 
from bioimageio.spec.model import v0_4, v0_5 @@ -51,12 +54,18 @@ from ._prediction_pipeline import create_prediction_pipeline from .axis import AxisId, BatchSize -from .common import SupportedWeightsFormat +from .common import MemberId, SupportedWeightsFormat from .digest_spec import get_test_inputs, get_test_outputs from .sample import Sample from .utils import VERSION +class DeprecatedKwargs(TypedDict): + absolute_tolerance: NotRequired[AbsoluteTolerance] + relative_tolerance: NotRequired[RelativeTolerance] + decimal: NotRequired[Optional[int]] + + def enable_determinism(mode: Literal["seed_only", "full"]): """Seed and configure ML frameworks for maximum reproducibility. May degrade performance. Only recommended for testing reproducibility! @@ -122,24 +131,20 @@ def test_model( source: Union[v0_4.ModelDescr, v0_5.ModelDescr, PermissiveFileSource], weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[List[str]] = None, - absolute_tolerance: AbsoluteTolerance = 1.5e-4, - relative_tolerance: RelativeTolerance = 1e-4, - decimal: Optional[int] = None, *, determinism: Literal["seed_only", "full"] = "seed_only", sha256: Optional[Sha256] = None, + **deprecated: Unpack[DeprecatedKwargs], ) -> ValidationSummary: """Test model inference""" return test_description( source, weight_format=weight_format, devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, - decimal=decimal, determinism=determinism, expected_type="model", sha256=sha256, + **deprecated, ) @@ -154,9 +159,6 @@ def test_description( format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[Sequence[str]] = None, - absolute_tolerance: AbsoluteTolerance = 1.5e-4, - relative_tolerance: RelativeTolerance = 1e-4, - decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, sha256: Optional[Sha256] = 
None, @@ -164,6 +166,7 @@ def test_description( Literal["currently-active", "as-described"], Path, BioimageioCondaEnv ] = ("currently-active"), run_command: Callable[[Sequence[str]], None] = default_run_command, + **deprecated: Unpack[DeprecatedKwargs], ) -> ValidationSummary: """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models. @@ -173,8 +176,6 @@ def test_description( Default: All weight formats present in **source**. devices: Devices to test with, e.g. 'cpu', 'cuda'. Default (may be weight format dependent): ['cuda'] if available, ['cpu'] otherwise. - absolute_tolerance: Maximum absolute tolerance of reproduced output tensors. - relative_tolerance: Maximum relative tolerance of reproduced output tensors. determinism: Modes to improve reproducibility of test outputs. runtime_env: (Experimental feature!) The Python environment to run the tests in - `"currently-active"`: Use active Python interpreter. @@ -191,12 +192,10 @@ def test_description( format_version=format_version, weight_format=weight_format, devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, - decimal=decimal, determinism=determinism, expected_type=expected_type, sha256=sha256, + **deprecated, ) return rd.validation_summary @@ -224,10 +223,9 @@ def test_description( weight_format=weight_format, conda_env=conda_env, devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, determinism=determinism, run_command=run_command, + **deprecated, ) @@ -238,10 +236,9 @@ def _test_in_env( weight_format: Optional[SupportedWeightsFormat], conda_env: Optional[BioimageioCondaEnv], devices: Optional[Sequence[str]], - absolute_tolerance: AbsoluteTolerance, - relative_tolerance: RelativeTolerance, determinism: Literal["seed_only", "full"], run_command: Callable[[Sequence[str]], None], + **deprecated: Unpack[DeprecatedKwargs], ) -> ValidationSummary: descr = load_description(source) @@ -263,11 +260,10 
@@ def _test_in_env( working_dir=working_dir / all_present_wfs[0], weight_format=all_present_wfs[0], devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, determinism=determinism, conda_env=conda_env, run_command=run_command, + **deprecated, ) for wf in all_present_wfs[1:]: additional_summary = _test_in_env( @@ -275,11 +271,10 @@ def _test_in_env( working_dir=working_dir / wf, weight_format=wf, devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, determinism=determinism, conda_env=conda_env, run_command=run_command, + **deprecated, ) for d in additional_summary.details: # TODO: filter reduntant details; group details @@ -356,12 +351,10 @@ def load_description_and_test( format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[Sequence[str]] = None, - absolute_tolerance: AbsoluteTolerance = 1.5e-4, - relative_tolerance: RelativeTolerance = 1e-4, - decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, sha256: Optional[Sha256] = None, + **deprecated: Unpack[DeprecatedKwargs], ) -> Union[ResourceDescr, InvalidDescr]: """Test RDF dynamically, e.g. model inference of test inputs""" if ( @@ -396,24 +389,9 @@ def load_description_and_test( else: weight_formats = [weight_format] - if decimal is None: - atol = absolute_tolerance - rtol = relative_tolerance - else: - warnings.warn( - "The argument `decimal` has been deprecated in favour of" - + " `relative_tolerance` and `absolute_tolerance`, with different" - + " validation logic, using `numpy.testing.assert_allclose, see" - + " 'https://numpy.org/doc/stable/reference/generated/" - + " numpy.testing.assert_allclose.html'. Passing a value for `decimal`" - + " will cause validation to revert to the old behaviour." 
- ) - atol = 1.5 * 10 ** (-decimal) - rtol = 0 - enable_determinism(determinism) for w in weight_formats: - _test_model_inference(rd, w, devices, atol, rtol) + _test_model_inference(rd, w, devices, **deprecated) if not isinstance(rd, v0_4.ModelDescr): _test_model_inference_parametrized(rd, w, devices) @@ -423,12 +401,49 @@ def load_description_and_test( return rd +def _get_tolerance( + model: Union[v0_4.ModelDescr, v0_5.ModelDescr], + wf: SupportedWeightsFormat, + m: MemberId, + **deprecated: Unpack[DeprecatedKwargs], +) -> Tuple[RelativeTolerance, AbsoluteTolerance, MismatchedElementsPerMillion]: + if isinstance(model, v0_5.ModelDescr): + applicable = v0_5.ReproducibilityTolerance() + for a in model.config.bioimageio.reproducibility_tolerance: + if (not a.weights_formats or wf in a.weights_formats) and ( + not a.output_ids or m in a.output_ids + ): + applicable = a + break + + rtol = applicable.relative_tolerance + atol = applicable.absolute_tolerance + mismatched_tol = applicable.mismatched_elements_per_million + elif (decimal := deprecated.get("decimal")) is not None: + warnings.warn( + "The argument `decimal` has been deprecated in favour of" + + " `relative_tolerance` and `absolute_tolerance`, with different" + + " validation logic, using `numpy.testing.assert_allclose, see" + + " 'https://numpy.org/doc/stable/reference/generated/" + + " numpy.testing.assert_allclose.html'. Passing a value for `decimal`" + + " will cause validation to revert to the old behaviour." 
+ ) + atol = 1.5 * 10 ** (-decimal) + rtol = 0 + mismatched_tol = 0 + else: + atol = deprecated.get("absolute_tolerance", 0) + rtol = deprecated.get("relative_tolerance", 1e-3) + mismatched_tol = 0 + + return rtol, atol, mismatched_tol + + def _test_model_inference( model: Union[v0_4.ModelDescr, v0_5.ModelDescr], weight_format: SupportedWeightsFormat, devices: Optional[Sequence[str]], - atol: float, - rtol: float, + **deprecated: Unpack[DeprecatedKwargs], ) -> None: test_name = f"Reproduce test outputs from test inputs ({weight_format})" logger.debug("starting '{}'", test_name) @@ -448,20 +463,37 @@ def _test_model_inference( error = f"Expected {len(expected.members)} outputs, but got {len(results.members)}" else: - for m, exp in expected.members.items(): - res = results.members.get(m) - if res is None: + for m, expected in expected.members.items(): + actual = results.members.get(m) + if actual is None: error = "Output tensors for test case may not be None" break - try: - np.testing.assert_allclose( - res.data, - exp.data, - rtol=rtol, - atol=atol, + + rtol, atol, mismatched_tol = _get_tolerance( + model, wf=weight_format, m=m, **deprecated + ) + mismatched = (abs_diff := abs(actual - expected)) > atol + rtol * abs( + expected + ) + mismatched_elements = mismatched.sum().item() + if mismatched_elements > mismatched_tol: + r_max_idx = (r_diff := abs_diff / abs(expected)).argmax() + r_max = r_diff[r_max_idx].item() + r_actual = actual[r_max_idx].item() + r_expected = expected[r_max_idx].item() + a_max_idx = abs_diff.argmax() + a_max = abs_diff[a_max_idx].item() + a_actual = actual[a_max_idx].item() + a_expected = expected[a_max_idx].item() + error = ( + f"Output '{m}' disagrees with {mismatched_elements} of" + + f" {expected.size} expected values." 
+ + f"\n Max relative difference: {r_max}" + + f" (= |{r_actual} - {r_expected}|/|{r_expected}|)" + + f" at {r_max_idx}" + + f"\n Max absolute difference: {a_max}" + + f" (= |{a_actual} - {a_expected}|) at {a_max_idx}" ) - except AssertionError as e: - error = f"Output and expected output disagree:\n {e}" break except Exception as e: if validation_context_var.get().raise_errors: diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 64141370..55ea1fec 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -42,9 +42,9 @@ def test_sample_mean_var_std_calculator(axes: Optional[Tuple[AxisId, ...]]): tid = MemberId("tensor") d_axes = tuple(map(AxisId, ("batch", "channel", "x", "y"))) data, ds = create_random_dataset(tid, d_axes) - expected_mean = data[0].mean(axes) - expected_var = data[0].var(axes) - expected_std = data[0].std(axes) + expected_mean = data[0:1].mean(axes) + expected_var = data[0:1].var(axes) + expected_std = data[0:1].std(axes) calc = MeanVarStdCalculator(tid, axes=axes) From c775546214c7a47d0a86eb996a4417ef7f04d7b0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 5 Feb 2025 20:14:09 +0100 Subject: [PATCH 067/187] sort Tensor methods and add argmax() and item() --- bioimageio/core/tensor.py | 68 +++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index 57148058..5f76cd6e 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -186,9 +186,15 @@ def dims(self): # TODO: rename to `axes`? 
return cast(Tuple[AxisId, ...], self._data.dims) @property - def tagged_shape(self): - """(alias for `sizes`) Ordered, immutable mapping from axis ids to lengths.""" - return self.sizes + def dtype(self) -> DTypeStr: + dt = str(self.data.dtype) # pyright: ignore[reportUnknownArgumentType] + assert dt in get_args(DTypeStr) + return dt # pyright: ignore[reportReturnType] + + @property + def ndim(self): + """Number of tensor dimensions.""" + return self._data.ndim @property def shape_tuple(self): @@ -203,26 +209,21 @@ def size(self): """ return self._data.size - def sum(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: - """Reduce this Tensor's data by applying sum along some dimension(s).""" - return self.__class__.from_xarray(self._data.sum(dim=dim)) - - @property - def ndim(self): - """Number of tensor dimensions.""" - return self._data.ndim - - @property - def dtype(self) -> DTypeStr: - dt = str(self.data.dtype) # pyright: ignore[reportUnknownArgumentType] - assert dt in get_args(DTypeStr) - return dt # pyright: ignore[reportReturnType] - @property def sizes(self): """Ordered, immutable mapping from axis ids to axis lengths.""" return cast(Mapping[AxisId, int], self.data.sizes) + @property + def tagged_shape(self): + """(alias for `sizes`) Ordered, immutable mapping from axis ids to lengths.""" + return self.sizes + + def argmax(self) -> Mapping[AxisId, int]: + ret = self._data.argmax(...) 
+ assert isinstance(ret, dict) + return {cast(AxisId, k): cast(int, v.item()) for k, v in ret.items()} + def astype(self, dtype: DTypeStr, *, copy: bool = False): """Return tensor cast to `dtype` @@ -282,14 +283,23 @@ def crop_to( def expand_dims(self, dims: Union[Sequence[AxisId], PerAxis[int]]) -> Self: return self.__class__.from_xarray(self._data.expand_dims(dims=dims)) - def mean(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: - return self.__class__.from_xarray(self._data.mean(dim=dim)) + def item( + self, + key: Union[ + None, SliceInfo, slice, int, PerAxis[Union[SliceInfo, slice, int]] + ] = None, + ): + """Copy a tensor element to a standard Python scalar and return it.""" + if key is None: + ret = self._data.item() + else: + ret = self[key]._data.item() - def std(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: - return self.__class__.from_xarray(self._data.std(dim=dim)) + assert isinstance(ret, (bool, float, int)) + return ret - def var(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: - return self.__class__.from_xarray(self._data.var(dim=dim)) + def mean(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: + return self.__class__.from_xarray(self._data.mean(dim=dim)) def pad( self, @@ -405,6 +415,13 @@ def resize_to( return tensor + def std(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: + return self.__class__.from_xarray(self._data.std(dim=dim)) + + def sum(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: + """Reduce this Tensor's data by applying sum along some dimension(s).""" + return self.__class__.from_xarray(self._data.sum(dim=dim)) + def transpose( self, axes: Sequence[AxisId], @@ -423,6 +440,9 @@ def transpose( # transpose to the correct axis order return self.__class__.from_xarray(array.transpose(*axes)) + def var(self, dim: Optional[Union[AxisId, Sequence[AxisId]]] = None) -> Self: + return 
self.__class__.from_xarray(self._data.var(dim=dim)) + @classmethod def _interprete_array_wo_known_axes(cls, array: NDArray[Any]): ndim = array.ndim From 05af735711a2fb98a51e376b7ba393d212f8dafb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 5 Feb 2025 22:17:08 +0100 Subject: [PATCH 068/187] remove decimal argument --- README.md | 2 +- bioimageio/core/commands.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 9985207c..aaecc98e 100644 --- a/README.md +++ b/README.md @@ -379,7 +379,7 @@ The model specification and its validation tools can be found at Date: Thu, 6 Feb 2025 09:32:29 +0100 Subject: [PATCH 069/187] update conda test build --- .github/workflows/build.yaml | 44 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3ed0f5df..aac82160 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -221,27 +221,33 @@ jobs: BIOIMAGEIO_CACHE_PATH: bioimageio_cache conda-build: - runs-on: ubuntu-latest needs: test-spec-conda + runs-on: ubuntu-latest steps: - - name: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Install Conda environment with Micromamba - uses: mamba-org/setup-micromamba@v1 - with: - cache-downloads: true - cache-environment: true - environment-name: build-env - condarc: | - channels: - - conda-forge - create-args: | - boa - - name: linux conda build - run: | - conda mambabuild -c conda-forge conda-recipe + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + auto-activate-base: true + activate-environment: "" + channel-priority: strict + miniforge-version: latest + conda-solver: libmamba + - name: install common conda dependencies + run: conda install -n base -c conda-forge conda-build -y + - uses: actions/cache@v4 + with: + path: | + pkgs/noarch + 
pkgs/channeldata.json + key: ${{ github.sha }}-packages + - name: linux conda build test + shell: bash -l {0} + run: | + mkdir -p ./pkgs/noarch + conda-build -c conda-forge conda-recipe --no-test --output-folder ./pkgs docs: needs: [test-spec-main] From d13067ef8bc9494c8c3c40966f31ceaa1e59179a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 6 Feb 2025 09:33:29 +0100 Subject: [PATCH 070/187] remove decimal cli arg --- bioimageio/core/cli.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index b225fe8c..34a7678d 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -131,9 +131,6 @@ class TestCmd(CmdBase, WithSource): devices: Optional[Union[str, Sequence[str]]] = None """Device(s) to use for testing""" - decimal: int = 4 - """Precision for numerical comparisons""" - runtime_env: Union[Literal["currently-active", "as-described"], Path] = Field( "currently-active", alias="runtime-env" ) @@ -158,7 +155,6 @@ def run(self): self.descr, weight_format=self.weight_format, devices=self.devices, - decimal=self.decimal, summary_path=self.summary_path, runtime_env=self.runtime_env, determinism=self.determinism, From d3599f36ddd9489525c114718c84724813a703ee Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 6 Feb 2025 09:34:00 +0100 Subject: [PATCH 071/187] fix mismatched elements per million --- bioimageio/core/_resource_tests.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 3a41593c..6cd0bcf5 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -66,6 +66,7 @@ class DeprecatedKwargs(TypedDict): decimal: NotRequired[Optional[int]] +# TODO: avoid unnecessary imports in enable_determinism def enable_determinism(mode: Literal["seed_only", "full"]): """Seed and configure ML frameworks for maximum reproducibility. May degrade performance. 
Only recommended for testing reproducibility! @@ -476,7 +477,7 @@ def _test_model_inference( expected ) mismatched_elements = mismatched.sum().item() - if mismatched_elements > mismatched_tol: + if mismatched_elements / expected.size > mismatched_tol / 1e6: r_max_idx = (r_diff := abs_diff / abs(expected)).argmax() r_max = r_diff[r_max_idx].item() r_actual = actual[r_max_idx].item() @@ -488,11 +489,11 @@ def _test_model_inference( error = ( f"Output '{m}' disagrees with {mismatched_elements} of" + f" {expected.size} expected values." - + f"\n Max relative difference: {r_max}" - + f" (= |{r_actual} - {r_expected}|/|{r_expected}|)" + + f"\n Max relative difference: {r_max:.2e}" + + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e}\|)" + f" at {r_max_idx}" - + f"\n Max absolute difference: {a_max}" - + f" (= |{a_actual} - {a_expected}|) at {a_max_idx}" + + f"\n Max absolute difference: {a_max:.2e}" + + rf" (= \|{a_actual:.2e} - {a_expected:.2e}\|) at {a_max_idx}" ) break except Exception as e: From c171ab0c1b8068543640da1f7599adac8c0b9a80 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 11:50:57 +0100 Subject: [PATCH 072/187] fix axis id conversion --- bioimageio/core/proc_ops.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index 7cc1574c..f7454b1c 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -51,11 +51,16 @@ def _convert_axis_ids( if mode == "per_sample": ret = [] elif mode == "per_dataset": - ret = [AxisId("b")] + ret = [v0_5.BATCH_AXIS_ID] else: assert_never(mode) - ret.extend([AxisId(a) for a in axes]) + ret.extend( + [ + AxisId(v0_5._AXIS_ID_MAP.get(a, a)) # pyright: ignore[reportPrivateUsage] + for a in axes + ] + ) return tuple(ret) From 8d76a810314d083377d3f179264e099c8a8dbfc1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 13:17:56 +0100 Subject: [PATCH 073/187] add UpdateFormatCmd --- 
bioimageio/core/cli.py | 56 ++++++++++++++++++++++++++++++++++++- bioimageio/core/commands.py | 5 ++++ tests/conftest.py | 14 +++++++++- tests/test_cli.py | 15 ++++++++-- 4 files changed, 86 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 34a7678d..19e7c8e9 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -11,6 +11,7 @@ from argparse import RawTextHelpFormatter from difflib import SequenceMatcher from functools import cached_property +from io import StringIO from pathlib import Path from pprint import pformat, pprint from typing import ( @@ -28,6 +29,7 @@ Union, ) +import rich.markdown from loguru import logger from pydantic import BaseModel, Field, model_validator from pydantic_settings import ( @@ -44,8 +46,12 @@ from tqdm import tqdm from typing_extensions import assert_never +from bioimageio.core.weight_converters._add_weights import ( + increase_available_weight_formats, +) from bioimageio.spec import AnyModelDescr, InvalidDescr, load_description from bioimageio.spec._internal.io_basics import ZipPath +from bioimageio.spec._internal.io_utils import write_yaml from bioimageio.spec._internal.types import NotEmpty from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr, v0_4, v0_5 @@ -57,6 +63,7 @@ WeightFormatArgAny, package, test, + update_format, validate_format, ) from .common import MemberId, SampleId @@ -221,6 +228,23 @@ def _get_stat( return stat +class UpdateFormatCmd(CmdBase, WithSource): + """Update the metadata format""" + + path: Optional[Path] = Field(None, alias="output-path") + """save updated RDF to this path""" + + def run(self): + updated = update_format(self.descr, output_path=self.path) + updated_stream = StringIO() + write_yaml(updated, updated_stream) + updated_md = f"```yaml\n{updated_stream.getvalue()}\n```" + + rich_markdown = rich.markdown.Markdown(updated_md) + console = rich.console.Console() + console.print(rich_markdown) + + 
class PredictCmd(CmdBase, WithSource): """Run inference on your data with a bioimage.io model.""" @@ -563,6 +587,20 @@ def input_dataset(stat: Stat): save_sample(sp_out, sample_out) +class IncreaseWeightFormatsCmd(CmdBase, WithSource): + path: CliPositionalArg[Path] + """The path to write the updated model description to.""" + + def run(self): + model_descr = ensure_description_is_model(self.descr) + if isinstance(model_descr, v0_4.ModelDescr): + raise TypeError( + f"model format {model_descr.format_version} not supported." + + " Please update the model first." + ) + _ = increase_available_weight_formats(model_descr, output_path=self.path) + + JSON_FILE = "bioimageio-cli.json" YAML_FILE = "bioimageio-cli.yaml" @@ -594,6 +632,15 @@ class Bioimageio( predict: CliSubCommand[PredictCmd] "Predict with a model resource" + update_format: CliSubCommand[UpdateFormatCmd] = Field(alias="update-format") + """Update the metadata format""" + + increase_weight_formats: CliSubCommand[IncreaseWeightFormatsCmd] = Field( + alias="incease-weight-formats" + ) + """Add additional weights to the model descriptions converted from available + formats to improve deployability.""" + @classmethod def settings_customise_sources( cls, @@ -631,7 +678,14 @@ def run(self): "executing CLI command:\n{}", pformat({k: v for k, v in self.model_dump().items() if v is not None}), ) - cmd = self.validate_format or self.test or self.package or self.predict + cmd = ( + self.validate_format + or self.test + or self.package + or self.predict + or self.update_format + or self.increase_weight_formats + ) assert cmd is not None cmd.run() diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index c1481d6b..3b385452 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -13,6 +13,7 @@ save_bioimageio_package, save_bioimageio_package_as_folder, ) +from bioimageio.spec import update_format as update_format from ._resource_tests import test_description @@ -100,3 +101,7 @@ def 
package( weights_priority_order=weights_priority_order, ) return 0 + + +# def update_format(descr: ResourceDescr, path: Path): +# update_format_func() diff --git a/tests/conftest.py b/tests/conftest.py index cb78bfe9..cfd76b64 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -67,7 +67,7 @@ ), "unet2d_expand_output_shape": ( "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_nuclei_broad/expand_output_shape_v0_4.bioimageio.yaml" + "unet2d_nuclei_broad/expand_output_shape.bioimageio.yaml" ), "unet2d_fixed_shape": ( "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" @@ -85,6 +85,10 @@ "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" "unet2d_multi_tensor/v0_4.bioimageio.yaml" ), + "unet2d_nuclei_broad_model_old": ( + "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" + "unet2d_nuclei_broad/v0_4_9.bioimageio.yaml" + ), "unet2d_nuclei_broad_model": ( "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" "unet2d_nuclei_broad/bioimageio.yaml" @@ -241,6 +245,14 @@ def unet2d_nuclei_broad_model(request: FixtureRequest): return MODEL_SOURCES[request.param] +# written as model group to automatically skip on missing torch +@fixture( + scope="session", params=[] if skip_torch else ["unet2d_nuclei_broad_model_old"] +) +def unet2d_nuclei_broad_model_old(request: FixtureRequest): + return MODEL_SOURCES[request.param] + + # written as model group to automatically skip on missing torch @fixture(scope="session", params=[] if skip_torch else ["unet2d_diff_output_shape"]) def unet2d_diff_output_shape(request: FixtureRequest): diff --git a/tests/test_cli.py b/tests/test_cli.py index e0828ac6..437559a7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -36,11 +36,22 @@ def run_subprocess( ], ["test", 
"unet2d_nuclei_broad_model"], ["predict", "--example", "unet2d_nuclei_broad_model"], + ["update-format", "unet2d_path_old_version"], ], ) -def test_cli(args: List[str], unet2d_nuclei_broad_model: str): +def test_cli( + args: List[str], unet2d_nuclei_broad_model: str, unet2d_nuclei_broad_model_old: str +): resolved_args = [ - str(unet2d_nuclei_broad_model) if arg == "unet2d_nuclei_broad_model" else arg + ( + unet2d_nuclei_broad_model + if arg == "unet2d_nuclei_broad_model" + else ( + unet2d_nuclei_broad_model_old + if arg == "unet2d_nuclei_broad_model_old" + else arg + ) + ) for arg in args ] ret = run_subprocess(["bioimageio", *resolved_args]) From cb477337ebedfe4156776b8ad620fa24385b187e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 13:18:46 +0100 Subject: [PATCH 074/187] fix load_torch_model --- bioimageio/core/backends/pytorch_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index a7fecfb7..459d2904 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -113,7 +113,7 @@ def load_torch_model( network = arch(**model_kwargs) if not isinstance(network, nn.Module): raise ValueError( - f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" + f"calling {weight_spec.architecture.callable_name if isinstance(weight_spec.architecture, (v0_4.CallableFromFile, v0_4.CallableFromDepencency)) else weight_spec.architecture.callable} did not return a torch.nn.Module" ) if load_state or devices: @@ -153,7 +153,7 @@ def get_devices( ) -> List[torch.device]: if not devices: torch_devices = [ - (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")) + torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") ] else: torch_devices = [torch.device(d) for d in devices] From 6a6d6a23c37310e4603558b61ad3466207560623 Mon Sep 17 00:00:00 
2001 From: fynnbe Date: Fri, 7 Feb 2025 13:19:25 +0100 Subject: [PATCH 075/187] add logging to increase_available_weight_formats --- .../core/weight_converters/_add_weights.py | 17 ++++++++++++++--- tests/test_cli.py | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index a651f39e..b9837ab4 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -14,7 +14,7 @@ def increase_available_weight_formats( output_path: DirectoryPath, source_format: Optional[WeightsFormat] = None, target_format: Optional[WeightsFormat] = None, -) -> ModelDescr: +) -> Optional[ModelDescr]: """Convert model weights to other formats and add them to the model description Args: @@ -24,6 +24,10 @@ def increase_available_weight_formats( target_format: convert to a specific weights format. Default: attempt to convert to any missing format. devices: Devices that may be used during conversion. + + Returns: + - An updated model description if any converted weights were added. + - `None` if no conversion was possible. """ if not isinstance(model_descr, ModelDescr): raise TypeError(type(model_descr)) @@ -48,6 +52,8 @@ def increase_available_weight_formats( else: missing = {target_format} + originally_missing = set(missing) + if "pytorch_state_dict" in available and "onnx" in missing: from .pytorch_to_onnx import convert @@ -86,5 +92,10 @@ def increase_available_weight_formats( + " if you would like bioimageio.core to support a particular conversion." 
) - test_model(model_descr).display() - return model_descr + if originally_missing == missing: + logger.warning("failed to add any converted weights") + return None + else: + logger.info(f"added weights formats {originally_missing - missing}") + test_model(model_descr).display() + return model_descr diff --git a/tests/test_cli.py b/tests/test_cli.py index 437559a7..4d98de70 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -37,6 +37,7 @@ def run_subprocess( ["test", "unet2d_nuclei_broad_model"], ["predict", "--example", "unet2d_nuclei_broad_model"], ["update-format", "unet2d_path_old_version"], + ["increase-weight-formats", "unet2d_nuclei_broad_model"], ], ) def test_cli( From ee310fa25c398dd0cc29487b4da04db39b8ae2ce Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 13:20:24 +0100 Subject: [PATCH 076/187] bump polite-pig --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index cfd76b64..a566bb28 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,7 +44,7 @@ # TODO: use models from new collection on S3 MODEL_SOURCES: Dict[str, str] = { - "hpa_densenet": "polite-pig/1", + "hpa_densenet": "polite-pig/1.1", "stardist": ( "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models" "/stardist_example_model/v0_4.bioimageio.yaml" From 19e2092b338e5803f417a7aa6df0485d81150d80 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:13:36 +0100 Subject: [PATCH 077/187] do not use pytest-xdist for cleaner test logs --- dev/env-py38.yaml | 2 -- dev/env-tf.yaml | 2 -- dev/env-wo-python.yaml | 2 -- dev/env.yaml | 2 -- pyproject.toml | 2 +- setup.py | 37 ++++++++++++++++++------------------- 6 files changed, 19 insertions(+), 28 deletions(-) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 148c9668..d29201e8 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -8,7 +8,6 @@ dependencies: - bioimageio.spec>=0.5.3.6 - black - 
crick # uncommented - - filelock - h5py - imagecodecs - imageio>=2.5 @@ -30,7 +29,6 @@ dependencies: - pyright - pytest - pytest-cov - - pytest-xdist - python=3.8 # changed - pytorch>=2.1,<3 - requests diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 61e00c41..af054496 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -8,7 +8,6 @@ dependencies: - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - - filelock - h5py - imagecodecs - imageio>=2.5 @@ -30,7 +29,6 @@ dependencies: - pyright - pytest - pytest-cov - - pytest-xdist # - python=3.9 # removed # - pytorch>=2.1,<3 # removed - requests diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index cd72571e..3308f64a 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -8,7 +8,6 @@ dependencies: - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - - filelock - h5py - imagecodecs - imageio>=2.5 @@ -30,7 +29,6 @@ dependencies: - pyright - pytest - pytest-cov - - pytest-xdist # - python=3.9 # removed - pytorch>=2.1,<3 - requests diff --git a/dev/env.yaml b/dev/env.yaml index 8fed3b25..db12748a 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -7,7 +7,6 @@ dependencies: - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - - filelock - h5py - imagecodecs - imageio>=2.5 @@ -29,7 +28,6 @@ dependencies: - pyright - pytest - pytest-cov - - pytest-xdist - python=3.12 - pytorch>=2.1,<3 - requests diff --git a/pyproject.toml b/pyproject.toml index 98bf386e..c7dae606 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml --cov-append -n 0 --capture=no --doctest-modules --failed-first" +addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first" [tool.ruff] line-length = 88 diff --git a/setup.py b/setup.py index ebaee737..6e3b25d2 100644 --- 
a/setup.py +++ b/setup.py @@ -49,25 +49,24 @@ "pytorch": (pytorch_deps := ["torch>=1.6,<3", "torchvision", "keras>=3.0,<4"]), "tensorflow": ["tensorflow", "keras>=2.15,<4"], "onnx": ["onnxruntime"], - "dev": pytorch_deps - + [ - "black", - # "crick", # currently requires python<=3.9 - "filelock", - "jupyter", - "jupyter-black", - "matplotlib", - "onnx", - "onnxruntime", - "packaging>=17.0", - "pre-commit", - "pdoc", - "psutil", # parallel pytest with 'pytest -n auto' - "pyright==1.1.393", - "pytest-cov", - "pytest-xdist", # parallel pytest - "pytest", - ], + "dev": ( + pytorch_deps + + [ + "black", + # "crick", # currently requires python<=3.9 + "jupyter", + "jupyter-black", + "matplotlib", + "onnx", + "onnxruntime", + "packaging>=17.0", + "pre-commit", + "pdoc", + "pyright==1.1.393", + "pytest-cov", + "pytest", + ] + ), }, project_urls={ "Bug Reports": "https://github.com/bioimage-io/core-bioimage-io-python/issues", From 6ec64519cf4d106d645cf284ad88b594d849e125 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:15:44 +0100 Subject: [PATCH 078/187] use summary.display() for nicer test logs --- tests/conftest.py | 6 +++--- tests/test_any_model_fixture.py | 2 +- tests/test_resource_tests.py | 2 +- tests/test_weight_converters.py | 12 ++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a566bb28..5a75e679 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,10 @@ from __future__ import annotations import subprocess -import warnings from itertools import chain from typing import Dict, List +from loguru import logger from pytest import FixtureRequest, fixture from bioimageio.spec import __version__ as bioimageio_spec_version @@ -13,7 +13,7 @@ import torch torch_version = tuple(map(int, torch.__version__.split(".")[:2])) - warnings.warn(f"detected torch version {torch.__version__}") + logger.warning("detected torch version {}", torch.__version__) except ImportError: torch = None 
torch_version = None @@ -40,7 +40,7 @@ skip_tensorflow = tensorflow is None -warnings.warn(f"testing with bioimageio.spec {bioimageio_spec_version}") +logger.warning("testing with bioimageio.spec {}", bioimageio_spec_version) # TODO: use models from new collection on S3 MODEL_SOURCES: Dict[str, str] = { diff --git a/tests/test_any_model_fixture.py b/tests/test_any_model_fixture.py index a4cc1bce..92701109 100644 --- a/tests/test_any_model_fixture.py +++ b/tests/test_any_model_fixture.py @@ -3,4 +3,4 @@ def test_model(any_model: str): summary = load_description_and_validate_format_only(any_model) - assert summary.status == "passed", summary.format() + assert summary.status == "passed", summary.display() diff --git a/tests/test_resource_tests.py b/tests/test_resource_tests.py index ce5a6fe8..342dfc78 100644 --- a/tests/test_resource_tests.py +++ b/tests/test_resource_tests.py @@ -41,7 +41,7 @@ def test_test_model(any_model: str): with ValidationContext(raise_errors=True): summary = test_model(any_model) - assert summary.status == "passed", summary.format() + assert summary.status == "passed", summary.display() def test_loading_description_multiple_times(unet2d_nuclei_broad_model: str): diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 24d2b9cb..67d662fc 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -21,7 +21,7 @@ def test_pytorch_to_torchscript(any_torch_model, tmp_path): assert ret_val.source == out_path model_descr.weights.torchscript = ret_val summary = test_model(model_descr, weight_format="torchscript") - assert summary.status == "passed", summary.format() + assert summary.status == "passed", summary.display() def test_pytorch_to_onnx(convert_to_onnx, tmp_path): @@ -42,7 +42,7 @@ def test_pytorch_to_onnx(convert_to_onnx, tmp_path): model_descr.weights.onnx = ret_val summary = test_model(model_descr, weight_format="onnx") - assert summary.status == "passed", summary.format() + assert 
summary.status == "passed", summary.display() def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): @@ -57,7 +57,7 @@ def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): model_descr.weights.keras = ret_val summary = test_model(model_descr, weight_format="keras_hdf5") - assert summary.status == "passed", summary.format() + assert summary.status == "passed", summary.display() @pytest.mark.skip() @@ -78,7 +78,7 @@ def test_keras_to_tensorflow_zipped(any_keras_model: Path, tmp_path: Path): model_descr.weights.keras = ret_val summary = test_model(model_descr, weight_format="keras_hdf5") - assert summary.status == "passed", summary.format() + assert summary.status == "passed", summary.display() # TODO: add tensorflow_to_keras converter @@ -94,7 +94,7 @@ def test_keras_to_tensorflow_zipped(any_keras_model: Path, tmp_path: Path): # model_descr.weights.keras = ret_val # summary = test_model(model_descr, weight_format="keras_hdf5") -# assert summary.status == "passed", summary.format() +# assert summary.status == "passed", summary.display() # @pytest.mark.skip() @@ -115,4 +115,4 @@ def test_keras_to_tensorflow_zipped(any_keras_model: Path, tmp_path: Path): # model_descr.weights.keras = ret_val # summary = test_model(model_descr, weight_format="keras_hdf5") -# assert summary.status == "passed", summary.format() +# assert summary.status == "passed", summary.display() From 0d113c2b13c684eae4c812e69c42efcb3c3bd7f7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:18:10 +0100 Subject: [PATCH 079/187] avoid unnecessary imports in enable_determinism --- bioimageio/core/_resource_tests.py | 87 +++++++++++++++++------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 6cd0bcf5..16b328d0 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -1,4 +1,5 @@ import hashlib +import os import platform import 
subprocess import traceback @@ -66,8 +67,9 @@ class DeprecatedKwargs(TypedDict): decimal: NotRequired[Optional[int]] -# TODO: avoid unnecessary imports in enable_determinism -def enable_determinism(mode: Literal["seed_only", "full"]): +def enable_determinism( + mode: Literal["seed_only", "full"], weight_formats: Sequence[SupportedWeightsFormat] +): """Seed and configure ML frameworks for maximum reproducibility. May degrade performance. Only recommended for testing reproducibility! @@ -93,39 +95,46 @@ def enable_determinism(mode: Literal["seed_only", "full"]): except Exception as e: logger.debug(str(e)) - try: + if "pytorch_state_dict" in weight_formats or "torchscript" in weight_formats: try: - import torch - except ImportError: - pass - else: - _ = torch.manual_seed(0) - torch.use_deterministic_algorithms(mode == "full") - except Exception as e: - logger.debug(str(e)) + try: + import torch + except ImportError: + pass + else: + _ = torch.manual_seed(0) + torch.use_deterministic_algorithms(mode == "full") + except Exception as e: + logger.debug(str(e)) - try: + if ( + "tensorflow_saved_model_bundle" in weight_formats + or "keras_hdf5" in weight_formats + ): try: - import keras - except ImportError: - pass - else: - keras.utils.set_random_seed(0) - except Exception as e: - logger.debug(str(e)) - - try: + os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0" + try: + import tensorflow as tf + except ImportError: + pass + else: + tf.random.set_seed(0) + if mode == "full": + tf.config.experimental.enable_op_determinism() + # TODO: find possibility to switch it off again?? + except Exception as e: + logger.debug(str(e)) + + if "keras_hdf5" in weight_formats: try: - import tensorflow as tf - except ImportError: - pass - else: - tf.random.set_seed(0) - if mode == "full": - tf.config.experimental.enable_op_determinism() - # TODO: find possibility to switch it off again?? 
- except Exception as e: - logger.debug(str(e)) + try: + import keras + except ImportError: + pass + else: + keras.utils.set_random_seed(0) + except Exception as e: + logger.debug(str(e)) def test_model( @@ -390,7 +399,7 @@ def load_description_and_test( else: weight_formats = [weight_format] - enable_determinism(determinism) + enable_determinism(determinism, weight_formats=weight_formats) for w in weight_formats: _test_model_inference(rd, w, devices, **deprecated) if not isinstance(rd, v0_4.ModelDescr): @@ -589,12 +598,14 @@ def get_ns(n: int): resized_test_inputs = Sample( members={ - t.id: test_inputs.members[t.id].resize_to( - { - aid: s - for (tid, aid), s in input_target_sizes.items() - if tid == t.id - }, + t.id: ( + test_inputs.members[t.id].resize_to( + { + aid: s + for (tid, aid), s in input_target_sizes.items() + if tid == t.id + }, + ) ) for t in model.inputs }, From 01c0fbd37f8fcb9aa61f7e460d277235cef732f5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:25:11 +0100 Subject: [PATCH 080/187] add default args to enable_determinism --- bioimageio/core/_resource_tests.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 16b328d0..b1174b86 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -68,15 +68,25 @@ class DeprecatedKwargs(TypedDict): def enable_determinism( - mode: Literal["seed_only", "full"], weight_formats: Sequence[SupportedWeightsFormat] + mode: Literal["seed_only", "full"] = "full", + weight_formats: Optional[Sequence[SupportedWeightsFormat]] = None, ): """Seed and configure ML frameworks for maximum reproducibility. May degrade performance. Only recommended for testing reproducibility! Seed any random generators and (if **mode**=="full") request ML frameworks to use deterministic algorithms. 
+ + Args: + mode: determinism mode + - 'seed_only' -- only set seeds, or + - 'full' determinsm features (might degrade performance or throw exceptions) + weight_formats: Limit deep learning importing deep learning frameworks + based on weight_formats. + E.g. this allows to avoid importing tensorflow when testing with pytorch. + Notes: - - **mode** == "full" might degrade performance and throw exceptions. + - **mode** == "full" might degrade performance or throw exceptions. - Subsequent inference calls might still differ. Call before each function (sequence) that is expected to be reproducible. - Degraded performance: Use for testing reproducibility only! @@ -95,7 +105,11 @@ def enable_determinism( except Exception as e: logger.debug(str(e)) - if "pytorch_state_dict" in weight_formats or "torchscript" in weight_formats: + if ( + weight_formats is None + or "pytorch_state_dict" in weight_formats + or "torchscript" in weight_formats + ): try: try: import torch @@ -108,7 +122,8 @@ def enable_determinism( logger.debug(str(e)) if ( - "tensorflow_saved_model_bundle" in weight_formats + weight_formats is None + or "tensorflow_saved_model_bundle" in weight_formats or "keras_hdf5" in weight_formats ): try: @@ -125,7 +140,7 @@ def enable_determinism( except Exception as e: logger.debug(str(e)) - if "keras_hdf5" in weight_formats: + if weight_formats is None or "keras_hdf5" in weight_formats: try: try: import keras From e5f0efda4c0eadd4eeb49e09da441e9b43fe3ccb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:45:05 +0100 Subject: [PATCH 081/187] rename UpdateFormatCmd path arg to output --- bioimageio/core/cli.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 19e7c8e9..055180ec 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -231,11 +231,14 @@ def _get_stat( class UpdateFormatCmd(CmdBase, WithSource): """Update the metadata format""" - path: Optional[Path] = 
Field(None, alias="output-path") - """save updated RDF to this path""" + output: Optional[Path] = None + """Save updated bioimageio.yaml to this file. + + (Always renders updated bioimageio.yaml to terminal.) + """ def run(self): - updated = update_format(self.descr, output_path=self.path) + updated = update_format(self.descr, output_path=self.output) updated_stream = StringIO() write_yaml(updated, updated_stream) updated_md = f"```yaml\n{updated_stream.getvalue()}\n```" From 961503d02c64eb587bff3b522b01c64eca04c053 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:45:24 +0100 Subject: [PATCH 082/187] enable_determinsm for all tests --- tests/conftest.py | 4 ++++ tests/test_resource_tests.py | 11 ----------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5a75e679..2e077f86 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,8 +7,12 @@ from loguru import logger from pytest import FixtureRequest, fixture +from bioimageio.core import enable_determinism from bioimageio.spec import __version__ as bioimageio_spec_version +enable_determinism() + + try: import torch diff --git a/tests/test_resource_tests.py b/tests/test_resource_tests.py index 342dfc78..f4eca96b 100644 --- a/tests/test_resource_tests.py +++ b/tests/test_resource_tests.py @@ -1,17 +1,6 @@ -from typing import Literal - -import pytest - from bioimageio.spec import InvalidDescr, ValidationContext -@pytest.mark.parametrize("mode", ["seed_only", "full"]) -def test_enable_determinism(mode: Literal["seed_only", "full"]): - from bioimageio.core import enable_determinism - - enable_determinism(mode) - - def test_error_for_wrong_shape(stardist_wrong_shape: str): from bioimageio.core._resource_tests import test_model From 9f0cf83c575f5ecf409cd3b280ee9fc838bbfa5c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:52:01 +0100 Subject: [PATCH 083/187] use common EXAMPLE_DESCRIPTIONS URL --- tests/conftest.py | 40 
++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2e077f86..9258c3b6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -46,56 +46,48 @@ logger.warning("testing with bioimageio.spec {}", bioimageio_spec_version) +EXAMPLE_DESCRIPTIONS = "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/" + # TODO: use models from new collection on S3 MODEL_SOURCES: Dict[str, str] = { "hpa_densenet": "polite-pig/1.1", "stardist": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models" - "/stardist_example_model/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/stardist_example_model/v0_4.bioimageio.yaml" ), "shape_change": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "upsample_test_model/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/upsample_test_model/v0_4.bioimageio.yaml" ), "stardist_wrong_shape": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "stardist_example_model/rdf_wrong_shape.yaml" + EXAMPLE_DESCRIPTIONS + "models/stardist_example_model/rdf_wrong_shape.yaml" ), "stardist_wrong_shape2": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "stardist_example_model/rdf_wrong_shape2_v0_4.yaml" + EXAMPLE_DESCRIPTIONS + + "models/stardist_example_model/rdf_wrong_shape2_v0_4.yaml" ), "unet2d_diff_output_shape": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_diff_output_shape/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_diff_output_shape/v0_4.bioimageio.yaml" ), "unet2d_expand_output_shape": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - 
"unet2d_nuclei_broad/expand_output_shape.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + + "models/unet2d_nuclei_broad/expand_output_shape.bioimageio.yaml" ), "unet2d_fixed_shape": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_fixed_shape/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_fixed_shape/v0_4.bioimageio.yaml" ), "unet2d_keras_tf2": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_keras_tf2/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_keras_tf2/v0_4.bioimageio.yaml" ), "unet2d_keras": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_keras_tf/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_keras_tf/v0_4.bioimageio.yaml" ), "unet2d_multi_tensor": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_multi_tensor/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_multi_tensor/v0_4.bioimageio.yaml" ), "unet2d_nuclei_broad_model_old": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_nuclei_broad/v0_4_9.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_nuclei_broad/v0_4_9.bioimageio.yaml" ), "unet2d_nuclei_broad_model": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/" - "unet2d_nuclei_broad/bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_nuclei_broad/bioimageio.yaml" ), } From 01eb0fd19294a98e38dd391b12940e1953819167 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 7 Feb 2025 14:52:22 +0100 Subject: [PATCH 084/187] rename IncreaseWeightFormatsCmd arg path to output --- bioimageio/core/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 
055180ec..2edacd06 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -591,7 +591,7 @@ def input_dataset(stat: Stat): class IncreaseWeightFormatsCmd(CmdBase, WithSource): - path: CliPositionalArg[Path] + output: CliPositionalArg[Path] """The path to write the updated model description to.""" def run(self): @@ -601,7 +601,7 @@ def run(self): f"model format {model_descr.format_version} not supported." + " Please update the model first." ) - _ = increase_available_weight_formats(model_descr, output_path=self.path) + _ = increase_available_weight_formats(model_descr, output_path=self.output) JSON_FILE = "bioimageio-cli.json" @@ -639,7 +639,7 @@ class Bioimageio( """Update the metadata format""" increase_weight_formats: CliSubCommand[IncreaseWeightFormatsCmd] = Field( - alias="incease-weight-formats" + alias="increase-weight-formats" ) """Add additional weights to the model descriptions converted from available formats to improve deployability.""" From cf6f9ee6c92971345817283fda13aea14ed7def7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 20 Feb 2025 13:15:37 +0100 Subject: [PATCH 085/187] improve weights converters --- README.md | 3 +- bioimageio/core/__init__.py | 2 + bioimageio/core/__main__.py | 2 +- bioimageio/core/backends/_model_adapter.py | 2 +- bioimageio/core/backends/pytorch_backend.py | 2 +- bioimageio/core/cli.py | 45 +++++--- bioimageio/core/commands.py | 2 +- bioimageio/core/weight_converters/__init__.py | 3 + .../core/weight_converters/_add_weights.py | 101 +++++++++++++++--- .../core/weight_converters/pytorch_to_onnx.py | 94 ++++------------ .../pytorch_to_torchscript.py | 22 ++-- .../weight_converters/torchscript_to_onnx.py | 76 +++++++++++++ 12 files changed, 243 insertions(+), 111 deletions(-) create mode 100644 bioimageio/core/weight_converters/torchscript_to_onnx.py diff --git a/README.md b/README.md index aaecc98e..6ee50c85 100644 --- a/README.md +++ b/README.md @@ -377,9 +377,10 @@ The model specification and its validation 
tools can be found at nn.Module: arch = import_callable( diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 2edacd06..2778d201 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -46,9 +46,6 @@ from tqdm import tqdm from typing_extensions import assert_never -from bioimageio.core.weight_converters._add_weights import ( - increase_available_weight_formats, -) from bioimageio.spec import AnyModelDescr, InvalidDescr, load_description from bioimageio.spec._internal.io_basics import ZipPath from bioimageio.spec._internal.io_utils import write_yaml @@ -66,7 +63,7 @@ update_format, validate_format, ) -from .common import MemberId, SampleId +from .common import MemberId, SampleId, SupportedWeightsFormat from .digest_spec import get_member_ids, load_sample_for_model from .io import load_dataset_stat, save_dataset_stat, save_sample from .prediction import create_prediction_pipeline @@ -80,6 +77,7 @@ from .sample import Sample from .stat_measures import Stat from .utils import VERSION +from .weight_converters._add_weights import add_weights yaml = YAML(typ="safe") @@ -183,7 +181,7 @@ class PackageCmd(CmdBase, WithSource): def run(self): if isinstance(self.descr, InvalidDescr): self.descr.validation_summary.display() - raise ValueError("resource description is invalid") + raise ValueError(f"Invalid {self.descr.type} description.") sys.exit( package( @@ -590,9 +588,18 @@ def input_dataset(stat: Stat): save_sample(sp_out, sample_out) -class IncreaseWeightFormatsCmd(CmdBase, WithSource): +class ConvertWeightsCmd(CmdBase, WithSource): output: CliPositionalArg[Path] - """The path to write the updated model description to.""" + """The path to write the updated model package to.""" + + source_format: Optional[SupportedWeightsFormat] = Field(None, alias="source-format") + """Exclusively use these weights to convert to other formats.""" + + target_format: Optional[SupportedWeightsFormat] = Field(None, alias="target-format") + """Exclusively add this 
weight format.""" + + verbose: bool = False + """Log more (error) output.""" def run(self): model_descr = ensure_description_is_model(self.descr) @@ -601,7 +608,23 @@ def run(self): f"model format {model_descr.format_version} not supported." + " Please update the model first." ) - _ = increase_available_weight_formats(model_descr, output_path=self.output) + updated_model_descr = add_weights( + model_descr, + output_path=self.output, + source_format=self.source_format, + target_format=self.target_format, + verbose=self.verbose, + ) + if updated_model_descr is None: + return + + updated_model_descr.validation_summary.display() + + # save validation summary as attachments + updated_model_descr.validation_summary.save_markdown( + (summary_path := self.output / "test_summary.md") + ) + logger.info("Saved rendered validation summary to {}", summary_path.absolute()) JSON_FILE = "bioimageio-cli.json" @@ -638,9 +661,7 @@ class Bioimageio( update_format: CliSubCommand[UpdateFormatCmd] = Field(alias="update-format") """Update the metadata format""" - increase_weight_formats: CliSubCommand[IncreaseWeightFormatsCmd] = Field( - alias="increase-weight-formats" - ) + add_weights: CliSubCommand[ConvertWeightsCmd] = Field(alias="add-weights") """Add additional weights to the model descriptions converted from available formats to improve deployability.""" @@ -687,7 +708,7 @@ def run(self): or self.package or self.predict or self.update_format - or self.increase_weight_formats + or self.add_weights ) assert cmd is not None cmd.run() diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 3b385452..92a0cd65 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -81,7 +81,7 @@ def package( """ if isinstance(descr, InvalidDescr): descr.validation_summary.display() - raise ValueError("resource description is invalid") + raise ValueError(f"Invalid {descr.type} description.") if weight_format == "all": weights_priority_order = None diff --git 
a/bioimageio/core/weight_converters/__init__.py b/bioimageio/core/weight_converters/__init__.py index e69de29b..31a91642 100644 --- a/bioimageio/core/weight_converters/__init__.py +++ b/bioimageio/core/weight_converters/__init__.py @@ -0,0 +1,3 @@ +from ._add_weights import add_weights + +__all__ = ["add_weights"] diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index b9837ab4..f3c9ee33 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -1,19 +1,26 @@ +import traceback from typing import Optional from loguru import logger from pydantic import DirectoryPath -from bioimageio.core._resource_tests import test_model -from bioimageio.spec import load_model_description, save_bioimageio_package_as_folder +from bioimageio.spec import ( + InvalidDescr, + load_model_description, + save_bioimageio_package_as_folder, +) from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat +from .._resource_tests import test_model -def increase_available_weight_formats( + +def add_weights( model_descr: ModelDescr, *, output_path: DirectoryPath, source_format: Optional[WeightsFormat] = None, target_format: Optional[WeightsFormat] = None, + verbose: bool = False, ) -> Optional[ModelDescr]: """Convert model weights to other formats and add them to the model description @@ -24,12 +31,19 @@ def increase_available_weight_formats( target_format: convert to a specific weights format. Default: attempt to convert to any missing format. devices: Devices that may be used during conversion. + verbose: log more (error) output Returns: - An updated model description if any converted weights were added. - `None` if no conversion was possible. 
""" if not isinstance(model_descr, ModelDescr): + if model_descr.type == "model" and not isinstance(model_descr, InvalidDescr): + raise TypeError( + f"Model format {model_descr.format} is not supported, please update" + + f" model to format {ModelDescr.implemented_format_version} first." + ) + raise TypeError(type(model_descr)) # save model to local folder @@ -37,7 +51,7 @@ def increase_available_weight_formats( model_descr, output_path=output_path ) # reload from local folder to make sure we do not edit the given model - _model_descr = load_model_description(output_path) + _model_descr = load_model_description(output_path, perform_io_checks=False) assert isinstance(_model_descr, ModelDescr) model_descr = _model_descr del _model_descr @@ -54,41 +68,96 @@ def increase_available_weight_formats( originally_missing = set(missing) - if "pytorch_state_dict" in available and "onnx" in missing: - from .pytorch_to_onnx import convert + if "pytorch_state_dict" in available and "torchscript" in missing: + logger.info( + "Attempting to convert 'pytorch_state_dict' weights to 'torchscript'." + ) + from .pytorch_to_torchscript import convert try: - model_descr.weights.onnx = convert( + torchscript_weights_path = output_path / "weights_torchscript.pt" + model_descr.weights.torchscript = convert( model_descr, - output_path=output_path, + output_path=torchscript_weights_path, use_tracing=False, ) except Exception as e: + if verbose: + traceback.print_exception(e) + logger.error(e) else: - available.add("onnx") - missing.discard("onnx") + available.add("torchscript") + missing.discard("torchscript") if "pytorch_state_dict" in available and "torchscript" in missing: + logger.info( + "Attempting to convert 'pytorch_state_dict' weights to 'torchscript' by tracing." 
+ ) from .pytorch_to_torchscript import convert try: + torchscript_weights_path = output_path / "weights_torchscript_traced.pt" + model_descr.weights.torchscript = convert( model_descr, - output_path=output_path, - use_tracing=False, + output_path=torchscript_weights_path, + use_tracing=True, ) except Exception as e: + if verbose: + traceback.print_exception(e) + logger.error(e) else: available.add("torchscript") missing.discard("torchscript") + if "torchscript" in available and "onnx" in missing: + logger.info("Attempting to convert 'torchscript' weights to 'onnx'.") + from .torchscript_to_onnx import convert + + try: + onnx_weights_path = output_path / "weights.onnx" + model_descr.weights.onnx = convert( + model_descr, + output_path=onnx_weights_path, + ) + except Exception as e: + if verbose: + traceback.print_exception(e) + + logger.error(e) + else: + available.add("onnx") + missing.discard("onnx") + + if "pytorch_state_dict" in available and "onnx" in missing: + logger.info("Attempting to convert 'pytorch_state_dict' weights to 'onnx'.") + from .pytorch_to_onnx import convert + + try: + onnx_weights_path = output_path / "weights.onnx" + + model_descr.weights.onnx = convert( + model_descr, + output_path=onnx_weights_path, + verbose=verbose, + ) + except Exception as e: + if verbose: + traceback.print_exception(e) + + logger.error(e) + else: + available.add("onnx") + missing.discard("onnx") + if missing: logger.warning( f"Converting from any of the available weights formats {available} to any" - + f" of {missing} is not yet implemented. Please create an issue at" - + " https://github.com/bioimage-io/core-bioimage-io-python/issues/new/choose" + + f" of {missing} failed or is not yet implemented. Please create an issue" + + " at https://github.com/bioimage-io/core-bioimage-io-python/issues/new/choose" + " if you would like bioimageio.core to support a particular conversion." 
) @@ -97,5 +166,7 @@ def increase_available_weight_formats( return None else: logger.info(f"added weights formats {originally_missing - missing}") - test_model(model_descr).display() + # resave model with updated rdf.yaml + _ = save_bioimageio_package_as_folder(model_descr, output_path=output_path) + _ = test_model(model_descr) return model_descr diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index a104cb2d..9fd4615e 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -1,50 +1,40 @@ from pathlib import Path -from typing import Any, List, Sequence, Union, cast +from typing import Union -import numpy as np -import torch -from numpy.testing import assert_allclose +import torch.jit -from bioimageio.core.backends.pytorch_backend import load_torch_model -from bioimageio.core.digest_spec import get_member_id, get_test_inputs -from bioimageio.core.proc_setup import get_pre_and_postprocessing -from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance from bioimageio.spec.model import v0_4, v0_5 +from .. import __version__ +from ..backends.pytorch_backend import load_torch_model +from ..digest_spec import get_member_id, get_test_inputs +from ..proc_setup import get_pre_and_postprocessing + def convert( model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, output_path: Path, - use_tracing: bool = True, verbose: bool = False, - opset_version: int = 15, - check_reproducibility: bool = True, - relative_tolerance: RelativeTolerance = 1e-07, - absolute_tolerance: AbsoluteTolerance = 0, + opset_version: int = 20, ) -> v0_5.OnnxWeightsDescr: """ - Convert model weights from the PyTorch state_dict format to the ONNX format. + Convert model weights from the Torchscript state_dict format to the ONNX format. 
- # TODO: update Args Args: model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): The model description object that contains the model and its weights. output_path (Path): The file path where the ONNX model will be saved. - use_tracing (bool, optional): - Whether to use tracing or scripting to export the ONNX format. Defaults to True. verbose (bool, optional): If True, will print out detailed information during the ONNX export process. Defaults to False. opset_version (int, optional): The ONNX opset version to use for the export. Defaults to 15. + Raises: ValueError: If the provided model does not have weights in the PyTorch state_dict format. - ImportError: - If ONNX Runtime is not available for checking the exported ONNX model. - ValueError: - If the results before and after weights conversion do not agree. + Returns: v0_5.OnnxWeightsDescr: A descriptor object that contains information about the exported ONNX weights. @@ -65,61 +55,23 @@ def convert( sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs ] inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] - model = load_torch_model(state_dict_weights_descr) + model = load_torch_model(state_dict_weights_descr, load_state=True) with torch.no_grad(): outputs_original_torch = model(*inputs_torch) if isinstance(outputs_original_torch, torch.Tensor): outputs_original_torch = [outputs_original_torch] - outputs_original: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in outputs_original_torch - ] - if use_tracing: - _ = torch.onnx.export( - model, - tuple(inputs_torch), - str(output_path), - verbose=verbose, - opset_version=opset_version, - ) - else: - raise NotImplementedError - - if check_reproducibility: - try: - import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] - except ImportError as e: - raise ImportError( - "The onnx weights were exported, but onnx rt is not available" - + " and weights cannot be checked." 
- ) from e - - # check the onnx model - sess = rt.InferenceSession(str(output_path)) - onnx_input_node_args = cast( - List[Any], sess.get_inputs() - ) # FIXME: remove cast, try using rt.NodeArg instead of Any - inputs_onnx = { - input_name.name: inp - for input_name, inp in zip(onnx_input_node_args, inputs_numpy) - } - outputs_onnx = cast( - Sequence[np.ndarray[Any, Any]], sess.run(None, inputs_onnx) - ) # FIXME: remove cast - - try: - for out_original, out_onnx in zip(outputs_original, outputs_onnx): - assert_allclose( - out_original, - out_onnx, - rtol=relative_tolerance, - atol=absolute_tolerance, - ) - except AssertionError as e: - raise AssertionError( - "Inference results of original and converted weights do not match." - ) from e + _ = torch.onnx.export( + model, + tuple(inputs_torch), + str(output_path), + verbose=verbose, + opset_version=opset_version, + ) return v0_5.OnnxWeightsDescr( - source=output_path, parent="pytorch_state_dict", opset_version=opset_version + source=output_path, + parent="pytorch_state_dict", + opset_version=opset_version, + comment=(f"Converted with bioimageio.core {__version__}."), ) diff --git a/bioimageio/core/weight_converters/pytorch_to_torchscript.py b/bioimageio/core/weight_converters/pytorch_to_torchscript.py index a724e5f8..8a362af9 100644 --- a/bioimageio/core/weight_converters/pytorch_to_torchscript.py +++ b/bioimageio/core/weight_converters/pytorch_to_torchscript.py @@ -7,10 +7,12 @@ from torch.jit import ScriptModule from typing_extensions import assert_never -from bioimageio.core.backends.pytorch_backend import load_torch_model from bioimageio.spec._internal.version_type import Version from bioimageio.spec.model import v0_4, v0_5 +from .. 
import __version__ +from ..backends.pytorch_backend import load_torch_model + def convert( model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], @@ -48,29 +50,33 @@ def convert( input_data = model_descr.get_input_test_arrays() with torch.no_grad(): - input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] - model = load_torch_model(state_dict_weights_descr) - scripted_module: Union[ # pyright: ignore[reportUnknownVariableType] + input_data = [torch.from_numpy(inp) for inp in input_data] + model = load_torch_model(state_dict_weights_descr, load_state=True) + scripted_model: Union[ # pyright: ignore[reportUnknownVariableType] ScriptModule, Tuple[Any, ...] ] = ( torch.jit.trace(model, input_data) if use_tracing else torch.jit.script(model) ) - assert not isinstance(scripted_module, tuple), scripted_module - _check_predictions( + assert not isinstance(scripted_model, tuple), scripted_model + _check_predictions( # TODO: remove model=model, - scripted_model=scripted_module, + scripted_model=scripted_model, model_spec=model_descr, input_data=input_data, ) - scripted_module.save(str(output_path)) + scripted_model.save(output_path) return v0_5.TorchscriptWeightsDescr( source=output_path, pytorch_version=Version(torch.__version__), parent="pytorch_state_dict", + comment=( + f"Converted with bioimageio.core {__version__}" + + f" with use_tracing={use_tracing}." 
+ ), ) diff --git a/bioimageio/core/weight_converters/torchscript_to_onnx.py b/bioimageio/core/weight_converters/torchscript_to_onnx.py new file mode 100644 index 00000000..f7ca10f3 --- /dev/null +++ b/bioimageio/core/weight_converters/torchscript_to_onnx.py @@ -0,0 +1,76 @@ +from pathlib import Path +from typing import Union + +import torch.jit + +from bioimageio.core.digest_spec import get_member_id, get_test_inputs +from bioimageio.core.proc_setup import get_pre_and_postprocessing +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.utils import download + + +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + output_path: Path, + verbose: bool = False, + opset_version: int = 15, +) -> v0_5.OnnxWeightsDescr: + """ + Convert model weights from the PyTorch state_dict format to the ONNX format. + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights. + output_path (Path): + The file path where the ONNX model will be saved. + verbose (bool, optional): + If True, will print out detailed information during the ONNX export process. Defaults to False. + opset_version (int, optional): + The ONNX opset version to use for the export. Defaults to 15. + Raises: + ValueError: + If the provided model does not have weights in the torchscript format. + + Returns: + v0_5.OnnxWeightsDescr: + A descriptor object that contains information about the exported ONNX weights. 
+ """ + + torchscript_descr = model_descr.weights.torchscript + if torchscript_descr is None: + raise ValueError( + "The provided model does not have weights in the torchscript format" + ) + + sample = get_test_inputs(model_descr) + procs = get_pre_and_postprocessing( + model_descr, dataset_for_initial_statistics=[sample] + ) + procs.pre(sample) + inputs_numpy = [ + sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs + ] + inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] + + weight_path = download(torchscript_descr).path + model = torch.jit.load(weight_path) # type: ignore + model.to("cpu") + model = model.eval() # type: ignore + + with torch.no_grad(): + outputs_original_torch = model(*inputs_torch) # type: ignore + if isinstance(outputs_original_torch, torch.Tensor): + outputs_original_torch = [outputs_original_torch] + + _ = torch.onnx.export( + model, # type: ignore + tuple(inputs_torch), + str(output_path), + verbose=verbose, + opset_version=opset_version, + ) + + return v0_5.OnnxWeightsDescr( + source=output_path, parent="pytorch_state_dict", opset_version=opset_version + ) From e6bd24bf18db7b93f2d8cf5b69864086c22b2ea3 Mon Sep 17 00:00:00 2001 From: Tomaz Vieira Date: Thu, 20 Feb 2025 13:26:22 +0100 Subject: [PATCH 086/187] Separates pipeline build logic per version --- bioimageio/core/proc_ops.py | 94 ++++++++++++++++++++++++++++ bioimageio/core/proc_setup.py | 112 +++++++++++++++++----------------- 2 files changed, 150 insertions(+), 56 deletions(-) diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index eecf47b1..5f1195bb 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -3,6 +3,7 @@ from dataclasses import InitVar, dataclass, field from typing import ( Collection, + List, Literal, Mapping, Optional, @@ -17,6 +18,7 @@ from typing_extensions import Self, assert_never from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model.v0_5 import TensorId from 
._op_base import BlockedOperator, Operator from .axis import AxisId, PerAxis @@ -688,3 +690,95 @@ def get_proc_class(proc_spec: ProcDescr): return ZeroMeanUnitVariance else: assert_never(proc_spec) + +def preproc_v4_to_processing(inp: v0_4.InputTensorDescr, proc_spec: v0_4.PreprocessingDescr,) -> Processing: + from bioimageio.spec.model.v0_5 import _convert_proc # pyright: ignore [reportPrivateUsage] + member_id = TensorId(str(inp.name)) + if isinstance(proc_spec, v0_4.BinarizeDescr): + return Binarize.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ClipDescr): + return Clip.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ScaleLinearDescr): + return ScaleLinear.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ScaleRangeDescr): + return ScaleRange.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.SigmoidDescr): + return Sigmoid.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ZeroMeanUnitVarianceDescr): + if proc_spec.kwargs.mode == "fixed": + axes = inp.axes + v5_proc_spec = _convert_proc(proc_spec, axes) + assert isinstance(v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr) #FIXME + return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_spec, member_id) + else: + return ZeroMeanUnitVariance.from_proc_descr(proc_spec, member_id) + else: + assert_never(proc_spec) + +def postproc_v4_to_processing(inp: v0_4.OutputTensorDescr, proc_spec: v0_4.PostprocessingDescr,) -> Processing: + from bioimageio.spec.model.v0_5 import _convert_proc # pyright: ignore [reportPrivateUsage] + member_id = TensorId(str(inp.name)) + if isinstance(proc_spec, v0_4.BinarizeDescr): + return Binarize.from_proc_descr(proc_spec, member_id) + if isinstance(proc_spec, v0_4.ScaleMeanVarianceDescr): + return ScaleMeanVariance.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ClipDescr): + return Clip.from_proc_descr(proc_spec, member_id) + elif 
isinstance(proc_spec, v0_4.ScaleLinearDescr): + return ScaleLinear.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ScaleRangeDescr): + return ScaleRange.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.SigmoidDescr): + return Sigmoid.from_proc_descr(proc_spec, member_id) + elif isinstance(proc_spec, v0_4.ZeroMeanUnitVarianceDescr): + if proc_spec.kwargs.mode == "fixed": + axes = inp.axes + v5_proc_spec = _convert_proc(proc_spec, axes) + assert isinstance(v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr) #FIXME + return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_spec, member_id) + else: + return ZeroMeanUnitVariance.from_proc_descr(proc_spec, member_id) + else: + assert_never(proc_spec) + +def preproc_v5_to_processing(inp: v0_5.InputTensorDescr, proc_spec: v0_5.PreprocessingDescr,) -> Processing: + if isinstance(proc_spec, v0_5.BinarizeDescr): + return Binarize.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ClipDescr): + return Clip.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ScaleLinearDescr): + return ScaleLinear.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ScaleRangeDescr): + return ScaleRange.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.SigmoidDescr): + return Sigmoid.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.EnsureDtypeDescr): + return EnsureDtype.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ZeroMeanUnitVarianceDescr): + return ZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr): + return FixedZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) + else: + assert_never(proc_spec) + +def postproc_v5_to_processing(inp: v0_5.OutputTensorDescr, proc_spec: v0_5.PostprocessingDescr,) -> Processing: + if isinstance(proc_spec, v0_5.BinarizeDescr): + return Binarize.from_proc_descr(proc_spec, inp.id) + 
if isinstance(proc_spec, v0_5.ScaleMeanVarianceDescr): + return ScaleMeanVariance.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ClipDescr): + return Clip.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ScaleLinearDescr): + return ScaleLinear.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ScaleRangeDescr): + return ScaleRange.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.SigmoidDescr): + return Sigmoid.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.EnsureDtypeDescr): + return EnsureDtype.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.ZeroMeanUnitVarianceDescr): + return ZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) + elif isinstance(proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr): + return FixedZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) + else: + assert_never(proc_spec) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index b9afb711..860d7b84 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -6,6 +6,7 @@ Optional, Sequence, Set, + Tuple, Union, ) @@ -17,9 +18,14 @@ from .digest_spec import get_member_ids from .proc_ops import ( AddKnownDatasetStats, + EnsureDtype, Processing, UpdateStats, get_proc_class, + postproc_v4_to_processing, + postproc_v5_to_processing, + preproc_v4_to_processing, + preproc_v5_to_processing, ) from .sample import Sample from .stat_calculators import StatsCalculator @@ -135,66 +141,60 @@ def get_requried_sample_measures(model: AnyModelDescr) -> RequiredSampleMeasures {m for m in s.post_measures if isinstance(m, SampleMeasureBase)}, ) +def _prepare_v4_preprocs(tensor_descrs: Sequence[v0_4.InputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: + procs: List[Processing] = [] + for t_descr in tensor_descrs: + member_id = TensorId(str(t_descr.name)) + procs.append( + EnsureDtype(input=member_id, output=member_id, 
dtype=t_descr.data_type) + ) + for proc_d in t_descr.preprocessing: + procs.append(preproc_v4_to_processing(t_descr, proc_d)) + measures = {m for proc in procs for m in proc.required_measures} + return (procs, measures) + +def _prepare_v4_postprocs(tensor_descrs: Sequence[v0_4.OutputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: + procs: List[Processing] = [] + for t_descr in tensor_descrs: + member_id = TensorId(str(t_descr.name)) + procs.append( + EnsureDtype(input=member_id, output=member_id, dtype=t_descr.data_type) + ) + for proc_d in t_descr.postprocessing: + procs.append(postproc_v4_to_processing(t_descr, proc_d)) + measures = {m for proc in procs for m in proc.required_measures} + return (procs, measures) + +def _prepare_v5_preprocs(tensor_descrs: Sequence[v0_5.InputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: + procs: List[Processing] = [] + for t_descr in tensor_descrs: + for proc_d in t_descr.preprocessing: + procs.append(preproc_v5_to_processing(t_descr, proc_d)) + measures = {m for proc in procs for m in proc.required_measures} + return (procs, measures) + +def _prepare_v5_postprocs(tensor_descrs: Sequence[v0_5.OutputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: + procs: List[Processing] = [] + for t_descr in tensor_descrs: + for proc_d in t_descr.postprocessing: + procs.append(postproc_v5_to_processing(t_descr, proc_d)) + measures = {m for proc in procs for m in proc.required_measures} + return (procs, measures) + def _prepare_setup_pre_and_postprocessing(model: AnyModelDescr) -> _SetupProcessing: - pre_measures: Set[Measure] = set() - post_measures: Set[Measure] = set() - - input_ids = set(get_member_ids(model.inputs)) - output_ids = set(get_member_ids(model.outputs)) - - def prepare_procs(tensor_descrs: Sequence[TensorDescr]): - procs: List[Processing] = [] - for t_descr in tensor_descrs: - if isinstance(t_descr, (v0_4.InputTensorDescr, v0_5.InputTensorDescr)): - proc_descrs: List[ - Union[ - v0_4.PreprocessingDescr, - 
v0_5.PreprocessingDescr, - v0_4.PostprocessingDescr, - v0_5.PostprocessingDescr, - ] - ] = list(t_descr.preprocessing) - elif isinstance( - t_descr, - (v0_4.OutputTensorDescr, v0_5.OutputTensorDescr), - ): - proc_descrs = list(t_descr.postprocessing) - else: - assert_never(t_descr) - - if isinstance(t_descr, (v0_4.InputTensorDescr, v0_4.OutputTensorDescr)): - ensure_dtype = v0_5.EnsureDtypeDescr( - kwargs=v0_5.EnsureDtypeKwargs(dtype=t_descr.data_type) - ) - if isinstance(t_descr, v0_4.InputTensorDescr) and proc_descrs: - proc_descrs.insert(0, ensure_dtype) - - proc_descrs.append(ensure_dtype) - - for proc_d in proc_descrs: - proc_class = get_proc_class(proc_d) - member_id = ( - TensorId(str(t_descr.name)) - if isinstance(t_descr, v0_4.TensorDescrBase) - else t_descr.id - ) - req = proc_class.from_proc_descr( - proc_d, member_id # pyright: ignore[reportArgumentType] - ) - for m in req.required_measures: - if m.member_id in input_ids: - pre_measures.add(m) - elif m.member_id in output_ids: - post_measures.add(m) - else: - raise ValueError("When to raise ") - procs.append(req) - return procs + if isinstance(model, v0_4.ModelDescr): + pre, pre_measures = _prepare_v4_preprocs(model.inputs) + post, post_measures = _prepare_v4_postprocs(model.outputs) + elif isinstance(model, v0_5.ModelDescr): + pre, pre_measures = _prepare_v5_preprocs(model.inputs) + post, post_measures = _prepare_v5_postprocs(model.outputs) + else: + assert_never(model) return _SetupProcessing( - pre=prepare_procs(model.inputs), - post=prepare_procs(model.outputs), + pre=pre, + post=post, pre_measures=pre_measures, post_measures=post_measures, ) From 73f4dba1aa0c9aa9b9bfd32e8154ed7489e36ed6 Mon Sep 17 00:00:00 2001 From: Tomaz Vieira Date: Thu, 20 Feb 2025 14:24:35 +0100 Subject: [PATCH 087/187] Factors out common code from pipeline setup funcs --- bioimageio/core/proc_setup.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git 
a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index 860d7b84..6ce31158 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -141,7 +141,7 @@ def get_requried_sample_measures(model: AnyModelDescr) -> RequiredSampleMeasures {m for m in s.post_measures if isinstance(m, SampleMeasureBase)}, ) -def _prepare_v4_preprocs(tensor_descrs: Sequence[v0_4.InputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: +def _prepare_v4_preprocs(tensor_descrs: Sequence[v0_4.InputTensorDescr]) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: member_id = TensorId(str(t_descr.name)) @@ -150,10 +150,9 @@ def _prepare_v4_preprocs(tensor_descrs: Sequence[v0_4.InputTensorDescr]) -> Tupl ) for proc_d in t_descr.preprocessing: procs.append(preproc_v4_to_processing(t_descr, proc_d)) - measures = {m for proc in procs for m in proc.required_measures} - return (procs, measures) + return procs -def _prepare_v4_postprocs(tensor_descrs: Sequence[v0_4.OutputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: +def _prepare_v4_postprocs(tensor_descrs: Sequence[v0_4.OutputTensorDescr]) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: member_id = TensorId(str(t_descr.name)) @@ -162,39 +161,36 @@ def _prepare_v4_postprocs(tensor_descrs: Sequence[v0_4.OutputTensorDescr]) -> Tu ) for proc_d in t_descr.postprocessing: procs.append(postproc_v4_to_processing(t_descr, proc_d)) - measures = {m for proc in procs for m in proc.required_measures} - return (procs, measures) + return procs -def _prepare_v5_preprocs(tensor_descrs: Sequence[v0_5.InputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: +def _prepare_v5_preprocs(tensor_descrs: Sequence[v0_5.InputTensorDescr]) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: for proc_d in t_descr.preprocessing: procs.append(preproc_v5_to_processing(t_descr, proc_d)) - measures = {m for proc in procs for m in 
proc.required_measures} - return (procs, measures) + return procs -def _prepare_v5_postprocs(tensor_descrs: Sequence[v0_5.OutputTensorDescr]) -> Tuple[List[Processing], Set[Measure]]: +def _prepare_v5_postprocs(tensor_descrs: Sequence[v0_5.OutputTensorDescr]) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: for proc_d in t_descr.postprocessing: procs.append(postproc_v5_to_processing(t_descr, proc_d)) - measures = {m for proc in procs for m in proc.required_measures} - return (procs, measures) + return procs def _prepare_setup_pre_and_postprocessing(model: AnyModelDescr) -> _SetupProcessing: if isinstance(model, v0_4.ModelDescr): - pre, pre_measures = _prepare_v4_preprocs(model.inputs) - post, post_measures = _prepare_v4_postprocs(model.outputs) + pre = _prepare_v4_preprocs(model.inputs) + post = _prepare_v4_postprocs(model.outputs) elif isinstance(model, v0_5.ModelDescr): - pre, pre_measures = _prepare_v5_preprocs(model.inputs) - post, post_measures = _prepare_v5_postprocs(model.outputs) + pre = _prepare_v5_preprocs(model.inputs) + post = _prepare_v5_postprocs(model.outputs) else: assert_never(model) return _SetupProcessing( pre=pre, post=post, - pre_measures=pre_measures, - post_measures=post_measures, + pre_measures={m for proc in pre for m in proc.required_measures}, + post_measures={m for proc in post for m in proc.required_measures}, ) From 891990046b3dcc6677082d5cd7802422abae61df Mon Sep 17 00:00:00 2001 From: Tomaz Vieira Date: Thu, 20 Feb 2025 16:08:14 +0100 Subject: [PATCH 088/187] Apllies black --- bioimageio/core/proc_ops.py | 38 ++++++++++++++++++++++++++--------- bioimageio/core/proc_setup.py | 23 ++++++++++++++------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index 5f1195bb..9677956b 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -3,7 +3,6 @@ from dataclasses import InitVar, dataclass, field from typing import ( 
Collection, - List, Literal, Mapping, Optional, @@ -19,6 +18,9 @@ from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import TensorId +from bioimageio.spec.model.v0_5 import ( + _convert_proc, # pyright: ignore [reportPrivateUsage] +) from ._op_base import BlockedOperator, Operator from .axis import AxisId, PerAxis @@ -691,8 +693,11 @@ def get_proc_class(proc_spec: ProcDescr): else: assert_never(proc_spec) -def preproc_v4_to_processing(inp: v0_4.InputTensorDescr, proc_spec: v0_4.PreprocessingDescr,) -> Processing: - from bioimageio.spec.model.v0_5 import _convert_proc # pyright: ignore [reportPrivateUsage] + +def preproc_v4_to_processing( + inp: v0_4.InputTensorDescr, + proc_spec: v0_4.PreprocessingDescr, +) -> Processing: member_id = TensorId(str(inp.name)) if isinstance(proc_spec, v0_4.BinarizeDescr): return Binarize.from_proc_descr(proc_spec, member_id) @@ -708,15 +713,20 @@ def preproc_v4_to_processing(inp: v0_4.InputTensorDescr, proc_spec: v0_4.Preproc if proc_spec.kwargs.mode == "fixed": axes = inp.axes v5_proc_spec = _convert_proc(proc_spec, axes) - assert isinstance(v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr) #FIXME + assert isinstance( + v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr + ) # FIXME return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_spec, member_id) else: return ZeroMeanUnitVariance.from_proc_descr(proc_spec, member_id) else: assert_never(proc_spec) -def postproc_v4_to_processing(inp: v0_4.OutputTensorDescr, proc_spec: v0_4.PostprocessingDescr,) -> Processing: - from bioimageio.spec.model.v0_5 import _convert_proc # pyright: ignore [reportPrivateUsage] + +def postproc_v4_to_processing( + inp: v0_4.OutputTensorDescr, + proc_spec: v0_4.PostprocessingDescr, +) -> Processing: member_id = TensorId(str(inp.name)) if isinstance(proc_spec, v0_4.BinarizeDescr): return Binarize.from_proc_descr(proc_spec, member_id) @@ -734,14 +744,20 @@ def postproc_v4_to_processing(inp: v0_4.OutputTensorDescr, proc_spec: 
v0_4.Postp if proc_spec.kwargs.mode == "fixed": axes = inp.axes v5_proc_spec = _convert_proc(proc_spec, axes) - assert isinstance(v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr) #FIXME + assert isinstance( + v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr + ) # FIXME return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_spec, member_id) else: return ZeroMeanUnitVariance.from_proc_descr(proc_spec, member_id) else: assert_never(proc_spec) -def preproc_v5_to_processing(inp: v0_5.InputTensorDescr, proc_spec: v0_5.PreprocessingDescr,) -> Processing: + +def preproc_v5_to_processing( + inp: v0_5.InputTensorDescr, + proc_spec: v0_5.PreprocessingDescr, +) -> Processing: if isinstance(proc_spec, v0_5.BinarizeDescr): return Binarize.from_proc_descr(proc_spec, inp.id) elif isinstance(proc_spec, v0_5.ClipDescr): @@ -761,7 +777,11 @@ def preproc_v5_to_processing(inp: v0_5.InputTensorDescr, proc_spec: v0_5.Preproc else: assert_never(proc_spec) -def postproc_v5_to_processing(inp: v0_5.OutputTensorDescr, proc_spec: v0_5.PostprocessingDescr,) -> Processing: + +def postproc_v5_to_processing( + inp: v0_5.OutputTensorDescr, + proc_spec: v0_5.PostprocessingDescr, +) -> Processing: if isinstance(proc_spec, v0_5.BinarizeDescr): return Binarize.from_proc_descr(proc_spec, inp.id) if isinstance(proc_spec, v0_5.ScaleMeanVarianceDescr): diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index 6ce31158..21afcc3d 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -6,7 +6,6 @@ Optional, Sequence, Set, - Tuple, Union, ) @@ -15,13 +14,11 @@ from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_5 import TensorId -from .digest_spec import get_member_ids from .proc_ops import ( AddKnownDatasetStats, EnsureDtype, Processing, UpdateStats, - get_proc_class, postproc_v4_to_processing, postproc_v5_to_processing, preproc_v4_to_processing, @@ -141,7 +138,10 @@ def get_requried_sample_measures(model: 
AnyModelDescr) -> RequiredSampleMeasures {m for m in s.post_measures if isinstance(m, SampleMeasureBase)}, ) -def _prepare_v4_preprocs(tensor_descrs: Sequence[v0_4.InputTensorDescr]) -> List[Processing]: + +def _prepare_v4_preprocs( + tensor_descrs: Sequence[v0_4.InputTensorDescr], +) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: member_id = TensorId(str(t_descr.name)) @@ -152,7 +152,10 @@ def _prepare_v4_preprocs(tensor_descrs: Sequence[v0_4.InputTensorDescr]) -> List procs.append(preproc_v4_to_processing(t_descr, proc_d)) return procs -def _prepare_v4_postprocs(tensor_descrs: Sequence[v0_4.OutputTensorDescr]) -> List[Processing]: + +def _prepare_v4_postprocs( + tensor_descrs: Sequence[v0_4.OutputTensorDescr], +) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: member_id = TensorId(str(t_descr.name)) @@ -163,14 +166,20 @@ def _prepare_v4_postprocs(tensor_descrs: Sequence[v0_4.OutputTensorDescr]) -> Li procs.append(postproc_v4_to_processing(t_descr, proc_d)) return procs -def _prepare_v5_preprocs(tensor_descrs: Sequence[v0_5.InputTensorDescr]) -> List[Processing]: + +def _prepare_v5_preprocs( + tensor_descrs: Sequence[v0_5.InputTensorDescr], +) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: for proc_d in t_descr.preprocessing: procs.append(preproc_v5_to_processing(t_descr, proc_d)) return procs -def _prepare_v5_postprocs(tensor_descrs: Sequence[v0_5.OutputTensorDescr]) -> List[Processing]: + +def _prepare_v5_postprocs( + tensor_descrs: Sequence[v0_5.OutputTensorDescr], +) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: for proc_d in t_descr.postprocessing: From aa22fefab3b38be7884e5e2fb80888806180e07e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 21 Feb 2025 13:42:42 +0100 Subject: [PATCH 089/187] make loading pytorch weights safe --- bioimageio/core/backends/pytorch_backend.py | 2 +- presentations/create_ambitious_sloth.ipynb | 2 +- 
2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index a6f51bcd..2b688901 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -136,7 +136,7 @@ def load_torch_state_dict( model = model.to(devices[0]) with path.open("rb") as f: assert not isinstance(f, TextIOWrapper) - state = torch.load(f, map_location=devices[0]) + state = torch.load(f, map_location=devices[0], weights_only=True) incompatible = model.load_state_dict(state) if incompatible is not None and incompatible.missing_keys: diff --git a/presentations/create_ambitious_sloth.ipynb b/presentations/create_ambitious_sloth.ipynb index 171b30db..8cda7fec 100644 --- a/presentations/create_ambitious_sloth.ipynb +++ b/presentations/create_ambitious_sloth.ipynb @@ -465,7 +465,7 @@ } ], "source": [ - "pytorch_weights = torch.load(root / \"weights.pt\", weights_only=False)\n", + "pytorch_weights = torch.load(root / \"weights.pt\", weights_only=True)\n", "pprint([(k, tuple(v.shape)) for k, v in pytorch_weights.items()][:4] + [\"...\"])" ] }, From adbb9760db3f19d936c48d10e7068731f92d2516 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 21 Feb 2025 16:39:16 +0100 Subject: [PATCH 090/187] accept single tensor source --- bioimageio/core/digest_spec.py | 20 ++++++++++++++------ bioimageio/core/prediction.py | 10 +++------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 5789f377..0eef0a72 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -1,5 +1,6 @@ from __future__ import annotations +import collections.abc import importlib.util from itertools import chain from pathlib import Path @@ -47,6 +48,8 @@ from .stat_measures import Stat from .tensor import Tensor +TensorSource = Union[Tensor, xr.DataArray, NDArray[Any], Path] + def import_callable( node: 
Union[ @@ -312,7 +315,7 @@ def get_io_sample_block_metas( def get_tensor( - src: Union[Tensor, xr.DataArray, NDArray[Any], Path], + src: TensorSource, ipt: Union[v0_4.InputTensorDescr, v0_5.InputTensorDescr], ): """helper to cast/load various tensor sources""" @@ -337,10 +340,7 @@ def create_sample_for_model( *, stat: Optional[Stat] = None, sample_id: SampleId = None, - inputs: Optional[ - PerMember[Union[Tensor, xr.DataArray, NDArray[Any], Path]] - ] = None, # TODO: make non-optional # TODO: accept tuple of tensor sources - **kwargs: NDArray[Any], # TODO: deprecate in favor of `inputs` + inputs: Union[PerMember[TensorSource], TensorSource], ) -> Sample: """Create a sample from a single set of input(s) for a specific bioimage.io model @@ -349,9 +349,17 @@ def create_sample_for_model( stat: dictionary with sample and dataset statistics (may be updated in-place!) inputs: the input(s) constituting a single sample. """ - inputs = {MemberId(k): v for k, v in {**kwargs, **(inputs or {})}.items()} model_inputs = {get_member_id(d): d for d in model.inputs} + if isinstance(inputs, collections.abc.Mapping): + inputs = {MemberId(k): v for k, v in inputs.items()} + elif len(model_inputs) == 1: + inputs = {list(model_inputs)[0]: inputs} + else: + raise TypeError( + f"Expected `inputs` to be a mapping with keys {tuple(model_inputs)}" + ) + if unknown := {k for k in inputs if k not in model_inputs}: raise ValueError(f"Got unexpected inputs: {unknown}") diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index a27451e7..71812266 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -1,7 +1,6 @@ import collections.abc from pathlib import Path from typing import ( - Any, Hashable, Iterable, Iterator, @@ -11,9 +10,7 @@ Union, ) -import xarray as xr from loguru import logger -from numpy.typing import NDArray from tqdm import tqdm from bioimageio.spec import load_description @@ -23,10 +20,9 @@ from ._prediction_pipeline import 
PredictionPipeline, create_prediction_pipeline from .axis import AxisId from .common import BlocksizeParameter, MemberId, PerMember -from .digest_spec import create_sample_for_model +from .digest_spec import TensorSource, create_sample_for_model from .io import save_sample from .sample import Sample -from .tensor import Tensor def predict( @@ -34,7 +30,7 @@ def predict( model: Union[ PermissiveFileSource, v0_4.ModelDescr, v0_5.ModelDescr, PredictionPipeline ], - inputs: Union[Sample, PerMember[Union[Tensor, xr.DataArray, NDArray[Any], Path]]], + inputs: Union[Sample, PerMember[TensorSource], TensorSource], sample_id: Hashable = "sample", blocksize_parameter: Optional[BlocksizeParameter] = None, input_block_shape: Optional[Mapping[MemberId, Mapping[AxisId, int]]] = None, @@ -124,7 +120,7 @@ def predict_many( model: Union[ PermissiveFileSource, v0_4.ModelDescr, v0_5.ModelDescr, PredictionPipeline ], - inputs: Iterable[PerMember[Union[Tensor, xr.DataArray, NDArray[Any], Path]]], + inputs: Iterable[Union[TensorSource, PerMember[TensorSource]]], sample_id: str = "sample{i:03}", blocksize_parameter: Optional[ Union[ From 2d0d189c4462e4d3cbdf975fb81a29bef874afc8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 21 Feb 2025 16:41:37 +0100 Subject: [PATCH 091/187] make sure to perform io checks --- bioimageio/core/_resource_tests.py | 32 ++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index b1174b86..3efcfc7d 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -28,6 +28,7 @@ BioimageioCondaEnv, InvalidDescr, ResourceDescr, + ValidationContext, build_description, dump_description, get_conda_env, @@ -382,22 +383,37 @@ def load_description_and_test( **deprecated: Unpack[DeprecatedKwargs], ) -> Union[ResourceDescr, InvalidDescr]: """Test RDF dynamically, e.g. 
model inference of test inputs""" - if ( - isinstance(source, ResourceDescrBase) - and format_version != "discover" - and source.format_version != format_version + if isinstance(source, ResourceDescrBase) and ( + (format_version != "discover" and source.format_version != format_version) + or (c := source.validation_summary.details[0].context) is None + or not c["perform_io_checks"] ): - warnings.warn( - f"deserializing source to ensure we validate and test using format {format_version}" + logger.debug( + "deserializing source to ensure we validate and test using format {} and perform io checks", + format_version, ) + root = source.root source = dump_description(source) + else: + root = Path() if isinstance(source, ResourceDescrBase): rd = source elif isinstance(source, dict): - rd = build_description(source, format_version=format_version) + # check context for a given root; default to root of source + context = validation_context_var.get(ValidationContext(root=root)).replace( + perform_io_checks=True # make sure we perform io checks though + ) + + rd = build_description( + source, + format_version=format_version, + context=context, + ) else: - rd = load_description(source, format_version=format_version, sha256=sha256) + rd = load_description( + source, format_version=format_version, sha256=sha256, perform_io_checks=True + ) rd.validation_summary.env.add( InstalledPackage(name="bioimageio.core", version=VERSION) From f6fff61cbf83087cea1dd060ff35f0effa261d0e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Feb 2025 09:40:55 +0100 Subject: [PATCH 092/187] better logging in CLI --- bioimageio/core/cli.py | 64 +++++++++++++------ bioimageio/core/commands.py | 34 +++++----- .../core/weight_converters/_add_weights.py | 2 +- setup.py | 1 + 4 files changed, 63 insertions(+), 38 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 2778d201..024811b8 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -46,7 +46,13 @@ from tqdm 
import tqdm from typing_extensions import assert_never -from bioimageio.spec import AnyModelDescr, InvalidDescr, load_description +from bioimageio.spec import ( + AnyModelDescr, + InvalidDescr, + ResourceDescr, + load_description, + settings, +) from bioimageio.spec._internal.io_basics import ZipPath from bioimageio.spec._internal.io_utils import write_yaml from bioimageio.spec._internal.types import NotEmpty @@ -61,7 +67,6 @@ package, test, update_format, - validate_format, ) from .common import MemberId, SampleId, SupportedWeightsFormat from .digest_spec import get_member_ids, load_sample_for_model @@ -90,6 +95,19 @@ class ArgMixin(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True pass +class WithSummaryLogging(ArgMixin): + summary: Union[Path, Sequence[Path]] = Field( + (), examples=[Path("summary.md"), Path("bioimageio_summaries/")] + ) + """Save the validation summary as JSON, Markdown or HTML. + The format is chosen based on the suffix: `.json`, `.md`, `.html`. + If a folder is given (path w/o suffix) the summary is saved in all formats. + """ + + def log(self, descr: Union[ResourceDescr, InvalidDescr]): + _ = descr.validation_summary.log(self.summary) + + class WithSource(ArgMixin): source: CliPositionalArg[str] """Url/path to a bioimageio.yaml/rdf.yaml file @@ -118,14 +136,26 @@ def descr_id(self) -> str: return str(nickname or self.descr.id or self.descr.name) -class ValidateFormatCmd(CmdBase, WithSource): +class ValidateFormatCmd(CmdBase, WithSource, WithSummaryLogging): """Validate the meta data format of a bioimageio resource.""" + perform_io_checks: bool = Field( + settings.perform_io_checks, alias="perform-io-checks" + ) + """Wether or not to perform validations that requires downloading remote files. + Note: Default value is set by `BIOIMAGEIO_PERFORM_IO_CHECKS` environment variable. 
+ """ + + @cached_property + def descr(self): + return load_description(self.source, perform_io_checks=self.perform_io_checks) + def run(self): - sys.exit(validate_format(self.descr)) + self.log(self.descr) + sys.exit(0 if self.descr.validation_summary.status == "passed" else 1) -class TestCmd(CmdBase, WithSource): +class TestCmd(CmdBase, WithSource, WithSummaryLogging): """Test a bioimageio resource (beyond meta data formatting).""" weight_format: WeightFormatArgAll = "all" @@ -140,7 +170,6 @@ class TestCmd(CmdBase, WithSource): "currently-active", alias="runtime-env" ) """The python environment to run the tests in - - `"currently-active"`: use active Python interpreter - `"as-described"`: generate a conda environment YAML file based on the model weights description. @@ -148,9 +177,6 @@ class TestCmd(CmdBase, WithSource): Note: The `bioimageio.core` dependency will be added automatically if not present. """ - summary_path: Optional[Path] = Field(None, alias="summary-path") - """Path to save validation summary as JSON file.""" - determinism: Literal["seed_only", "full"] = "seed_only" """Modes to improve reproducibility of test outputs.""" @@ -160,7 +186,7 @@ def run(self): self.descr, weight_format=self.weight_format, devices=self.devices, - summary_path=self.summary_path, + summary=self.summary, runtime_env=self.runtime_env, determinism=self.determinism, ) @@ -180,8 +206,10 @@ class PackageCmd(CmdBase, WithSource): def run(self): if isinstance(self.descr, InvalidDescr): - self.descr.validation_summary.display() - raise ValueError(f"Invalid {self.descr.type} description.") + paths = self.descr.validation_summary.log() + raise ValueError( + f"Invalid {self.descr.type} description. 
Logged details to {paths}" + ) sys.exit( package( @@ -205,7 +233,7 @@ def _get_stat( req_dataset_meas, _ = get_required_dataset_measures(model_descr) if stats_path.exists(): - logger.info(f"loading precomputed dataset measures from {stats_path}") + logger.info("loading precomputed dataset measures from {}", stats_path) stat = load_dataset_stat(stats_path) for m in req_dataset_meas: if m not in stat: @@ -232,7 +260,7 @@ class UpdateFormatCmd(CmdBase, WithSource): output: Optional[Path] = None """Save updated bioimageio.yaml to this file. - (Always renders updated bioimageio.yaml to terminal.) + Updated bioimageio.yaml is rendered to the terminal if the output is None. """ def run(self): @@ -618,13 +646,7 @@ def run(self): if updated_model_descr is None: return - updated_model_descr.validation_summary.display() - - # save validation summary as attachments - updated_model_descr.validation_summary.save_markdown( - (summary_path := self.output / "test_summary.md") - ) - logger.info("Saved rendered validation summary to {}", summary_path.absolute()) + _ = updated_model_descr.validation_summary.log() JSON_FILE = "bioimageio-cli.json" diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 92a0cd65..813c0ad3 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -26,7 +26,7 @@ def test( *, weight_format: WeightFormatArgAll = "all", devices: Optional[Union[str, Sequence[str]]] = None, - summary_path: Optional[Path] = None, + summary: Union[Path, Sequence[Path]] = (), runtime_env: Union[ Literal["currently-active", "as-described"], Path ] = "currently-active", @@ -40,34 +40,36 @@ def test( descr.validation_summary.display() return 1 - summary = test_description( + test_summary = test_description( descr, weight_format=None if weight_format == "all" else weight_format, devices=[devices] if isinstance(devices, str) else devices, runtime_env=runtime_env, determinism=determinism, ) - summary.display() - if summary_path is not None: - _ 
= summary_path.write_text(summary.model_dump_json(indent=4)) - - return 0 if summary.status == "passed" else 1 + _ = test_summary.log(summary) + return 0 if test_summary.status == "passed" else 1 def validate_format( descr: Union[ResourceDescr, InvalidDescr], + summary: Union[Path, Sequence[Path]] = (), ): - """validate the meta data format of a bioimageio resource + """DEPRECATED; Access the existing `validation_summary` attribute instead. + validate the meta data format of a bioimageio resource Args: descr: a bioimageio resource description """ - descr.validation_summary.display() + _ = descr.validation_summary.log(summary) return 0 if descr.validation_summary.status == "passed" else 1 def package( - descr: ResourceDescr, path: Path, *, weight_format: WeightFormatArgAll = "all" + descr: ResourceDescr, + path: Path, + *, + weight_format: WeightFormatArgAll = "all", ): """Save a resource's metadata with its associated files. @@ -80,8 +82,12 @@ def package( weight-format: include only this single weight-format (if not 'all'). """ if isinstance(descr, InvalidDescr): - descr.validation_summary.display() - raise ValueError(f"Invalid {descr.type} description.") + logged = descr.validation_summary.log() + msg = f"Invalid {descr.type} description." + if logged: + msg += f" Details saved to {logged}." 
+ + raise ValueError(msg) if weight_format == "all": weights_priority_order = None @@ -101,7 +107,3 @@ def package( weights_priority_order=weights_priority_order, ) return 0 - - -# def update_format(descr: ResourceDescr, path: Path): -# update_format_func() diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index f3c9ee33..ce77ef5c 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -165,7 +165,7 @@ def add_weights( logger.warning("failed to add any converted weights") return None else: - logger.info(f"added weights formats {originally_missing - missing}") + logger.info("added weights formats {}", originally_missing - missing) # resave model with updated rdf.yaml _ = save_bioimageio_package_as_folder(model_descr, output_path=output_path) _ = test_model(model_descr) diff --git a/setup.py b/setup.py index 6e3b25d2..d1166341 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ "imagecodecs", "imageio>=2.10", "loguru", + "markdown", "numpy", "pydantic-settings>=2.5,<3", "pydantic>=2.7.0,<3", From d3787a569072d32f668d45574c52b42a4b88653f Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Feb 2025 09:43:05 +0100 Subject: [PATCH 093/187] image io with PermissiveFileSource --- bioimageio/core/io.py | 115 +++++++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 19 deletions(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 44044a92..0a9278ee 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -4,7 +4,15 @@ from io import TextIOWrapper from pathlib import Path, PurePosixPath from shutil import copyfileobj -from typing import Any, Mapping, Optional, Sequence, Tuple, Union +from typing import ( + Any, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import h5py import numpy as np @@ -12,8 +20,15 @@ from loguru import logger from numpy.typing import NDArray from pydantic 
import BaseModel, ConfigDict, TypeAdapter - -from bioimageio.spec.common import FileSource, ZipPath +from typing_extensions import assert_never + +from bioimageio.spec._internal.io import interprete_file_source +from bioimageio.spec.common import ( + HttpUrl, + PermissiveFileSource, + RelativeFilePath, + ZipPath, +) from bioimageio.spec.utils import download, load_array, save_array from .axis import AxisLike @@ -25,29 +40,51 @@ DEFAULT_H5_DATASET_PATH = "data" -def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]: +SUFFIXES_WITH_DATAPATH = (".h5", ".hdf", ".hdf5") + + +def load_image( + source: PermissiveFileSource, is_volume: Optional[bool] = None +) -> NDArray[Any]: """load a single image as numpy array Args: - path: image path + source: image source is_volume: deprecated """ if is_volume is not None: warnings.warn("**is_volume** is deprecated and will be removed soon.") - file_path, subpath = _split_dataset_path(Path(path)) + parsed_source = interprete_file_source(source) - if file_path.suffix == ".npy": + if isinstance(parsed_source, RelativeFilePath): + src = parsed_source.absolute() + else: + src = parsed_source + + # FIXME: why is pyright complaining about giving the union to _split_dataset_path? 
+ if isinstance(src, Path): + file_source, subpath = _split_dataset_path(src) + elif isinstance(src, HttpUrl): + file_source, subpath = _split_dataset_path(src) + elif isinstance(src, ZipPath): + file_source, subpath = _split_dataset_path(src) + else: + assert_never(src) + + path = download(file_source).path + + if path.suffix == ".npy": if subpath is not None: raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}") return load_array(path) - elif file_path.suffix in (".h5", ".hdf", ".hdf5"): + elif path.suffix in SUFFIXES_WITH_DATAPATH: if subpath is None: dataset_path = DEFAULT_H5_DATASET_PATH else: dataset_path = str(subpath) - with h5py.File(file_path, "r") as f: + with h5py.File(path, "r") as f: h5_dataset = f.get( # pyright: ignore[reportUnknownVariableType] dataset_path ) @@ -64,18 +101,29 @@ def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]: image # pyright: ignore[reportUnknownArgumentType] ) return image # pyright: ignore[reportUnknownVariableType] + elif isinstance(path, ZipPath): + return imread( + path.read_bytes(), extension=path.suffix + ) # pyright: ignore[reportUnknownVariableType] else: return imread(path) # pyright: ignore[reportUnknownVariableType] -def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor: +def load_tensor( + path: Union[Path, str], axes: Optional[Sequence[AxisLike]] = None +) -> Tensor: # TODO: load axis meta data array = load_image(path) return Tensor.from_numpy(array, dims=axes) -def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]: +_SourceT = TypeVar("_SourceT", Path, HttpUrl, ZipPath) + + +def _split_dataset_path( + source: _SourceT, +) -> Tuple[_SourceT, Optional[PurePosixPath]]: """Split off subpath (e.g. internal h5 dataset path) from a file path following a file extension. 
@@ -83,22 +131,51 @@ def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]: >>> _split_dataset_path(Path("my_file.h5/dataset")) (...Path('my_file.h5'), PurePosixPath('dataset')) - If no suffix is detected the path is returned with >>> _split_dataset_path(Path("my_plain_file")) (...Path('my_plain_file'), None) """ - if path.suffix: + if isinstance(source, RelativeFilePath): + src = source.absolute() + else: + src = source + + del source + + def separate_pure_path(path: PurePosixPath): + for p in path.parents: + if p.suffix in SUFFIXES_WITH_DATAPATH: + return p, PurePosixPath(path.relative_to(p)) + return path, None - for p in path.parents: - if p.suffix: - return p, PurePosixPath(path.relative_to(p)) + if isinstance(src, HttpUrl): + file_path, data_path = separate_pure_path(PurePosixPath(src.path or "")) + + if data_path is None: + return src, None + + return ( + HttpUrl(str(file_path).replace(f"/{data_path}", "")), + data_path, + ) + + if isinstance(src, ZipPath): + file_path, data_path = separate_pure_path(PurePosixPath(str(src))) + + if data_path is None: + return src, None + + return ( + ZipPath(str(file_path).replace(f"/{data_path}", "")), + data_path, + ) - return path, None + file_path, data_path = separate_pure_path(PurePosixPath(src)) + return Path(file_path), data_path -def save_tensor(path: Path, tensor: Tensor) -> None: +def save_tensor(path: Union[Path, str], tensor: Tensor) -> None: # TODO: save axis meta data data: NDArray[Any] = tensor.data.to_numpy() @@ -182,7 +259,7 @@ def load_dataset_stat(path: Path): return {e.measure: e.value for e in seq} -def ensure_unzipped(source: Union[FileSource, ZipPath], folder: Path): +def ensure_unzipped(source: Union[PermissiveFileSource, ZipPath], folder: Path): """unzip a (downloaded) **source** to a file in **folder** if source is a zip archive. 
Always returns the path to the unzipped source (maybe source itself)""" local_weights_file = download(source).path From f01ee3ddb02ce7ed86d18100186954003571cf3d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Feb 2025 10:33:13 +0100 Subject: [PATCH 094/187] fix predict_many and improve save_sample --- bioimageio/core/io.py | 36 +++++++++++------- bioimageio/core/prediction.py | 71 +++++++++++++++++++---------------- 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 0a9278ee..146ab115 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -215,23 +215,33 @@ def save_tensor(path: Union[Path, str], tensor: Tensor) -> None: imwrite(path, data) -def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None: - """save a sample to path +def save_sample( + path: Union[Path, str, PerMember[Union[Path, str]]], sample: Sample +) -> None: + """Save a **sample** to a **path** pattern + or all sample members in the **path** mapping. - If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`, - which are resolved with the `sample` object. - """ - - if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path): - raise ValueError(f"missing `{{member_id}}` in path {path}") + If **path** is a pathlib.Path or a string and the **sample** has multiple members, + **path** it must contain `{member_id}` (or `{input_id}` or `{output_id}`). - for m, t in sample.members.items(): - if isinstance(path, collections.abc.Mapping): - p = path[m] + (Each) **path** may contain `{sample_id}` to be formatted with the **sample** object. 
+ """ + if not isinstance(path, collections.abc.Mapping): + if len(sample.members) < 2 or any( + m in str(path) for m in ("{member_id}", "{input_id}", "{output_id}") + ): + path = {m: path for m in sample.members} else: - p = Path(str(path).format(sample_id=sample.id, member_id=m)) + raise ValueError( + f"path {path} must contain '{{member_id}}' for sample with multiple members {list(sample.members)}." + ) - save_tensor(p, t) + for m, p in path.items(): + t = sample.members[m] + p_formatted = Path( + str(p).format(sample_id=sample.id, member_id=m, input_id=m, output_id=m) + ) + save_tensor(p_formatted, t) class _SerializedDatasetStatsEntry( diff --git a/bioimageio/core/prediction.py b/bioimageio/core/prediction.py index 71812266..9fe5ce12 100644 --- a/bioimageio/core/prediction.py +++ b/bioimageio/core/prediction.py @@ -20,7 +20,7 @@ from ._prediction_pipeline import PredictionPipeline, create_prediction_pipeline from .axis import AxisId from .common import BlocksizeParameter, MemberId, PerMember -from .digest_spec import TensorSource, create_sample_for_model +from .digest_spec import TensorSource, create_sample_for_model, get_member_id from .io import save_sample from .sample import Sample @@ -45,6 +45,8 @@ def predict( May be given as RDF source, model description or prediction pipeline. inputs: the input sample or the named input(s) for this model as a dictionary sample_id: the sample id. + The **sample_id** is used to format **save_output_path** + and to distinguish sample specific log messages. blocksize_parameter: (optional) Tile the input into blocks parametrized by **blocksize_parameter** according to any parametrized axis sizes defined by the **model**. @@ -55,17 +57,15 @@ def predict( run prediction independent of the exact block shape. skip_preprocessing: Flag to skip the model's preprocessing. skip_postprocessing: Flag to skip the model's postprocessing. - save_output_path: A path with `{member_id}` `{sample_id}` in it - to save the output to. 
+ save_output_path: A path with to save the output to. M + Must contain: + - `{output_id}` (or `{member_id}`) if the model has multiple output tensors + May contain: + - `{sample_id}` to avoid overwriting recurrent calls """ - if save_output_path is not None: - if "{member_id}" not in str(save_output_path): - raise ValueError( - f"Missing `{{member_id}}` in save_output_path={save_output_path}" - ) - if isinstance(model, PredictionPipeline): pp = model + model = pp.model_description else: if not isinstance(model, (v0_4.ModelDescr, v0_5.ModelDescr)): loaded = load_description(model) @@ -75,6 +75,18 @@ def predict( pp = create_prediction_pipeline(model) + if save_output_path is not None: + if ( + "{output_id}" not in str(save_output_path) + and "{member_id}" not in str(save_output_path) + and len(model.outputs) > 1 + ): + raise ValueError( + f"Missing `{{output_id}}` in save_output_path={save_output_path} to " + + "distinguish model outputs " + + str([get_member_id(d) for d in model.outputs]) + ) + if isinstance(inputs, Sample): sample = inputs else: @@ -120,7 +132,7 @@ def predict_many( model: Union[ PermissiveFileSource, v0_4.ModelDescr, v0_5.ModelDescr, PredictionPipeline ], - inputs: Iterable[Union[TensorSource, PerMember[TensorSource]]], + inputs: Union[Iterable[PerMember[TensorSource]], Iterable[TensorSource]], sample_id: str = "sample{i:03}", blocksize_parameter: Optional[ Union[ @@ -135,31 +147,27 @@ def predict_many( """Run prediction for a multiple sets of inputs with a bioimage.io model Args: - model: model to predict with. + model: Model to predict with. May be given as RDF source, model description or prediction pipeline. inputs: An iterable of the named input(s) for this model as a dictionary. - sample_id: the sample id. + sample_id: The sample id. note: `{i}` will be formatted as the i-th sample. - If `{i}` (or `{i:`) is not present and `inputs` is an iterable `{i:03}` is appended. 
- blocksize_parameter: (optional) tile the input into blocks parametrized by - blocksize according to any parametrized axis sizes defined in the model RDF - skip_preprocessing: flag to skip the model's preprocessing - skip_postprocessing: flag to skip the model's postprocessing - save_output_path: A path with `{member_id}` `{sample_id}` in it - to save the output to. + If `{i}` (or `{i:`) is not present and `inputs` is not an iterable `{i:03}` + is appended. + blocksize_parameter: (optional) Tile the input into blocks parametrized by + blocksize according to any parametrized axis sizes defined in the model RDF. + skip_preprocessing: Flag to skip the model's preprocessing. + skip_postprocessing: Flag to skip the model's postprocessing. + save_output_path: A path to save the output to. + Must contain: + - `{sample_id}` to differentiate predicted samples + - `{output_id}` (or `{member_id}`) if the model has multiple outputs """ - if save_output_path is not None: - if "{member_id}" not in str(save_output_path): - raise ValueError( - f"Missing `{{member_id}}` in save_output_path={save_output_path}" - ) - - if not isinstance(inputs, collections.abc.Mapping) and "{sample_id}" not in str( - save_output_path - ): - raise ValueError( - f"Missing `{{sample_id}}` in save_output_path={save_output_path}" - ) + if save_output_path is not None and "{sample_id}" not in str(save_output_path): + raise ValueError( + f"Missing `{{sample_id}}` in save_output_path={save_output_path}" + + " to differentiate predicted samples." 
+ ) if isinstance(model, PredictionPipeline): pp = model @@ -173,7 +181,6 @@ def predict_many( pp = create_prediction_pipeline(model) if not isinstance(inputs, collections.abc.Mapping): - sample_id = str(sample_id) if "{i}" not in sample_id and "{i:" not in sample_id: sample_id += "{i:03}" From bd784e03f3dbd6720c5abf20025b4719bed63ea8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Feb 2025 10:42:51 +0100 Subject: [PATCH 095/187] remove markdown dep (needed in spec) --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index d1166341..6e3b25d2 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ "imagecodecs", "imageio>=2.10", "loguru", - "markdown", "numpy", "pydantic-settings>=2.5,<3", "pydantic>=2.7.0,<3", From 03d4940cdab9016d8b700cfd8198209da63b384c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Feb 2025 10:43:00 +0100 Subject: [PATCH 096/187] bump pyright --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6e3b25d2..651e3796 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ "packaging>=17.0", "pre-commit", "pdoc", - "pyright==1.1.393", + "pyright==1.1.394", "pytest-cov", "pytest", ] From 89d746691e3dd432996a28ee74bb3b435862a1fc Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Feb 2025 14:27:13 +0100 Subject: [PATCH 097/187] replace get_proc_class with get_proc --- README.md | 11 ++- bioimageio/core/proc_ops.py | 177 +++++++++------------------------- bioimageio/core/proc_setup.py | 86 +++++++---------- 3 files changed, 85 insertions(+), 189 deletions(-) diff --git a/README.md b/README.md index 46557111..1a1aa932 100644 --- a/README.md +++ b/README.md @@ -275,6 +275,7 @@ functionality, but not any functionality depending on model prediction. To install additional deep learning libraries add `pytorch`, `onnxruntime`, `keras` or `tensorflow`. 
Deeplearning frameworks to consider installing alongside `bioimageio.core`: + - [Pytorch/Torchscript](https://pytorch.org/get-started/locally/) - [TensorFlow](https://www.tensorflow.org/install) - [ONNXRuntime](https://onnxruntime.ai/docs/install/#python-installs) @@ -297,13 +298,16 @@ These models are described by---and can be loaded with---the bioimageio.spec pac In addition bioimageio.core provides functionality to convert model weight formats. ### Documentation + [Here you find the bioimageio.core documentation.](https://bioimage-io.github.io/core-bioimage-io-python/bioimageio/core.html) #### Presentations + - [Create a model from scratch](https://bioimage-io.github.io/core-bioimage-io-python/presentations/create_ambitious_sloth.slides.html) ([source](https://github.com/bioimage-io/core-bioimage-io-python/tree/main/presentations)) #### Examples -
+ +
Notebooks that save and load resource descriptions and validate their format (using bioimageio.spec, a dependency of bioimageio.core)
load_model_and_create_your_own.ipynb Open In Colab @@ -327,7 +331,6 @@ bioimageio For examples see [Get started](#get-started). - ### CLI inputs from file For convenience the command line options (not arguments) may be given in a `bioimageio-cli.json` @@ -342,7 +345,6 @@ blockwise: true stats: inputs/dataset_statistics.json ``` - ## Set up Development Environment To set up a development conda environment run the following commands: @@ -355,13 +357,11 @@ pip install -e . --no-deps There are different environment files available that only install tensorflow or pytorch as dependencies, see [dev folder](https://github.com/bioimage-io/core-bioimage-io-python/tree/main/dev). - ## Logging level `bioimageio.spec` and `bioimageio.core` use [loguru](https://github.com/Delgan/loguru) for logging, hence the logging level may be controlled with the `LOGURU_LEVEL` environment variable. - ## Changelog ### 0.7.1 (to be released) @@ -370,6 +370,7 @@ may be controlled with the `LOGURU_LEVEL` environment variable. 
- New feature: `bioimageio.core.test_description` accepts **runtime_env** and **run_command** to test a resource using the conda environment described by that resource (or another specified conda env) - new CLI command: `bioimageio add-weights` (and utility function: bioimageio.core.add_weights) +- removed `bioimageio.core.proc_ops.get_proc_class` in favor of `bioimageio.core.proc_ops.get_proc` ### 0.7.0 diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index a3f9f281..1b2cf183 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -16,8 +16,8 @@ import xarray as xr from typing_extensions import Self, assert_never +from bioimageio.core.digest_spec import get_member_id from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.model.v0_5 import TensorId from bioimageio.spec.model.v0_5 import ( _convert_proc, # pyright: ignore [reportPrivateUsage] ) @@ -672,144 +672,53 @@ def _apply(self, input: Tensor, stat: Stat) -> Tensor: ] -def get_proc_class(proc_spec: ProcDescr): - if isinstance(proc_spec, (v0_4.BinarizeDescr, v0_5.BinarizeDescr)): - return Binarize - elif isinstance(proc_spec, (v0_4.ClipDescr, v0_5.ClipDescr)): - return Clip - elif isinstance(proc_spec, v0_5.EnsureDtypeDescr): - return EnsureDtype - elif isinstance(proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr): - return FixedZeroMeanUnitVariance - elif isinstance(proc_spec, (v0_4.ScaleLinearDescr, v0_5.ScaleLinearDescr)): - return ScaleLinear +def get_proc( + proc_descr: ProcDescr, + tensor_descr: Union[ + v0_4.InputTensorDescr, + v0_4.OutputTensorDescr, + v0_5.InputTensorDescr, + v0_5.OutputTensorDescr, + ], +) -> Processing: + member_id = get_member_id(tensor_descr) + + if isinstance(proc_descr, (v0_4.BinarizeDescr, v0_5.BinarizeDescr)): + return Binarize.from_proc_descr(proc_descr, member_id) + elif isinstance(proc_descr, (v0_4.ClipDescr, v0_5.ClipDescr)): + return Clip.from_proc_descr(proc_descr, member_id) + elif isinstance(proc_descr, 
v0_5.EnsureDtypeDescr): + return EnsureDtype.from_proc_descr(proc_descr, member_id) + elif isinstance(proc_descr, v0_5.FixedZeroMeanUnitVarianceDescr): + return FixedZeroMeanUnitVariance.from_proc_descr(proc_descr, member_id) + elif isinstance(proc_descr, (v0_4.ScaleLinearDescr, v0_5.ScaleLinearDescr)): + return ScaleLinear.from_proc_descr(proc_descr, member_id) elif isinstance( - proc_spec, (v0_4.ScaleMeanVarianceDescr, v0_5.ScaleMeanVarianceDescr) + proc_descr, (v0_4.ScaleMeanVarianceDescr, v0_5.ScaleMeanVarianceDescr) ): - return ScaleMeanVariance - elif isinstance(proc_spec, (v0_4.ScaleRangeDescr, v0_5.ScaleRangeDescr)): - return ScaleRange - elif isinstance(proc_spec, (v0_4.SigmoidDescr, v0_5.SigmoidDescr)): - return Sigmoid + return ScaleMeanVariance.from_proc_descr(proc_descr, member_id) + elif isinstance(proc_descr, (v0_4.ScaleRangeDescr, v0_5.ScaleRangeDescr)): + return ScaleRange.from_proc_descr(proc_descr, member_id) + elif isinstance(proc_descr, (v0_4.SigmoidDescr, v0_5.SigmoidDescr)): + return Sigmoid.from_proc_descr(proc_descr, member_id) elif ( - isinstance(proc_spec, v0_4.ZeroMeanUnitVarianceDescr) - and proc_spec.kwargs.mode == "fixed" + isinstance(proc_descr, v0_4.ZeroMeanUnitVarianceDescr) + and proc_descr.kwargs.mode == "fixed" ): - return FixedZeroMeanUnitVariance + if not isinstance( + tensor_descr, (v0_4.InputTensorDescr, v0_4.OutputTensorDescr) + ): + raise TypeError( + "Expected v0_4 tensor description for v0_4 processing description" + ) + + v5_proc_descr = _convert_proc(proc_descr, tensor_descr.axes) + assert isinstance(v5_proc_descr, v0_5.FixedZeroMeanUnitVarianceDescr) + return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_descr, member_id) elif isinstance( - proc_spec, + proc_descr, (v0_4.ZeroMeanUnitVarianceDescr, v0_5.ZeroMeanUnitVarianceDescr), ): - return ZeroMeanUnitVariance - else: - assert_never(proc_spec) - - -def preproc_v4_to_processing( - inp: v0_4.InputTensorDescr, - proc_spec: v0_4.PreprocessingDescr, -) -> Processing: 
- member_id = TensorId(str(inp.name)) - if isinstance(proc_spec, v0_4.BinarizeDescr): - return Binarize.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ClipDescr): - return Clip.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ScaleLinearDescr): - return ScaleLinear.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ScaleRangeDescr): - return ScaleRange.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.SigmoidDescr): - return Sigmoid.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ZeroMeanUnitVarianceDescr): - if proc_spec.kwargs.mode == "fixed": - axes = inp.axes - v5_proc_spec = _convert_proc(proc_spec, axes) - assert isinstance( - v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr - ) # FIXME - return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_spec, member_id) - else: - return ZeroMeanUnitVariance.from_proc_descr(proc_spec, member_id) - else: - assert_never(proc_spec) - - -def postproc_v4_to_processing( - inp: v0_4.OutputTensorDescr, - proc_spec: v0_4.PostprocessingDescr, -) -> Processing: - member_id = TensorId(str(inp.name)) - if isinstance(proc_spec, v0_4.BinarizeDescr): - return Binarize.from_proc_descr(proc_spec, member_id) - if isinstance(proc_spec, v0_4.ScaleMeanVarianceDescr): - return ScaleMeanVariance.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ClipDescr): - return Clip.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ScaleLinearDescr): - return ScaleLinear.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ScaleRangeDescr): - return ScaleRange.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.SigmoidDescr): - return Sigmoid.from_proc_descr(proc_spec, member_id) - elif isinstance(proc_spec, v0_4.ZeroMeanUnitVarianceDescr): - if proc_spec.kwargs.mode == "fixed": - axes = inp.axes - v5_proc_spec = _convert_proc(proc_spec, axes) - assert 
isinstance( - v5_proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr - ) # FIXME - return FixedZeroMeanUnitVariance.from_proc_descr(v5_proc_spec, member_id) - else: - return ZeroMeanUnitVariance.from_proc_descr(proc_spec, member_id) - else: - assert_never(proc_spec) - - -def preproc_v5_to_processing( - inp: v0_5.InputTensorDescr, - proc_spec: v0_5.PreprocessingDescr, -) -> Processing: - if isinstance(proc_spec, v0_5.BinarizeDescr): - return Binarize.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ClipDescr): - return Clip.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ScaleLinearDescr): - return ScaleLinear.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ScaleRangeDescr): - return ScaleRange.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.SigmoidDescr): - return Sigmoid.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.EnsureDtypeDescr): - return EnsureDtype.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ZeroMeanUnitVarianceDescr): - return ZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr): - return FixedZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) - else: - assert_never(proc_spec) - - -def postproc_v5_to_processing( - inp: v0_5.OutputTensorDescr, - proc_spec: v0_5.PostprocessingDescr, -) -> Processing: - if isinstance(proc_spec, v0_5.BinarizeDescr): - return Binarize.from_proc_descr(proc_spec, inp.id) - if isinstance(proc_spec, v0_5.ScaleMeanVarianceDescr): - return ScaleMeanVariance.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ClipDescr): - return Clip.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ScaleLinearDescr): - return ScaleLinear.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ScaleRangeDescr): - return ScaleRange.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, 
v0_5.SigmoidDescr): - return Sigmoid.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.EnsureDtypeDescr): - return EnsureDtype.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.ZeroMeanUnitVarianceDescr): - return ZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) - elif isinstance(proc_spec, v0_5.FixedZeroMeanUnitVarianceDescr): - return FixedZeroMeanUnitVariance.from_proc_descr(proc_spec, inp.id) + return ZeroMeanUnitVariance.from_proc_descr(proc_descr, member_id) else: - assert_never(proc_spec) + assert_never(proc_descr) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index bdcb3b54..a4113d21 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -12,18 +12,15 @@ from typing_extensions import assert_never +from bioimageio.core.digest_spec import get_member_id from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 -from bioimageio.spec.model.v0_5 import TensorId from .proc_ops import ( AddKnownDatasetStats, EnsureDtype, Processing, UpdateStats, - postproc_v4_to_processing, - postproc_v5_to_processing, - preproc_v4_to_processing, - preproc_v5_to_processing, + get_proc, ) from .sample import Sample from .stat_calculators import StatsCalculator @@ -173,61 +170,50 @@ def get_requried_sample_measures(model: AnyModelDescr) -> RequiredSampleMeasures ) -def _prepare_v4_preprocs( - tensor_descrs: Sequence[v0_4.InputTensorDescr], +def _prepare_procs( + tensor_descrs: Union[ + Sequence[v0_4.InputTensorDescr], + Sequence[v0_5.InputTensorDescr], + Sequence[v0_4.OutputTensorDescr], + Sequence[v0_5.OutputTensorDescr], + ], ) -> List[Processing]: procs: List[Processing] = [] for t_descr in tensor_descrs: - member_id = TensorId(str(t_descr.name)) - procs.append( - EnsureDtype(input=member_id, output=member_id, dtype=t_descr.data_type) - ) - for proc_d in t_descr.preprocessing: - procs.append(preproc_v4_to_processing(t_descr, proc_d)) - return procs - - -def _prepare_v4_postprocs( 
- tensor_descrs: Sequence[v0_4.OutputTensorDescr], -) -> List[Processing]: - procs: List[Processing] = [] - for t_descr in tensor_descrs: - member_id = TensorId(str(t_descr.name)) - procs.append( - EnsureDtype(input=member_id, output=member_id, dtype=t_descr.data_type) - ) - for proc_d in t_descr.postprocessing: - procs.append(postproc_v4_to_processing(t_descr, proc_d)) - return procs - + if isinstance(t_descr, (v0_4.InputTensorDescr, v0_4.OutputTensorDescr)): + member_id = get_member_id(t_descr) + procs.append( + EnsureDtype(input=member_id, output=member_id, dtype=t_descr.data_type) + ) + + if isinstance(t_descr, (v0_4.InputTensorDescr, v0_5.InputTensorDescr)): + for proc_d in t_descr.preprocessing: + procs.append(get_proc(proc_d, t_descr)) + elif isinstance(t_descr, (v0_4.OutputTensorDescr, v0_5.OutputTensorDescr)): + for proc_d in t_descr.postprocessing: + procs.append(get_proc(proc_d, t_descr)) + else: + assert_never(t_descr) + + if isinstance( + t_descr, + (v0_4.InputTensorDescr, (v0_4.InputTensorDescr, v0_4.OutputTensorDescr)), + ) and not isinstance(procs[-1], EnsureDtype): + member_id = get_member_id(t_descr) + procs.append( + EnsureDtype(input=member_id, output=member_id, dtype=t_descr.data_type) + ) -def _prepare_v5_preprocs( - tensor_descrs: Sequence[v0_5.InputTensorDescr], -) -> List[Processing]: - procs: List[Processing] = [] - for t_descr in tensor_descrs: - for proc_d in t_descr.preprocessing: - procs.append(preproc_v5_to_processing(t_descr, proc_d)) - return procs - - -def _prepare_v5_postprocs( - tensor_descrs: Sequence[v0_5.OutputTensorDescr], -) -> List[Processing]: - procs: List[Processing] = [] - for t_descr in tensor_descrs: - for proc_d in t_descr.postprocessing: - procs.append(postproc_v5_to_processing(t_descr, proc_d)) return procs def _prepare_setup_pre_and_postprocessing(model: AnyModelDescr) -> _SetupProcessing: if isinstance(model, v0_4.ModelDescr): - pre = _prepare_v4_preprocs(model.inputs) - post = 
_prepare_v4_postprocs(model.outputs) + pre = _prepare_procs(model.inputs) + post = _prepare_procs(model.outputs) elif isinstance(model, v0_5.ModelDescr): - pre = _prepare_v5_preprocs(model.inputs) - post = _prepare_v5_postprocs(model.outputs) + pre = _prepare_procs(model.inputs) + post = _prepare_procs(model.outputs) else: assert_never(model) From 67fa607f03113f44a05d1f019c531af8384a8af8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Feb 2025 12:51:32 +0100 Subject: [PATCH 098/187] use overload for 'latest' --- bioimageio/core/_resource_tests.py | 64 ++++++++++++++++++++++++------ bioimageio/core/cli.py | 6 +-- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 3efcfc7d..c016f5be 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -19,6 +19,7 @@ Set, Tuple, Union, + overload, ) from loguru import logger @@ -27,6 +28,7 @@ from bioimageio.spec import ( BioimageioCondaEnv, InvalidDescr, + LatestResourceDescr, ResourceDescr, ValidationContext, build_description, @@ -35,11 +37,13 @@ load_description, save_bioimageio_package, ) +from bioimageio.spec._description_impl import DISCOVER from bioimageio.spec._internal.common_nodes import ResourceDescrBase from bioimageio.spec._internal.io import is_yaml_value from bioimageio.spec._internal.io_utils import read_yaml, write_yaml from bioimageio.spec._internal.types import ( AbsoluteTolerance, + FormatVersionPlaceholder, MismatchedElementsPerMillion, RelativeTolerance, ) @@ -182,7 +186,7 @@ def default_run_command(args: Sequence[str]): def test_description( source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], *, - format_version: Union[Literal["discover", "latest"], str] = "discover", + format_version: Union[FormatVersionPlaceholder, str] = "discover", weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[Sequence[str]] = None, determinism: 
Literal["seed_only", "full"] = "seed_only", @@ -371,10 +375,38 @@ def _test_in_env( return ValidationSummary.model_validate_json(summary_path.read_bytes()) +@overload def load_description_and_test( source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], *, - format_version: Union[Literal["discover", "latest"], str] = "discover", + format_version: Literal["latest"], + weight_format: Optional[SupportedWeightsFormat] = None, + devices: Optional[Sequence[str]] = None, + determinism: Literal["seed_only", "full"] = "seed_only", + expected_type: Optional[str] = None, + sha256: Optional[Sha256] = None, + **deprecated: Unpack[DeprecatedKwargs], +) -> Union[LatestResourceDescr, InvalidDescr]: ... + + +@overload +def load_description_and_test( + source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], + *, + format_version: Union[FormatVersionPlaceholder, str] = DISCOVER, + weight_format: Optional[SupportedWeightsFormat] = None, + devices: Optional[Sequence[str]] = None, + determinism: Literal["seed_only", "full"] = "seed_only", + expected_type: Optional[str] = None, + sha256: Optional[Sha256] = None, + **deprecated: Unpack[DeprecatedKwargs], +) -> Union[ResourceDescr, InvalidDescr]: ... + + +def load_description_and_test( + source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], + *, + format_version: Union[FormatVersionPlaceholder, str] = DISCOVER, weight_format: Optional[SupportedWeightsFormat] = None, devices: Optional[Sequence[str]] = None, determinism: Literal["seed_only", "full"] = "seed_only", @@ -383,17 +415,25 @@ def load_description_and_test( **deprecated: Unpack[DeprecatedKwargs], ) -> Union[ResourceDescr, InvalidDescr]: """Test RDF dynamically, e.g. 
model inference of test inputs""" - if isinstance(source, ResourceDescrBase) and ( - (format_version != "discover" and source.format_version != format_version) - or (c := source.validation_summary.details[0].context) is None - or not c["perform_io_checks"] - ): - logger.debug( - "deserializing source to ensure we validate and test using format {} and perform io checks", - format_version, - ) + if isinstance(source, ResourceDescrBase): root = source.root - source = dump_description(source) + if ( + ( + format_version + not in ( + DISCOVER, + source.format_version, + ".".join(source.format_version.split(".")[:2]), + ) + ) + or (c := source.validation_summary.details[0].context) is None + or not c["perform_io_checks"] + ): + logger.debug( + "deserializing source to ensure we validate and test using format {} and perform io checks", + format_version, + ) + source = dump_description(source) else: root = Path() diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 024811b8..5fe62632 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -51,10 +51,10 @@ InvalidDescr, ResourceDescr, load_description, + save_bioimageio_yaml_only, settings, ) from bioimageio.spec._internal.io_basics import ZipPath -from bioimageio.spec._internal.io_utils import write_yaml from bioimageio.spec._internal.types import NotEmpty from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr, v0_4, v0_5 @@ -264,9 +264,9 @@ class UpdateFormatCmd(CmdBase, WithSource): """ def run(self): - updated = update_format(self.descr, output_path=self.output) + updated = update_format(self.source, output=self.output) updated_stream = StringIO() - write_yaml(updated, updated_stream) + save_bioimageio_yaml_only(updated, updated_stream) updated_md = f"```yaml\n{updated_stream.getvalue()}\n```" rich_markdown = rich.markdown.Markdown(updated_md) From 667eeac477338f934cf08eda3fb217cd832adee4 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Feb 2025 
13:08:04 +0100 Subject: [PATCH 099/187] ensure 0.4 models get float32 inputa --- bioimageio/core/proc_setup.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index a4113d21..1ada58b2 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -198,10 +198,17 @@ def _prepare_procs( if isinstance( t_descr, (v0_4.InputTensorDescr, (v0_4.InputTensorDescr, v0_4.OutputTensorDescr)), - ) and not isinstance(procs[-1], EnsureDtype): + ): + if len(procs) == 1: + # remove initial ensure_dtype if there are no other proccessing steps + assert isinstance(procs[0], EnsureDtype) + procs = [] + + # ensure 0.4 models get float32 input + # which has been the implicit assumption for 0.4 member_id = get_member_id(t_descr) procs.append( - EnsureDtype(input=member_id, output=member_id, dtype=t_descr.data_type) + EnsureDtype(input=member_id, output=member_id, dtype="float32") ) return procs From ddf86fc6d78a7ec5e4fec27c22e493050f6ac418 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Feb 2025 14:43:47 +0100 Subject: [PATCH 100/187] log block shape --- bioimageio/core/_prediction_pipeline.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/_prediction_pipeline.py b/bioimageio/core/_prediction_pipeline.py index 51afcf2a..0b7717aa 100644 --- a/bioimageio/core/_prediction_pipeline.py +++ b/bioimageio/core/_prediction_pipeline.py @@ -12,6 +12,7 @@ Union, ) +from loguru import logger from tqdm import tqdm from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 @@ -188,9 +189,15 @@ def predict_sample_with_fixed_blocking( ) input_blocks = list(input_blocks) predicted_blocks: List[SampleBlock] = [] + logger.info( + "split sample shape {} into {} blocks of {}.", + {k: dict(v) for k, v in sample.shape.items()}, + n_blocks, + {k: dict(v) for k, v in input_block_shape.items()}, + ) for b in tqdm( input_blocks, - desc=f"predict sample 
{sample.id or ''} with {self.model_description.id or self.model_description.name}", + desc=f"predict {sample.id or ''} with {self.model_description.id or self.model_description.name}", unit="block", unit_divisor=1, total=n_blocks, From 3298944fd163078555ceed32814794c15f47618c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Feb 2025 14:53:46 +0100 Subject: [PATCH 101/187] add shape attribute --- bioimageio/core/tensor.py | 5 +++++ tests/test_tensor.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py index 5f76cd6e..cb3b3da9 100644 --- a/bioimageio/core/tensor.py +++ b/bioimageio/core/tensor.py @@ -196,6 +196,11 @@ def ndim(self): """Number of tensor dimensions.""" return self._data.ndim + @property + def shape(self): + """Tuple of tensor axes lengths""" + return self._data.shape + @property def shape_tuple(self): """Tuple of tensor axes lengths""" diff --git a/tests/test_tensor.py b/tests/test_tensor.py index e00efe04..c57980bd 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -60,3 +60,8 @@ def test_crop_and_pad(): def test_some_magic_ops(): tensor = Tensor.from_numpy(np.random.rand(256, 256), dims=None) assert tensor + 2 == 2 + tensor + + +def test_shape_attributes(): + tensor = Tensor.from_numpy(np.random.rand(1, 2, 25, 26), dims=None) + assert tensor.shape_tuple == tensor.shape From 70fb627fddb4678dc6dec4174237c91970d8913d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Feb 2025 13:57:58 +0100 Subject: [PATCH 102/187] add TODO exclude default values --- bioimageio/core/cli.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 5fe62632..116f3528 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -262,6 +262,9 @@ class UpdateFormatCmd(CmdBase, WithSource): Updated bioimageio.yaml is rendered to the terminal if the output is None. 
""" + # TODO: exclude default values (without braking discriminated unions) + # exclude_defaults: bool = Field(True, alias="exclude-defaults") + # """Exclude fields that have the default value.""" def run(self): updated = update_format(self.source, output=self.output) From 503c04b70892bb51058248e8e0f27b65f63529d2 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Feb 2025 23:33:19 +0100 Subject: [PATCH 103/187] add exclude_defaults arg --- bioimageio/core/cli.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 116f3528..c4102f33 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -262,14 +262,18 @@ class UpdateFormatCmd(CmdBase, WithSource): Updated bioimageio.yaml is rendered to the terminal if the output is None. """ - # TODO: exclude default values (without braking discriminated unions) - # exclude_defaults: bool = Field(True, alias="exclude-defaults") - # """Exclude fields that have the default value.""" + + exclude_defaults: bool = Field(True, alias="exclude-defaults") + """Exclude fields that have the default value (even if set explicitly).""" def run(self): - updated = update_format(self.source, output=self.output) + updated = update_format( + self.source, output=self.output, exclude_defaults=self.exclude_defaults + ) updated_stream = StringIO() - save_bioimageio_yaml_only(updated, updated_stream) + save_bioimageio_yaml_only( + updated, updated_stream, exclude_defaults=self.exclude_defaults + ) updated_md = f"```yaml\n{updated_stream.getvalue()}\n```" rich_markdown = rich.markdown.Markdown(updated_md) From ed6d15888c5adbec2917f9308c3689e38015d089 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Feb 2025 19:20:59 +0100 Subject: [PATCH 104/187] add update-hashes command --- bioimageio/core/cli.py | 89 +++++++++++++++++++++++++++---------- bioimageio/core/commands.py | 3 +- 2 files changed, 66 insertions(+), 26 deletions(-) diff --git 
a/bioimageio/core/cli.py b/bioimageio/core/cli.py index c4102f33..703ff7d9 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -8,6 +8,7 @@ import shutil import subprocess import sys +from abc import ABC from argparse import RawTextHelpFormatter from difflib import SequenceMatcher from functools import cached_property @@ -42,7 +43,6 @@ SettingsConfigDict, YamlConfigSettingsSource, ) -from ruyaml import YAML from tqdm import tqdm from typing_extensions import assert_never @@ -53,8 +53,11 @@ load_description, save_bioimageio_yaml_only, settings, + update_format, + update_hashes, ) from bioimageio.spec._internal.io_basics import ZipPath +from bioimageio.spec._internal.io_utils import yaml from bioimageio.spec._internal.types import NotEmpty from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr, v0_4, v0_5 @@ -66,7 +69,6 @@ WeightFormatArgAny, package, test, - update_format, ) from .common import MemberId, SampleId, SupportedWeightsFormat from .digest_spec import get_member_ids, load_sample_for_model @@ -84,8 +86,6 @@ from .utils import VERSION from .weight_converters._add_weights import add_weights -yaml = YAML(typ="safe") - class CmdBase(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True): pass @@ -254,31 +254,68 @@ def _get_stat( return stat -class UpdateFormatCmd(CmdBase, WithSource): - """Update the metadata format""" +class UpdateCmdBase(CmdBase, WithSource, ABC): + output: Union[Literal["render", "stdout"], Path] = "render" + """Output updated bioimageio.yaml to the terminal or write to a file.""" - output: Optional[Path] = None - """Save updated bioimageio.yaml to this file. + exclude_unset: bool = Field(True, alias="exclude-unset") + """Exclude fields that have not explicitly be set.""" - Updated bioimageio.yaml is rendered to the terminal if the output is None. 
- """ - - exclude_defaults: bool = Field(True, alias="exclude-defaults") + exclude_defaults: bool = Field(False, alias="exclude-defaults") """Exclude fields that have the default value (even if set explicitly).""" + @cached_property + def updated(self) -> Union[ResourceDescr, InvalidDescr]: + raise NotImplementedError + def run(self): - updated = update_format( - self.source, output=self.output, exclude_defaults=self.exclude_defaults - ) - updated_stream = StringIO() + if self.output == "render": + out = StringIO() + elif self.output == "stdout": + out = sys.stdout + else: + out = self.output + save_bioimageio_yaml_only( - updated, updated_stream, exclude_defaults=self.exclude_defaults + self.updated, + out, + exclude_unset=self.exclude_unset, + exclude_defaults=self.exclude_defaults, + ) + + if self.output == "render": + assert isinstance(out, StringIO) + updated_md = f"```yaml\n{out.getvalue()}\n```" + + rich_markdown = rich.markdown.Markdown(updated_md) + console = rich.console.Console() + console.print(rich_markdown) + + +class UpdateFormatCmd(UpdateCmdBase): + """Update the metadata format to the latest format version.""" + + perform_io_checks: bool = Field( + settings.perform_io_checks, alias="perform-io-checks" + ) + """Wether or not to attempt validation that may require file download. 
+ If `True` file hash values are added if not present.""" + + @cached_property + def updated(self): + return update_format( + self.source, + exclude_defaults=self.exclude_defaults, + perform_io_checks=self.perform_io_checks, ) - updated_md = f"```yaml\n{updated_stream.getvalue()}\n```" - rich_markdown = rich.markdown.Markdown(updated_md) - console = rich.console.Console() - console.print(rich_markdown) + +class UpdateHashesCmd(UpdateCmdBase): + """Create a bioimageio.yaml description with updated file hashes.""" + + @cached_property + def updated(self): + return update_hashes(self.source) class PredictCmd(CmdBase, WithSource): @@ -690,6 +727,9 @@ class Bioimageio( update_format: CliSubCommand[UpdateFormatCmd] = Field(alias="update-format") """Update the metadata format""" + update_hashes: CliSubCommand[UpdateHashesCmd] = Field(alias="update-hashes") + """Create a bioimageio.yaml description with updated file hashes.""" + add_weights: CliSubCommand[ConvertWeightsCmd] = Field(alias="add-weights") """Add additional weights to the model descriptions converted from available formats to improve deployability.""" @@ -732,12 +772,13 @@ def run(self): pformat({k: v for k, v in self.model_dump().items() if v is not None}), ) cmd = ( - self.validate_format - or self.test + self.add_weights or self.package or self.predict + or self.test or self.update_format - or self.add_weights + or self.update_hashes + or self.validate_format ) assert cmd is not None cmd.run() diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 813c0ad3..feb95434 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -1,4 +1,4 @@ -"""These functions implement the logic of the bioimageio command line interface +"""These functions are used in the bioimageio command line interface defined in `bioimageio.core.cli`.""" from pathlib import Path @@ -13,7 +13,6 @@ save_bioimageio_package, save_bioimageio_package_as_folder, ) -from bioimageio.spec import update_format 
as update_format from ._resource_tests import test_description From b6522580e726eee7fa3196490002e08d296ab528 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Feb 2025 14:15:30 +0100 Subject: [PATCH 105/187] update unet2d_diff_output_shape example and add cli tests --- tests/conftest.py | 2 +- tests/test_cli.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9258c3b6..b8ea7d0f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -65,7 +65,7 @@ + "models/stardist_example_model/rdf_wrong_shape2_v0_4.yaml" ), "unet2d_diff_output_shape": ( - EXAMPLE_DESCRIPTIONS + "models/unet2d_diff_output_shape/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_diff_output_shape/bioimageio.yaml" ), "unet2d_expand_output_shape": ( EXAMPLE_DESCRIPTIONS diff --git a/tests/test_cli.py b/tests/test_cli.py index 4d98de70..0102f698 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -38,6 +38,8 @@ def run_subprocess( ["predict", "--example", "unet2d_nuclei_broad_model"], ["update-format", "unet2d_path_old_version"], ["increase-weight-formats", "unet2d_nuclei_broad_model"], + ["update-hashes", "unet2d_path_old_version"], + ["update-hashes", "unet2d_path_old_version", "--output=stdout"], ], ) def test_cli( From 4affce29cf0c55fdd746fc7b7256614a083f5ba7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 10:43:04 +0100 Subject: [PATCH 106/187] log output path --- bioimageio/core/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 703ff7d9..8fc2a60d 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -290,6 +290,8 @@ def run(self): rich_markdown = rich.markdown.Markdown(updated_md) console = rich.console.Console() console.print(rich_markdown) + elif self.output != "stdout": + logger.info(f"written updated description to {self.output}") class UpdateFormatCmd(UpdateCmdBase): From e97686cbfbe1e6459a86493ffe75a03b3a45dbe7 Mon Sep 
17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 10:43:53 +0100 Subject: [PATCH 107/187] unify cmd naming pattern --- bioimageio/core/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 8fc2a60d..057dfd68 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -662,7 +662,7 @@ def input_dataset(stat: Stat): save_sample(sp_out, sample_out) -class ConvertWeightsCmd(CmdBase, WithSource): +class AddWeightsCmd(CmdBase, WithSource): output: CliPositionalArg[Path] """The path to write the updated model package to.""" @@ -732,7 +732,7 @@ class Bioimageio( update_hashes: CliSubCommand[UpdateHashesCmd] = Field(alias="update-hashes") """Create a bioimageio.yaml description with updated file hashes.""" - add_weights: CliSubCommand[ConvertWeightsCmd] = Field(alias="add-weights") + add_weights: CliSubCommand[AddWeightsCmd] = Field(alias="add-weights") """Add additional weights to the model descriptions converted from available formats to improve deployability.""" From a2fefeb49528d6e5cc15c529dbee059e4882f6dc Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 10:44:26 +0100 Subject: [PATCH 108/187] limit converters to latest format --- .../weight_converters/keras_to_tensorflow.py | 42 ++++--- .../core/weight_converters/pytorch_to_onnx.py | 24 ++-- .../pytorch_to_torchscript.py | 110 ++---------------- .../weight_converters/torchscript_to_onnx.py | 16 +-- 4 files changed, 53 insertions(+), 139 deletions(-) diff --git a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py index 9670d2c2..696b039a 100644 --- a/bioimageio/core/weight_converters/keras_to_tensorflow.py +++ b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -6,11 +6,18 @@ import tensorflow -from bioimageio.core.io import ensure_unzipped from bioimageio.spec._internal.io import download from bioimageio.spec._internal.version_type import Version 
from bioimageio.spec.common import ZipPath -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model.v0_5 import ( + InputTensorDescr, + ModelDescr, + OutputTensorDescr, + TensorflowSavedModelBundleWeightsDescr, +) + +from .. import __version__ +from ..io import ensure_unzipped try: # try to build the tf model with the keras import from tensorflow @@ -21,8 +28,8 @@ def convert( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, output_path: Path -) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + model_descr: ModelDescr, output_path: Path +) -> TensorflowSavedModelBundleWeightsDescr: """ Convert model weights from the 'keras_hdf5' format to the 'tensorflow_saved_model_bundle' format. @@ -34,14 +41,11 @@ def convert( https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py Args: - model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + model_descr: The bioimage.io model description containing the model's metadata and weights. - output_path (Path): + output_path: The directory where the TensorFlow SavedModel bundle will be saved. This path must not already exist and, if necessary, will be zipped into a .zip file. - use_tracing (bool): - Placeholder argument; currently not used in this method but required to match the abstract method signature. - Raises: ValueError: - If the specified `output_path` already exists. @@ -52,8 +56,7 @@ def convert( If the model has multiple inputs or outputs and TensorFlow 1.x is being used. Returns: - v0_5.TensorflowSavedModelBundleWeightsDescr: - A descriptor object containing information about the converted TensorFlow SavedModel bundle. + A descriptor object containing information about the converted TensorFlow SavedModel bundle. 
""" tf_major_ver = int(tensorflow.__version__.split(".")[0]) @@ -87,12 +90,12 @@ def convert( input_name = str( d.id - if isinstance((d := model_descr.inputs[0]), v0_5.InputTensorDescr) + if isinstance((d := model_descr.inputs[0]), InputTensorDescr) else d.name ) output_name = str( d.id - if isinstance((d := model_descr.outputs[0]), v0_5.OutputTensorDescr) + if isinstance((d := model_descr.outputs[0]), OutputTensorDescr) else d.name ) return _convert_tf1( @@ -108,7 +111,7 @@ def convert( def _convert_tf2( keras_weight_path: Union[Path, ZipPath], output_path: Path, zip_weights: bool -) -> v0_5.TensorflowSavedModelBundleWeightsDescr: +) -> TensorflowSavedModelBundleWeightsDescr: model = keras.models.load_model(keras_weight_path) # type: ignore keras.models.save_model(model, output_path) # type: ignore @@ -116,10 +119,11 @@ def _convert_tf2( output_path = _zip_model_bundle(output_path) print("TensorFlow model exported to", output_path) - return v0_5.TensorflowSavedModelBundleWeightsDescr( + return TensorflowSavedModelBundleWeightsDescr( source=output_path, parent="keras_hdf5", tensorflow_version=Version(tensorflow.__version__), + comment=f"Converted with bioimageio.core {__version__}.", ) @@ -131,7 +135,7 @@ def _convert_tf1( input_name: str, output_name: str, zip_weights: bool, -) -> v0_5.TensorflowSavedModelBundleWeightsDescr: +) -> TensorflowSavedModelBundleWeightsDescr: @no_type_check def build_tf_model(): @@ -144,7 +148,9 @@ def build_tf_model(): ) signature_def_map = { - tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: ( + signature + ) } builder.add_meta_graph_and_variables( @@ -160,7 +166,7 @@ def build_tf_model(): output_path = _zip_model_bundle(output_path) print("TensorFlow model exported to", output_path) - return v0_5.TensorflowSavedModelBundleWeightsDescr( + return TensorflowSavedModelBundleWeightsDescr( source=output_path, 
parent="keras_hdf5", tensorflow_version=Version(tensorflow.__version__), diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index 9fd4615e..201567eb 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -1,9 +1,8 @@ from pathlib import Path -from typing import Union import torch.jit -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr from .. import __version__ from ..backends.pytorch_backend import load_torch_model @@ -12,23 +11,23 @@ def convert( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, + model_descr: ModelDescr, output_path: Path, + *, verbose: bool = False, opset_version: int = 20, -) -> v0_5.OnnxWeightsDescr: +) -> OnnxWeightsDescr: """ Convert model weights from the Torchscript state_dict format to the ONNX format. Args: - model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + model_descr: The model description object that contains the model and its weights. - output_path (Path): + output_path: The file path where the ONNX model will be saved. - verbose (bool, optional): + verbose: If True, will print out detailed information during the ONNX export process. Defaults to False. - opset_version (int, optional): + opset_version: The ONNX opset version to use for the export. Defaults to 15. Raises: @@ -36,8 +35,7 @@ def convert( If the provided model does not have weights in the PyTorch state_dict format. Returns: - v0_5.OnnxWeightsDescr: - A descriptor object that contains information about the exported ONNX weights. + A descriptor object that contains information about the exported ONNX weights. 
""" state_dict_weights_descr = model_descr.weights.pytorch_state_dict @@ -69,9 +67,9 @@ def convert( opset_version=opset_version, ) - return v0_5.OnnxWeightsDescr( + return OnnxWeightsDescr( source=output_path, parent="pytorch_state_dict", opset_version=opset_version, - comment=(f"Converted with bioimageio.core {__version__}."), + comment=f"Converted with bioimageio.core {__version__}.", ) diff --git a/bioimageio/core/weight_converters/pytorch_to_torchscript.py b/bioimageio/core/weight_converters/pytorch_to_torchscript.py index 8a362af9..3d0f281c 100644 --- a/bioimageio/core/weight_converters/pytorch_to_torchscript.py +++ b/bioimageio/core/weight_converters/pytorch_to_torchscript.py @@ -1,34 +1,31 @@ from pathlib import Path -from typing import Any, List, Sequence, Tuple, Union +from typing import Any, Tuple, Union -import numpy as np import torch -from numpy.testing import assert_array_almost_equal from torch.jit import ScriptModule -from typing_extensions import assert_never from bioimageio.spec._internal.version_type import Version -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model.v0_5 import ModelDescr, TorchscriptWeightsDescr from .. import __version__ from ..backends.pytorch_backend import load_torch_model def convert( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, + model_descr: ModelDescr, output_path: Path, + *, use_tracing: bool = True, -) -> v0_5.TorchscriptWeightsDescr: +) -> TorchscriptWeightsDescr: """ Convert model weights from the PyTorch `state_dict` format to TorchScript. Args: - model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + model_descr: The model description object that contains the model and its weights in the PyTorch `state_dict` format. - output_path (Path): + output_path: The file path where the TorchScript model will be saved. - use_tracing (bool): + use_tracing: Whether to use tracing or scripting to export the TorchScript format. 
- `True`: Use tracing, which is recommended for models with straightforward control flow. - `False`: Use scripting, which is better for models with dynamic control flow (e.g., loops, conditionals). @@ -38,8 +35,7 @@ def convert( If the provided model does not have weights in the PyTorch `state_dict` format. Returns: - v0_5.TorchscriptWeightsDescr: - A descriptor object that contains information about the exported TorchScript weights. + A descriptor object that contains information about the exported TorchScript weights. """ state_dict_weights_descr = model_descr.weights.pytorch_state_dict if state_dict_weights_descr is None: @@ -60,16 +56,10 @@ def convert( else torch.jit.script(model) ) assert not isinstance(scripted_model, tuple), scripted_model - _check_predictions( # TODO: remove - model=model, - scripted_model=scripted_model, - model_spec=model_descr, - input_data=input_data, - ) scripted_model.save(output_path) - return v0_5.TorchscriptWeightsDescr( + return TorchscriptWeightsDescr( source=output_path, pytorch_version=Version(torch.__version__), parent="pytorch_state_dict", @@ -78,83 +68,3 @@ def convert( + f" with use_tracing={use_tracing}." 
), ) - - -def _check_predictions( - model: Any, - scripted_model: Any, - model_spec: v0_4.ModelDescr | v0_5.ModelDescr, - input_data: Sequence[torch.Tensor], -): - def _check(input_: Sequence[torch.Tensor]) -> None: - expected_tensors = model(*input_) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors - ] - - output_tensors = scripted_model(*input_) - if isinstance(output_tensors, torch.Tensor): - output_tensors = [output_tensors] - outputs: List[np.ndarray[Any, Any]] = [out.numpy() for out in output_tensors] - - try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=4) - except AssertionError as e: - raise ValueError( - f"Results before and after weights conversion do not agree:\n {str(e)}" - ) - - _check(input_data) - - if len(model_spec.inputs) > 1: - return # FIXME: why don't we check multiple inputs? - - input_descr = model_spec.inputs[0] - if isinstance(input_descr, v0_4.InputTensorDescr): - if not isinstance(input_descr.shape, v0_4.ParameterizedInputShape): - return - min_shape = input_descr.shape.min - step = input_descr.shape.step - else: - min_shape: List[int] = [] - step: List[int] = [] - for axis in input_descr.axes: - if isinstance(axis.size, v0_5.ParameterizedSize): - min_shape.append(axis.size.min) - step.append(axis.size.step) - elif isinstance(axis.size, int): - min_shape.append(axis.size) - step.append(0) - elif axis.size is None: - raise NotImplementedError( - f"Can't verify inputs that don't specify their shape fully: {axis}" - ) - elif isinstance(axis.size, v0_5.SizeReference): - raise NotImplementedError(f"Can't handle axes like '{axis}' yet") - else: - assert_never(axis.size) - - input_tensor = input_data[0] - max_shape = input_tensor.shape - max_steps = 4 - - # check that input and output agree for decreasing input sizes - for step_factor in range(1, max_steps + 1): - 
slice_ = tuple( - ( - slice(None) - if step_dim == 0 - else slice(0, max_dim - step_factor * step_dim, 1) - ) - for max_dim, step_dim in zip(max_shape, step) - ) - sliced_input = input_tensor[slice_] - if any( - sliced_dim < min_dim - for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) - ): - return - _check([sliced_input]) diff --git a/bioimageio/core/weight_converters/torchscript_to_onnx.py b/bioimageio/core/weight_converters/torchscript_to_onnx.py index f7ca10f3..8b24034a 100644 --- a/bioimageio/core/weight_converters/torchscript_to_onnx.py +++ b/bioimageio/core/weight_converters/torchscript_to_onnx.py @@ -1,21 +1,21 @@ from pathlib import Path -from typing import Union import torch.jit -from bioimageio.core.digest_spec import get_member_id, get_test_inputs -from bioimageio.core.proc_setup import get_pre_and_postprocessing -from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr from bioimageio.spec.utils import download +from ..digest_spec import get_member_id, get_test_inputs +from ..proc_setup import get_pre_and_postprocessing + def convert( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, + model_descr: ModelDescr, output_path: Path, + *, verbose: bool = False, opset_version: int = 15, -) -> v0_5.OnnxWeightsDescr: +) -> OnnxWeightsDescr: """ Convert model weights from the PyTorch state_dict format to the ONNX format. 
@@ -71,6 +71,6 @@ def convert( opset_version=opset_version, ) - return v0_5.OnnxWeightsDescr( + return OnnxWeightsDescr( source=output_path, parent="pytorch_state_dict", opset_version=opset_version ) From 5185936143be2d9701d4e2a3fa84c432b6b0a0a5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 10:58:10 +0100 Subject: [PATCH 109/187] update tests --- setup.py | 2 +- tests/conftest.py | 2 +- tests/test_cli.py | 18 ++++++++++++------ tests/test_weight_converters.py | 5 ++++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 651e3796..2e9dbf02 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ "packaging>=17.0", "pre-commit", "pdoc", - "pyright==1.1.394", + "pyright==1.1.396", "pytest-cov", "pytest", ] diff --git a/tests/conftest.py b/tests/conftest.py index b8ea7d0f..32880b05 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -81,7 +81,7 @@ EXAMPLE_DESCRIPTIONS + "models/unet2d_keras_tf/v0_4.bioimageio.yaml" ), "unet2d_multi_tensor": ( - EXAMPLE_DESCRIPTIONS + "models/unet2d_multi_tensor/v0_4.bioimageio.yaml" + EXAMPLE_DESCRIPTIONS + "models/unet2d_multi_tensor/bioimageio.yaml" ), "unet2d_nuclei_broad_model_old": ( EXAMPLE_DESCRIPTIONS + "models/unet2d_nuclei_broad/v0_4_9.bioimageio.yaml" diff --git a/tests/test_cli.py b/tests/test_cli.py index 0102f698..203677ec 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,5 @@ import subprocess +from pathlib import Path from typing import Any, List, Sequence import pytest @@ -36,14 +37,17 @@ def run_subprocess( ], ["test", "unet2d_nuclei_broad_model"], ["predict", "--example", "unet2d_nuclei_broad_model"], - ["update-format", "unet2d_path_old_version"], - ["increase-weight-formats", "unet2d_nuclei_broad_model"], - ["update-hashes", "unet2d_path_old_version"], - ["update-hashes", "unet2d_path_old_version", "--output=stdout"], + ["update-format", "unet2d_nuclei_broad_model_old"], + ["add-weights", "unet2d_nuclei_broad_model", "tmp_path"], + 
["update-hashes", "unet2d_nuclei_broad_model_old"], + ["update-hashes", "unet2d_nuclei_broad_model_old", "--output=stdout"], ], ) def test_cli( - args: List[str], unet2d_nuclei_broad_model: str, unet2d_nuclei_broad_model_old: str + args: List[str], + unet2d_nuclei_broad_model: str, + unet2d_nuclei_broad_model_old: str, + tmp_path: Path, ): resolved_args = [ ( @@ -52,7 +56,9 @@ def test_cli( else ( unet2d_nuclei_broad_model_old if arg == "unet2d_nuclei_broad_model_old" - else arg + else ( + arg.replace("tmp_path", str(tmp_path)) if "tmp_path" in arg else arg + ) ) ) for arg in args diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 67d662fc..c2eb30fc 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -13,7 +13,10 @@ def test_pytorch_to_torchscript(any_torch_model, tmp_path): from bioimageio.core.weight_converters.pytorch_to_torchscript import convert - model_descr = load_description(any_torch_model) + model_descr = load_description(any_torch_model, perform_io_checks=False) + if model_descr.implemented_format_version_tuple[:2] == (0, 4): + pytest.skip("cannot convert to old 0.4 format") + out_path = tmp_path / "weights.pt" ret_val = convert(model_descr, out_path) assert out_path.exists() From c310947d43902e308da853c4d00229535fcfc858 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 15:23:43 +0100 Subject: [PATCH 110/187] alaways zip converted tf weights --- .../weight_converters/keras_to_tensorflow.py | 25 +++++++------------ tests/test_weight_converters.py | 17 +------------ 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py index 696b039a..261e335c 100644 --- a/bioimageio/core/weight_converters/keras_to_tensorflow.py +++ b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -44,8 +44,8 @@ def convert( model_descr: The bioimage.io model description 
containing the model's metadata and weights. output_path: - The directory where the TensorFlow SavedModel bundle will be saved. - This path must not already exist and, if necessary, will be zipped into a .zip file. + Path with .zip suffix (.zip is appended otherwise) to which a zip archive + with the TensorFlow SavedModel bundle will be saved. Raises: ValueError: - If the specified `output_path` already exists. @@ -60,11 +60,8 @@ def convert( """ tf_major_ver = int(tensorflow.__version__.split(".")[0]) - if output_path.suffix == ".zip": + if output_path.suffix != ".zip": output_path = output_path.with_suffix("") - zip_weights = True - else: - zip_weights = False if output_path.exists(): raise ValueError(f"The ouptut directory at {output_path} must not exist.") @@ -103,20 +100,18 @@ def convert( output_path, input_name, output_name, - zip_weights, ) else: - return _convert_tf2(weight_path, output_path, zip_weights) + return _convert_tf2(weight_path, output_path) def _convert_tf2( - keras_weight_path: Union[Path, ZipPath], output_path: Path, zip_weights: bool + keras_weight_path: Union[Path, ZipPath], output_path: Path ) -> TensorflowSavedModelBundleWeightsDescr: model = keras.models.load_model(keras_weight_path) # type: ignore - keras.models.save_model(model, output_path) # type: ignore + model.export(output_path) # type: ignore - if zip_weights: - output_path = _zip_model_bundle(output_path) + output_path = _zip_model_bundle(output_path) print("TensorFlow model exported to", output_path) return TensorflowSavedModelBundleWeightsDescr( @@ -134,13 +129,11 @@ def _convert_tf1( output_path: Path, input_name: str, output_name: str, - zip_weights: bool, ) -> TensorflowSavedModelBundleWeightsDescr: @no_type_check def build_tf_model(): keras_model = keras.models.load_model(keras_weight_path) - assert tensorflow is not None builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) signature = tensorflow.saved_model.signature_def_utils.predict_signature_def( 
inputs={input_name: keras_model.input}, @@ -162,14 +155,14 @@ def build_tf_model(): build_tf_model() - if zip_weights: - output_path = _zip_model_bundle(output_path) + output_path = _zip_model_bundle(output_path) print("TensorFlow model exported to", output_path) return TensorflowSavedModelBundleWeightsDescr( source=output_path, parent="keras_hdf5", tensorflow_version=Version(tensorflow.__version__), + comment=f"Converted with bioimageio.core {__version__}.", ) diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index c2eb30fc..49208103 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -48,23 +48,8 @@ def test_pytorch_to_onnx(convert_to_onnx, tmp_path): assert summary.status == "passed", summary.display() -def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): - from bioimageio.core.weight_converters.keras_to_tensorflow import convert - - model_descr = load_description(any_keras_model) - out_path = tmp_path / "weights" - ret_val = convert(model_descr, output_path=out_path) - assert out_path.exists() - assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) - assert ret_val.source == out_path - - model_descr.weights.keras = ret_val - summary = test_model(model_descr, weight_format="keras_hdf5") - assert summary.status == "passed", summary.display() - - @pytest.mark.skip() -def test_keras_to_tensorflow_zipped(any_keras_model: Path, tmp_path: Path): +def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): from bioimageio.core.weight_converters.keras_to_tensorflow import convert out_path = tmp_path / "weights.zip" From 5563e920d40578f7efc21277aa9f8cc5e1e72ac5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 15:27:20 +0100 Subject: [PATCH 111/187] add epsilon --- bioimageio/core/_resource_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 
c016f5be..d4c07900 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -558,7 +558,7 @@ def _test_model_inference( ) mismatched_elements = mismatched.sum().item() if mismatched_elements / expected.size > mismatched_tol / 1e6: - r_max_idx = (r_diff := abs_diff / abs(expected)).argmax() + r_max_idx = (r_diff := abs_diff / (abs(expected) + 1e-6)).argmax() r_max = r_diff[r_max_idx].item() r_actual = actual[r_max_idx].item() r_expected = expected[r_max_idx].item() @@ -570,7 +570,7 @@ def _test_model_inference( f"Output '{m}' disagrees with {mismatched_elements} of" + f" {expected.size} expected values." + f"\n Max relative difference: {r_max:.2e}" - + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e}\|)" + + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e} + 1e-6|)" + f" at {r_max_idx}" + f"\n Max absolute difference: {a_max:.2e}" + rf" (= \|{a_actual:.2e} - {a_expected:.2e}\|) at {a_max_idx}" From fe15700e5860505ee1a488ec51f030e3483f126e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 3 Mar 2025 16:04:16 +0100 Subject: [PATCH 112/187] move test_model imports --- tests/test_weight_converters.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 49208103..1bf65782 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -5,12 +5,12 @@ import pytest -from bioimageio.core import test_model from bioimageio.spec import load_description from bioimageio.spec.model import v0_5 def test_pytorch_to_torchscript(any_torch_model, tmp_path): + from bioimageio.core import test_model from bioimageio.core.weight_converters.pytorch_to_torchscript import convert model_descr = load_description(any_torch_model, perform_io_checks=False) @@ -28,6 +28,7 @@ def test_pytorch_to_torchscript(any_torch_model, tmp_path): def test_pytorch_to_onnx(convert_to_onnx, tmp_path): + from bioimageio.core import test_model 
from bioimageio.core.weight_converters.pytorch_to_onnx import convert model_descr = load_description(convert_to_onnx, format_version="latest") @@ -50,6 +51,7 @@ def test_pytorch_to_onnx(convert_to_onnx, tmp_path): @pytest.mark.skip() def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): + from bioimageio.core import test_model from bioimageio.core.weight_converters.keras_to_tensorflow import convert out_path = tmp_path / "weights.zip" From 165e9e8f09524d6578c9a94b95a06f1bdcb74910 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 14:47:17 +0100 Subject: [PATCH 113/187] add segment-anything for testing --- dev/env.yaml | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/dev/env.yaml b/dev/env.yaml index db12748a..6d97e402 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -34,6 +34,7 @@ dependencies: - rich - ruff - ruyaml + - segment-anything # for model testing - tensorflow>=2,<3 - torchvision - tqdm diff --git a/setup.py b/setup.py index 2e9dbf02..4ae3ccee 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,7 @@ "pyright==1.1.396", "pytest-cov", "pytest", + "segment-anything", # for model testing ] ), }, From a436bebedbb94dda9de7a3e58e7466aaf7e4f2b0 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 14:51:04 +0100 Subject: [PATCH 114/187] ignore bioimageio_unzipped_tf_weights/ --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d8be60be..c45ec89d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ __pycache__/ *.egg-info/ *.pyc **/tmp +bioimageio_unzipped_tf_weights/ build/ cache coverage.xml From 3865b3905ce790f9eeb738290bbf0a7f0f94aeae Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 14:52:27 +0100 Subject: [PATCH 115/187] use remembered file_name for prettier context --- bioimageio/core/_resource_tests.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 
d4c07900..35372548 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -417,6 +417,7 @@ def load_description_and_test( """Test RDF dynamically, e.g. model inference of test inputs""" if isinstance(source, ResourceDescrBase): root = source.root + file_name = source.file_name if ( ( format_version @@ -436,12 +437,15 @@ def load_description_and_test( source = dump_description(source) else: root = Path() + file_name = None if isinstance(source, ResourceDescrBase): rd = source elif isinstance(source, dict): # check context for a given root; default to root of source - context = validation_context_var.get(ValidationContext(root=root)).replace( + context = validation_context_var.get( + ValidationContext(root=root, file_name=file_name) + ).replace( perform_io_checks=True # make sure we perform io checks though ) @@ -558,7 +562,7 @@ def _test_model_inference( ) mismatched_elements = mismatched.sum().item() if mismatched_elements / expected.size > mismatched_tol / 1e6: - r_max_idx = (r_diff := abs_diff / (abs(expected) + 1e-6)).argmax() + r_max_idx = (r_diff := (abs_diff / (abs(expected) + 1e-6))).argmax() r_max = r_diff[r_max_idx].item() r_actual = actual[r_max_idx].item() r_expected = expected[r_max_idx].item() @@ -570,10 +574,10 @@ def _test_model_inference( f"Output '{m}' disagrees with {mismatched_elements} of" + f" {expected.size} expected values." 
+ f"\n Max relative difference: {r_max:.2e}" - + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e} + 1e-6|)" + + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e} + 1e-6\|)" + f" at {r_max_idx}" + f"\n Max absolute difference: {a_max:.2e}" - + rf" (= \|{a_actual:.2e} - {a_expected:.2e}\|) at {a_max_idx}" + + rf" (= \|{a_actual:.7e} - {a_expected:.7e}\|) at {a_max_idx}" ) break except Exception as e: From 4d4f3d3491068170c3c7602553fabfeaf8f49fb8 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 14:52:56 +0100 Subject: [PATCH 116/187] skip known failure --- tests/test_bioimageio_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 2cf9ced0..1fdfc491 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -38,7 +38,7 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: yield pytest.param(descr_url, sha, key, id=key) -KNOWN_INVALID: Collection[str] = set() +KNOWN_INVALID: Collection[str] = {"stupendous-sheep/1.2"} @expensive_test From 060603dcd7e9dcf8c11783fc31d8e51c5bf09847 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 14:53:31 +0100 Subject: [PATCH 117/187] default to an absolute_tolerance for legacy models too --- bioimageio/core/_resource_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 35372548..25770354 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -517,7 +517,7 @@ def _get_tolerance( rtol = 0 mismatched_tol = 0 else: - atol = deprecated.get("absolute_tolerance", 0) + atol = deprecated.get("absolute_tolerance", 1e-5) rtol = deprecated.get("relative_tolerance", 1e-3) mismatched_tol = 0 From 570a7ff13a7c48c2c196d3f0135c92020821c8a5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 14:57:26 +0100 Subject: 
[PATCH 118/187] ignore dogfood/ --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c45ec89d..1ed5e10a 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ cache coverage.xml dist/ docs/ +dogfood/ typings/pooch/ From 55ef47ea2e3ab6099e65a37287d153f81ee4a1fd Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 16:22:59 +0100 Subject: [PATCH 119/187] add timm for model testing --- dev/env.yaml | 1 + setup.py | 1 + tests/test_bioimageio_spec_version.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/env.yaml b/dev/env.yaml index 6d97e402..067528c8 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -36,6 +36,7 @@ dependencies: - ruyaml - segment-anything # for model testing - tensorflow>=2,<3 + - timm # for model testing - torchvision - tqdm - typing-extensions diff --git a/setup.py b/setup.py index 4ae3ccee..6a7bb5dd 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,7 @@ "pytest-cov", "pytest", "segment-anything", # for model testing + "timm", # for model testing ] ), }, diff --git a/tests/test_bioimageio_spec_version.py b/tests/test_bioimageio_spec_version.py index 921ecd9c..2418baa5 100644 --- a/tests/test_bioimageio_spec_version.py +++ b/tests/test_bioimageio_spec_version.py @@ -8,7 +8,7 @@ def test_bioimageio_spec_version(conda_cmd: Optional[str]): if conda_cmd is None: - pytest.skip("requires mamba") + pytest.skip("requires conda") from importlib.metadata import metadata From 36a0812b8deb785eb3af805b595ba07a75333897 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 4 Mar 2025 16:23:31 +0100 Subject: [PATCH 120/187] return tested model descr --- bioimageio/core/weight_converters/_add_weights.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py index ce77ef5c..978c8450 100644 --- a/bioimageio/core/weight_converters/_add_weights.py +++ 
b/bioimageio/core/weight_converters/_add_weights.py @@ -11,7 +11,7 @@ ) from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat -from .._resource_tests import test_model +from .._resource_tests import load_description_and_test def add_weights( @@ -168,5 +168,6 @@ def add_weights( logger.info("added weights formats {}", originally_missing - missing) # resave model with updated rdf.yaml _ = save_bioimageio_package_as_folder(model_descr, output_path=output_path) - _ = test_model(model_descr) - return model_descr + tested_model_descr = load_description_and_test(model_descr) + assert isinstance(tested_model_descr, ModelDescr) + return tested_model_descr From b74686e271a3af4c79aa54728ff12f2f3c3bedf4 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 10 Mar 2025 09:58:36 +0100 Subject: [PATCH 121/187] check legacy test kwargs for weight format specific tolerance --- bioimageio/core/_resource_tests.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 25770354..42e824ea 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -494,6 +494,20 @@ def _get_tolerance( ) -> Tuple[RelativeTolerance, AbsoluteTolerance, MismatchedElementsPerMillion]: if isinstance(model, v0_5.ModelDescr): applicable = v0_5.ReproducibilityTolerance() + + # check legacy test kwargs for weight format specific tolerance + if model.config.bioimageio.model_extra is not None: + for weights_format, test_kwargs in model.config.bioimageio.model_extra.get( + "test_kwargs", {} + ).items(): + if wf == weights_format: + applicable = v0_5.ReproducibilityTolerance( + relative_tolerance=test_kwargs.get("relative_tolerance", 1e-3), + absolute_tolerance=test_kwargs.get("absolute_tolerance", 1e-4), + ) + break + + # check for weights format and output tensor specific tolerance for a in model.config.bioimageio.reproducibility_tolerance: if (not a.weights_formats or wf in 
a.weights_formats) and ( not a.output_ids or m in a.output_ids @@ -517,6 +531,7 @@ def _get_tolerance( rtol = 0 mismatched_tol = 0 else: + # use given (deprecated) test kwargs atol = deprecated.get("absolute_tolerance", 1e-5) rtol = deprecated.get("relative_tolerance", 1e-3) mismatched_tol = 0 From c88c73e47dcde9636e7fad1b459aefd03ed6e37b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 13 Mar 2025 12:43:29 +0100 Subject: [PATCH 122/187] mark known invalid resource descriptions --- bioimageio/core/commands.py | 1 + dev/env.yaml | 1 + tests/test_bioimageio_collection.py | 7 ++++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index feb95434..37d5ef87 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -64,6 +64,7 @@ def validate_format( return 0 if descr.validation_summary.status == "passed" else 1 +# TODO: absorb into `save_bioimageio_package` def package( descr: ResourceDescr, path: Path, diff --git a/dev/env.yaml b/dev/env.yaml index 067528c8..adffd15e 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -6,6 +6,7 @@ channels: dependencies: - bioimageio.spec>=0.5.3.6 - black + # - careamics # TODO: add careamics for model testing (currently pins pydantic to <2.9) # - crick # currently requires python<=3.9 - h5py - imagecodecs diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 1fdfc491..3477bc85 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -38,7 +38,12 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: yield pytest.param(descr_url, sha, key, id=key) -KNOWN_INVALID: Collection[str] = {"stupendous-sheep/1.2"} +KNOWN_INVALID: Collection[str] = { + "stupendous-sheep/1.2", + "wild-rhino/0.1.0", # requires careamics + "dazzling-spider/0.1.0", # requires careamics + "humorous-fox/0.1.0", # requires careamics +} @expensive_test From db0f0da67f3c7a572acb65579b91e4fbe4e89f86 Mon 
Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 13 Mar 2025 12:45:22 +0100 Subject: [PATCH 123/187] load from zip for images/tensors --- bioimageio/core/digest_spec.py | 11 +++++++---- bioimageio/core/io.py | 9 ++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 0eef0a72..44e362ee 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -25,7 +25,7 @@ from typing_extensions import Unpack, assert_never from bioimageio.spec._internal.io import HashKwargs, resolve_and_extract -from bioimageio.spec.common import FileSource +from bioimageio.spec.common import FileDescr, FileSource, ZipPath from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_4 import CallableFromDepencency, CallableFromFile from bioimageio.spec.model.v0_5 import ( @@ -33,7 +33,7 @@ ArchitectureFromLibraryDescr, ParameterizedSize_N, ) -from bioimageio.spec.utils import load_array +from bioimageio.spec.utils import download, load_array from .axis import Axis, AxisId, AxisInfo, AxisLike, PerAxis from .block_meta import split_multiple_shapes_into_blocks @@ -315,7 +315,7 @@ def get_io_sample_block_metas( def get_tensor( - src: TensorSource, + src: Union[ZipPath, TensorSource], ipt: Union[v0_4.InputTensorDescr, v0_5.InputTensorDescr], ): """helper to cast/load various tensor sources""" @@ -329,7 +329,10 @@ def get_tensor( if isinstance(src, np.ndarray): return Tensor.from_numpy(src, dims=get_axes_infos(ipt)) - if isinstance(src, Path): + if isinstance(src, FileDescr): + src = download(src).path + + if isinstance(src, (ZipPath, Path, str)): return load_tensor(src, axes=get_axes_infos(ipt)) assert_never(src) diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 146ab115..81ac60c4 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -44,7 +44,7 @@ def load_image( - source: PermissiveFileSource, is_volume: Optional[bool] = None + source: 
Union[ZipPath, PermissiveFileSource], is_volume: Optional[bool] = None ) -> NDArray[Any]: """load a single image as numpy array @@ -55,7 +55,10 @@ def load_image( if is_volume is not None: warnings.warn("**is_volume** is deprecated and will be removed soon.") - parsed_source = interprete_file_source(source) + if isinstance(source, ZipPath): + parsed_source = source + else: + parsed_source = interprete_file_source(source) if isinstance(parsed_source, RelativeFilePath): src = parsed_source.absolute() @@ -110,7 +113,7 @@ def load_image( def load_tensor( - path: Union[Path, str], axes: Optional[Sequence[AxisLike]] = None + path: Union[ZipPath, Path, str], axes: Optional[Sequence[AxisLike]] = None ) -> Tensor: # TODO: load axis meta data array = load_image(path) From 9ad111be2c83a68e65d8144cdd7331e9e827e1f3 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 13 Mar 2025 13:08:19 +0100 Subject: [PATCH 124/187] improve update commands --- bioimageio/core/cli.py | 95 +++++++++++++++++++------------ bioimageio/core/utils/__init__.py | 2 + bioimageio/core/utils/_compare.py | 30 ++++++++++ 3 files changed, 91 insertions(+), 36 deletions(-) create mode 100644 bioimageio/core/utils/_compare.py diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 057dfd68..ed33d292 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -56,20 +56,16 @@ update_format, update_hashes, ) +from bioimageio.spec._internal.io import is_yaml_value from bioimageio.spec._internal.io_basics import ZipPath -from bioimageio.spec._internal.io_utils import yaml +from bioimageio.spec._internal.io_utils import open_bioimageio_yaml from bioimageio.spec._internal.types import NotEmpty from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr, v0_4, v0_5 from bioimageio.spec.notebook import NotebookDescr -from bioimageio.spec.utils import download, ensure_description_is_model +from bioimageio.spec.utils import download, ensure_description_is_model, 
write_yaml -from .commands import ( - WeightFormatArgAll, - WeightFormatArgAny, - package, - test, -) +from .commands import WeightFormatArgAll, WeightFormatArgAny, package, test from .common import MemberId, SampleId, SupportedWeightsFormat from .digest_spec import get_member_ids, load_sample_for_model from .io import load_dataset_stat, save_dataset_stat, save_sample @@ -83,7 +79,7 @@ ) from .sample import Sample from .stat_measures import Stat -from .utils import VERSION +from .utils import VERSION, compare from .weight_converters._add_weights import add_weights @@ -193,7 +189,7 @@ def run(self): ) -class PackageCmd(CmdBase, WithSource): +class PackageCmd(CmdBase, WithSource, WithSummaryLogging): """Save a resource's metadata with its associated files.""" path: CliPositionalArg[Path] @@ -206,10 +202,8 @@ class PackageCmd(CmdBase, WithSource): def run(self): if isinstance(self.descr, InvalidDescr): - paths = self.descr.validation_summary.log() - raise ValueError( - f"Invalid {self.descr.type} description. Logged details to {paths}" - ) + self.log(self.descr) + raise ValueError(f"Invalid {self.descr.type} description.") sys.exit( package( @@ -258,6 +252,12 @@ class UpdateCmdBase(CmdBase, WithSource, ABC): output: Union[Literal["render", "stdout"], Path] = "render" """Output updated bioimageio.yaml to the terminal or write to a file.""" + diff: Union[bool, Path] = Field(True, alias="diff") + """Output a diff of original and updated bioimageio.yaml. + If a given path has an `.html` extension, a standalone HTML file is written, + otherwise the diff is saved in unified diff format (pure text). 
+ """ + exclude_unset: bool = Field(True, alias="exclude-unset") """Exclude fields that have not explicitly be set.""" @@ -269,29 +269,50 @@ def updated(self) -> Union[ResourceDescr, InvalidDescr]: raise NotImplementedError def run(self): - if self.output == "render": - out = StringIO() - elif self.output == "stdout": - out = sys.stdout - else: - out = self.output + original_yaml = open_bioimageio_yaml(self.source).local_source.read_text( + encoding="utf-8" + ) + assert isinstance(original_yaml, str) + stream = StringIO() save_bioimageio_yaml_only( self.updated, - out, + stream, exclude_unset=self.exclude_unset, exclude_defaults=self.exclude_defaults, ) + updated_yaml = stream.getvalue() + + diff = compare( + original_yaml.split("\n"), + updated_yaml.split("\n"), + diff_format=( + "html" + if isinstance(self.diff, Path) and self.diff.suffix == ".html" + else "unified" + ), + ) - if self.output == "render": - assert isinstance(out, StringIO) - updated_md = f"```yaml\n{out.getvalue()}\n```" + if isinstance(self.diff, Path): + _ = self.diff.write_text(diff, encoding="utf-8") + elif self.diff: + diff_md = f"````````diff\n{diff}\n````````" + rich.console.Console().print(rich.markdown.Markdown(diff_md)) - rich_markdown = rich.markdown.Markdown(updated_md) - console = rich.console.Console() - console.print(rich_markdown) - elif self.output != "stdout": + if isinstance(self.output, Path): + _ = self.output.write_text(updated_yaml, encoding="utf-8") logger.info(f"written updated description to {self.output}") + elif self.output == "render": + updated_md = f"```yaml\n{updated_yaml}\n```" + rich.console.Console().print(rich.markdown.Markdown(updated_md)) + elif self.output == "stdout": + print(updated_yaml) + else: + assert_never(self.output) + + if isinstance(self.updated, InvalidDescr): + logger.warning("Update resulted in invalid description") + _ = self.updated.validation_summary.display() class UpdateFormatCmd(UpdateCmdBase): @@ -339,7 +360,7 @@ class PredictCmd(CmdBase, 
WithSource): Example inputs to process sample 'a' and 'b' for a model expecting a 'raw' and a 'mask' input tensor: - --inputs="[[\"a_raw.tif\",\"a_mask.tif\"],[\"b_raw.tif\",\"b_mask.tif\"]]" + --inputs="[[\\"a_raw.tif\\",\\"a_mask.tif\\"],[\\"b_raw.tif\\",\\"b_mask.tif\\"]]" (Note that JSON double quotes need to be escaped.) Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file @@ -435,13 +456,15 @@ def _example(self): bioimageio_cli_path = example_path / YAML_FILE stats_file = "dataset_statistics.json" stats = (example_path / stats_file).as_posix() - yaml.dump( - dict( - inputs=inputs, - outputs=output_pattern, - stats=stats_file, - blockwise=self.blockwise, - ), + cli_example_args = dict( + inputs=inputs, + outputs=output_pattern, + stats=stats_file, + blockwise=self.blockwise, + ) + assert is_yaml_value(cli_example_args) + write_yaml( + cli_example_args, bioimageio_cli_path, ) diff --git a/bioimageio/core/utils/__init__.py b/bioimageio/core/utils/__init__.py index 84e94d38..695f0172 100644 --- a/bioimageio/core/utils/__init__.py +++ b/bioimageio/core/utils/__init__.py @@ -2,6 +2,8 @@ import sys from pathlib import Path +from ._compare import compare as compare + if sys.version_info < (3, 9): def files(package_name: str): diff --git a/bioimageio/core/utils/_compare.py b/bioimageio/core/utils/_compare.py new file mode 100644 index 00000000..b8c673a9 --- /dev/null +++ b/bioimageio/core/utils/_compare.py @@ -0,0 +1,30 @@ +from difflib import HtmlDiff, unified_diff +from typing import Sequence + +from typing_extensions import Literal, assert_never + + +def compare( + a: Sequence[str], + b: Sequence[str], + name_a: str = "source", + name_b: str = "updated", + *, + diff_format: Literal["unified", "html"], +): + if diff_format == "html": + diff = HtmlDiff().make_file(a, b, name_a, name_b, charset="utf-8") + elif diff_format == "unified": + diff = "\n".join( + unified_diff( + a, + b, + name_a, + name_b, + lineterm="", + ) + ) + else: + 
assert_never(diff_format) + + return diff From 24797e6e75b3b481430c41f0daf33488187af1e5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 13 Mar 2025 13:13:12 +0100 Subject: [PATCH 125/187] show ruff fixes during precommit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ef0eba58..2bee435e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: rev: v0.3.2 hooks: - id: ruff - args: [--fix] + args: [--fix, --show-fixes] - repo: local hooks: - id: pyright From 97929a6eb4b4c00bb28d60c05339c167794d5e41 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 13 Mar 2025 14:39:05 +0100 Subject: [PATCH 126/187] add cellpose for model testing --- dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 2 +- dev/env-wo-python.yaml | 2 +- dev/env.yaml | 3 ++- setup.py | 1 + 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index d29201e8..8e4275a7 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -1,4 +1,4 @@ -# manipulated copy of env.yaml +# manipulated copy of env.yaml wo dependencies 'for model testing' name: core38 channels: - conda-forge diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index af054496..87038879 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -1,4 +1,4 @@ -# modified copy of env.yaml +# modified copy of env.yaml wo dependencies 'for model testing' name: core-tf # changed channels: - conda-forge diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index 3308f64a..28356364 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -1,4 +1,4 @@ -# modified copy of env.yaml +# modified copy of env.yaml wo dependencies 'for model testing' name: core channels: - conda-forge diff --git a/dev/env.yaml b/dev/env.yaml index adffd15e..c42fe738 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -7,6 +7,7 @@ dependencies: - bioimageio.spec>=0.5.3.6 - black # - careamics # TODO: add 
careamics for model testing (currently pins pydantic to <2.9) + - cellpose # for model testing # - crick # currently requires python<=3.9 - h5py - imagecodecs @@ -29,7 +30,7 @@ dependencies: - pyright - pytest - pytest-cov - - python=3.12 + - python=3.11 # 3.12 not supported by cellpose->fastremap - pytorch>=2.1,<3 - requests - rich diff --git a/setup.py b/setup.py index 6a7bb5dd..10a49f1e 100644 --- a/setup.py +++ b/setup.py @@ -67,6 +67,7 @@ "pytest", "segment-anything", # for model testing "timm", # for model testing + "cellpose", # for model testing ] ), }, From ed3e38d547b878b6714e2a66bad37a18726b70d1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 13 Mar 2025 15:01:09 +0100 Subject: [PATCH 127/187] bump spec version --- dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 2 +- dev/env-wo-python.yaml | 2 +- dev/env.yaml | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 8e4275a7..5ec6199c 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -5,7 +5,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.3.6 + - bioimageio.spec>=0.5.4.0 - black - crick # uncommented - h5py diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 87038879..595665e8 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -5,7 +5,7 @@ channels: - nodefaults # - pytroch # removed dependencies: - - bioimageio.spec>=0.5.3.6 + - bioimageio.spec>=0.5.4.0 - black # - crick # currently requires python<=3.9 - h5py diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index 28356364..69013085 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -5,7 +5,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.3.6 + - bioimageio.spec>=0.5.4.0 - black # - crick # currently requires python<=3.9 - h5py diff --git a/dev/env.yaml b/dev/env.yaml index c42fe738..bcfa7d3c 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -4,7 +4,7 @@ channels: - nodefaults - pytorch 
dependencies: - - bioimageio.spec>=0.5.3.6 + - bioimageio.spec>=0.5.4.0 - black # - careamics # TODO: add careamics for model testing (currently pins pydantic to <2.9) - cellpose # for model testing diff --git a/setup.py b/setup.py index 10a49f1e..47229f75 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.3.6", + "bioimageio.spec ==0.5.4.0", "h5py", "imagecodecs", "imageio>=2.10", From af396aa2f4f42bec86b640c007597c9c935670de Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 17 Mar 2025 15:24:11 +0100 Subject: [PATCH 128/187] various fixes --- bioimageio/core/_resource_tests.py | 2 +- bioimageio/core/backends/onnx_backend.py | 5 ++--- bioimageio/core/backends/torchscript_backend.py | 4 +++- tests/test_bioimageio_collection.py | 5 +++++ tests/test_stat_calculators.py | 3 +-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 42e824ea..09ca463e 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -428,7 +428,7 @@ def load_description_and_test( ) ) or (c := source.validation_summary.details[0].context) is None - or not c["perform_io_checks"] + or not c.perform_io_checks ): logger.debug( "deserializing source to ensure we validate and test using format {} and perform io checks", diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index 901deb5e..b66c32fe 100644 --- a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -23,9 +23,8 @@ def __init__( if model_description.weights.onnx is None: raise ValueError("No ONNX weights specified for {model_description.name}") - self._session = rt.InferenceSession( - str(download(model_description.weights.onnx.source).path) - ) + local_path = download(model_description.weights.onnx.source).path + self._session = 
rt.InferenceSession(local_path.read_bytes()) onnx_inputs = self._session.get_inputs() # type: ignore self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] # type: ignore diff --git a/bioimageio/core/backends/torchscript_backend.py b/bioimageio/core/backends/torchscript_backend.py index 26924e3c..cb153a49 100644 --- a/bioimageio/core/backends/torchscript_backend.py +++ b/bioimageio/core/backends/torchscript_backend.py @@ -36,7 +36,9 @@ def __init__( "Multiple devices for single torchscript model not yet implemented" ) - self._model = torch.jit.load(weight_path) + with weight_path.open("rb") as f: + self._model = torch.jit.load(f) + self._model.to(self.devices[0]) self._model = self._model.eval() diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 3477bc85..1cc8132a 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -43,6 +43,11 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: "wild-rhino/0.1.0", # requires careamics "dazzling-spider/0.1.0", # requires careamics "humorous-fox/0.1.0", # requires careamics + "ambitious-sloth/1.2", # requires inferno + "dynamic-t-rex/1", # model.v0_4.ScaleLinearKwargs with axes + "famous-fish/0.1.0", # list index out of range `fl[3]` + "happy-elephant/0.1.0", # list index out of range `fl[3]` + "affectionate-cow/0.1.0", # custom dependencies } diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 55ea1fec..dd2823b6 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -1,8 +1,7 @@ -from typing import Tuple +from typing import Optional, Tuple import numpy as np import pytest -from git import Optional from xarray.testing import assert_allclose # pyright: ignore[reportUnknownVariableType] from bioimageio.core.axis import AxisId From 8a06149bbad2636c49def3ae04077c560fda231d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 17 Mar 2025 15:28:53 +0100 Subject: [PATCH 129/187] 
import architecture without explicitly extracting the package --- bioimageio/core/digest_spec.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 44e362ee..1071370c 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -24,7 +24,7 @@ from numpy.typing import NDArray from typing_extensions import Unpack, assert_never -from bioimageio.spec._internal.io import HashKwargs, resolve_and_extract +from bioimageio.spec._internal.io import HashKwargs, resolve from bioimageio.spec.common import FileDescr, FileSource, ZipPath from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_4 import CallableFromDepencency, CallableFromFile @@ -84,17 +84,10 @@ def import_callable( def _import_from_file_impl( source: FileSource, callable_name: str, **kwargs: Unpack[HashKwargs] ): - local_file = resolve_and_extract(source, **kwargs) - module_name = local_file.path.stem - importlib_spec = importlib.util.spec_from_file_location( - module_name, local_file.path - ) - if importlib_spec is None: - raise ImportError(f"Failed to import {module_name} from {source}.") - - dep = importlib.util.module_from_spec(importlib_spec) - importlib_spec.loader.exec_module(dep) # type: ignore # todo: possible to use "loader.load_module"? 
- return getattr(dep, callable_name) + code = resolve(source, **kwargs).path.read_text(encoding="utf-8") + module_globals: Dict[str, Any] = {} + exec(code, module_globals) + return module_globals[callable_name] def get_axes_infos( From bacfd76bced0e01738046ae8c09eeb6ebeac9a56 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 18 Mar 2025 10:38:51 +0100 Subject: [PATCH 130/187] fix type annotation --- bioimageio/core/backends/pytorch_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index 2b688901..0a1c01e2 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -46,7 +46,7 @@ def __init__( self._primary_device = devices[0] def _forward_impl( - self, input_arrays: Sequence[NDArray[Any] | None] + self, input_arrays: Sequence[Optional[NDArray[Any]]] ) -> List[Optional[NDArray[Any]]]: tensors = [ None if a is None else torch.from_numpy(a).to(self._primary_device) From b847ced152dccafdad31b5eeb4a7db2e73e540a3 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 18 Mar 2025 11:08:58 +0100 Subject: [PATCH 131/187] improve onnx converters --- bioimageio/core/weight_converters/_utils_onnx.py | 15 +++++++++++++++ .../core/weight_converters/pytorch_to_onnx.py | 6 +++++- .../core/weight_converters/torchscript_to_onnx.py | 10 +++++++++- 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 bioimageio/core/weight_converters/_utils_onnx.py diff --git a/bioimageio/core/weight_converters/_utils_onnx.py b/bioimageio/core/weight_converters/_utils_onnx.py new file mode 100644 index 00000000..3c45d245 --- /dev/null +++ b/bioimageio/core/weight_converters/_utils_onnx.py @@ -0,0 +1,15 @@ +from collections import defaultdict +from itertools import chain +from typing import DefaultDict, Dict + +from bioimageio.spec.model.v0_5 import ModelDescr + + +def get_dynamic_axes(model_descr: ModelDescr): + dynamic_axes: 
DefaultDict[str, Dict[int, str]] = defaultdict(dict) + for d in chain(model_descr.inputs, model_descr.outputs): + for i, ax in enumerate(d.axes): + if not isinstance(ax.size, int): + dynamic_axes[str(d.id)][i] = str(ax.id) + + return dynamic_axes diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index 201567eb..cc3ed75b 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -8,6 +8,7 @@ from ..backends.pytorch_backend import load_torch_model from ..digest_spec import get_member_id, get_test_inputs from ..proc_setup import get_pre_and_postprocessing +from ._utils_onnx import get_dynamic_axes def convert( @@ -15,7 +16,7 @@ def convert( output_path: Path, *, verbose: bool = False, - opset_version: int = 20, + opset_version: int = 15, ) -> OnnxWeightsDescr: """ Convert model weights from the Torchscript state_dict format to the ONNX format. @@ -63,6 +64,9 @@ def convert( model, tuple(inputs_torch), str(output_path), + input_names=[str(d.id) for d in model_descr.inputs], + output_names=[str(d.id) for d in model_descr.outputs], + dynamic_axes=get_dynamic_axes(model_descr), verbose=verbose, opset_version=opset_version, ) diff --git a/bioimageio/core/weight_converters/torchscript_to_onnx.py b/bioimageio/core/weight_converters/torchscript_to_onnx.py index 8b24034a..d58b47ab 100644 --- a/bioimageio/core/weight_converters/torchscript_to_onnx.py +++ b/bioimageio/core/weight_converters/torchscript_to_onnx.py @@ -5,8 +5,10 @@ from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr from bioimageio.spec.utils import download +from .. 
import __version__ from ..digest_spec import get_member_id, get_test_inputs from ..proc_setup import get_pre_and_postprocessing +from ._utils_onnx import get_dynamic_axes def convert( @@ -67,10 +69,16 @@ def convert( model, # type: ignore tuple(inputs_torch), str(output_path), + input_names=[str(d.id) for d in model_descr.inputs], + output_names=[str(d.id) for d in model_descr.outputs], + dynamic_axes=get_dynamic_axes(model_descr), verbose=verbose, opset_version=opset_version, ) return OnnxWeightsDescr( - source=output_path, parent="pytorch_state_dict", opset_version=opset_version + source=output_path, + parent="torchscript", + opset_version=opset_version, + comment=f"Converted with bioimageio.core {__version__}.", ) From aa1079b57f02dc881db1df9097f9020fcac990e5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 18 Mar 2025 11:30:57 +0100 Subject: [PATCH 132/187] fix UpdateCmdBase.run --- bioimageio/core/cli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index ed33d292..6ac3f092 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -269,9 +269,7 @@ def updated(self) -> Union[ResourceDescr, InvalidDescr]: raise NotImplementedError def run(self): - original_yaml = open_bioimageio_yaml(self.source).local_source.read_text( - encoding="utf-8" - ) + original_yaml = open_bioimageio_yaml(self.source).unparsed_content assert isinstance(original_yaml, str) stream = StringIO() From a1cd381288f9bedf3b7c43cbe93ca3bae8838655 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 19 Mar 2025 11:07:29 +0100 Subject: [PATCH 133/187] add monai for model testing --- dev/env.yaml | 1 + setup.py | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dev/env.yaml b/dev/env.yaml index bcfa7d3c..1eba41f7 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -17,6 +17,7 @@ dependencies: - keras>=3.0,<4 - loguru - matplotlib + - monai # for model testing - numpy - onnx - onnxruntime diff --git 
a/setup.py b/setup.py index 47229f75..4e9d670b 100644 --- a/setup.py +++ b/setup.py @@ -53,21 +53,22 @@ pytorch_deps + [ "black", - # "crick", # currently requires python<=3.9 - "jupyter", + "cellpose", # for model testing "jupyter-black", + "jupyter", "matplotlib", + "monai", # for model testing "onnx", "onnxruntime", "packaging>=17.0", - "pre-commit", "pdoc", + "pre-commit", "pyright==1.1.396", "pytest-cov", "pytest", "segment-anything", # for model testing "timm", # for model testing - "cellpose", # for model testing + # "crick", # currently requires python<=3.9 ] ), }, From 02883c1088e40beb3f3d2bc1073772dc7f159e73 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 19 Mar 2025 17:25:50 +0100 Subject: [PATCH 134/187] update dev env --- dev/env.yaml | 3 ++- setup.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dev/env.yaml b/dev/env.yaml index 1eba41f7..7aaa1fed 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -40,9 +40,10 @@ dependencies: - segment-anything # for model testing - tensorflow>=2,<3 - timm # for model testing - - torchvision + - torchvision>=0.21 - tqdm - typing-extensions - xarray - pip: + - git+https://github.com/ChaoningZhang/MobileSAM.git # for model testing - -e .. 
diff --git a/setup.py b/setup.py index 4e9d670b..187cb3eb 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,9 @@ ], include_package_data=True, extras_require={ - "pytorch": (pytorch_deps := ["torch>=1.6,<3", "torchvision", "keras>=3.0,<4"]), + "pytorch": ( + pytorch_deps := ["torch>=1.6,<3", "torchvision>=0.21", "keras>=3.0,<4"] + ), "tensorflow": ["tensorflow", "keras>=2.15,<4"], "onnx": ["onnxruntime"], "dev": ( From 312440a2f56ccb114a2e544e90bd3753ff8ab695 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 20 Mar 2025 21:32:54 +0100 Subject: [PATCH 135/187] always use AxisId that now includes axis id normalization --- bioimageio/core/proc_ops.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index 1b2cf183..e504bf07 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -59,12 +59,7 @@ def _convert_axis_ids( else: assert_never(mode) - ret.extend( - [ - AxisId(v0_5._AXIS_ID_MAP.get(a, a)) # pyright: ignore[reportPrivateUsage] - for a in axes - ] - ) + ret.extend([AxisId(a) for a in axes]) return tuple(ret) @@ -620,7 +615,7 @@ def from_proc_descr( if isinstance(descr.kwargs, v0_5.FixedZeroMeanUnitVarianceKwargs): dims = None elif isinstance(descr.kwargs, v0_5.FixedZeroMeanUnitVarianceAlongAxisKwargs): - dims = (descr.kwargs.axis,) + dims = (AxisId(descr.kwargs.axis),) else: assert_never(descr.kwargs) From e16a9ca71032a09cab87604cbf4222ad28cd3818 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 22 Mar 2025 19:36:49 +0100 Subject: [PATCH 136/187] only run parameterized tests if tests passed so far --- bioimageio/core/_resource_tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 09ca463e..87a86f34 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -477,7 +477,10 @@ def load_description_and_test( 
enable_determinism(determinism, weight_formats=weight_formats) for w in weight_formats: _test_model_inference(rd, w, devices, **deprecated) - if not isinstance(rd, v0_4.ModelDescr): + if ( + not isinstance(rd, v0_4.ModelDescr) + and rd.validation_summary.status == "passed" + ): _test_model_inference_parametrized(rd, w, devices) # TODO: add execution of jupyter notebooks From 014bc92d8b9840b787e6336197cd59145a8ad2c3 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 22 Mar 2025 20:14:33 +0100 Subject: [PATCH 137/187] rename CLI argument weight_format -> weight-format (also accept weights-format) --- bioimageio/core/cli.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 6ac3f092..a7fc42e4 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -32,7 +32,7 @@ import rich.markdown from loguru import logger -from pydantic import BaseModel, Field, model_validator +from pydantic import AliasChoices, BaseModel, Field, model_validator from pydantic_settings import ( BaseSettings, CliPositionalArg, @@ -82,6 +82,11 @@ from .utils import VERSION, compare from .weight_converters._add_weights import add_weights +WEIGHT_FORMAT_ALIASES = AliasChoices( + "weight-format", + "weights-format", +) + class CmdBase(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True): pass @@ -154,7 +159,11 @@ def run(self): class TestCmd(CmdBase, WithSource, WithSummaryLogging): """Test a bioimageio resource (beyond meta data formatting).""" - weight_format: WeightFormatArgAll = "all" + weight_format: WeightFormatArgAll = Field( + "all", + alias="weight-format", + validation_alias=WEIGHT_FORMAT_ALIASES, + ) """The weight format to limit testing to. 
(only relevant for model resources)""" @@ -197,7 +206,11 @@ class PackageCmd(CmdBase, WithSource, WithSummaryLogging): If it does not have a `.zip` suffix this command will save the package as an unzipped folder instead.""" - weight_format: WeightFormatArgAll = "all" + weight_format: WeightFormatArgAll = Field( + "all", + alias="weight-format", + validation_alias=WEIGHT_FORMAT_ALIASES, + ) """The weight format to include in the package (for model descriptions only).""" def run(self): @@ -406,7 +419,11 @@ class PredictCmd(CmdBase, WithSource): """preview which files would be processed and what outputs would be generated.""" - weight_format: WeightFormatArgAny = "any" + weight_format: WeightFormatArgAny = Field( + "any", + alias="weight-format", + validation_alias=WEIGHT_FORMAT_ALIASES, + ) """The weight format to use.""" example: bool = False From 0c35b8695ddf5be4e0a6ae7a40ad75cc573a2806 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sun, 23 Mar 2025 13:37:51 +0100 Subject: [PATCH 138/187] WIP add stop_early arg --- bioimageio/core/_resource_tests.py | 57 +++++++++++++++++++++++++---- bioimageio/core/cli.py | 5 +++ tests/test_bioimageio_collection.py | 2 +- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 87a86f34..f9980601 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -164,6 +164,7 @@ def test_model( *, determinism: Literal["seed_only", "full"] = "seed_only", sha256: Optional[Sha256] = None, + stop_early: bool = False, **deprecated: Unpack[DeprecatedKwargs], ) -> ValidationSummary: """Test model inference""" @@ -174,6 +175,7 @@ def test_model( determinism=determinism, expected_type="model", sha256=sha256, + stop_early=stop_early, **deprecated, ) @@ -192,13 +194,15 @@ def test_description( determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, sha256: Optional[Sha256] = None, + stop_early: bool 
= False, runtime_env: Union[ Literal["currently-active", "as-described"], Path, BioimageioCondaEnv ] = ("currently-active"), run_command: Callable[[Sequence[str]], None] = default_run_command, **deprecated: Unpack[DeprecatedKwargs], ) -> ValidationSummary: - """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models. + """Test a bioimage.io resource dynamically, + for example run prediction of test tensors for models. Args: source: model description source. @@ -207,6 +211,10 @@ def test_description( devices: Devices to test with, e.g. 'cpu', 'cuda'. Default (may be weight format dependent): ['cuda'] if available, ['cpu'] otherwise. determinism: Modes to improve reproducibility of test outputs. + expected_type: Assert an expected resource description `type`. + sha256: Expected SHA256 value of **source**. + (Ignored if **source** already is a loaded `ResourceDescr` object.) + stop_early: Do not run further subtests after a failed one. runtime_env: (Experimental feature!) The Python environment to run the tests in - `"currently-active"`: Use active Python interpreter. 
- `"as-described"`: Use `bioimageio.spec.get_conda_env` to generate a conda @@ -225,6 +233,7 @@ def test_description( determinism=determinism, expected_type=expected_type, sha256=sha256, + stop_early=stop_early, **deprecated, ) return rd.validation_summary @@ -254,6 +263,9 @@ def test_description( conda_env=conda_env, devices=devices, determinism=determinism, + expected_type=expected_type, + sha256=sha256, + stop_early=stop_early, run_command=run_command, **deprecated, ) @@ -268,6 +280,9 @@ def _test_in_env( devices: Optional[Sequence[str]], determinism: Literal["seed_only", "full"], run_command: Callable[[Sequence[str]], None], + stop_early: bool, + expected_type: Optional[str], + sha256: Optional[Sha256], **deprecated: Unpack[DeprecatedKwargs], ) -> ValidationSummary: descr = load_description(source) @@ -293,6 +308,9 @@ def _test_in_env( determinism=determinism, conda_env=conda_env, run_command=run_command, + expected_type=expected_type, + sha256=sha256, + stop_early=stop_early, **deprecated, ) for wf in all_present_wfs[1:]: @@ -304,6 +322,9 @@ def _test_in_env( determinism=determinism, conda_env=conda_env, run_command=run_command, + expected_type=expected_type, + sha256=sha256, + stop_early=stop_early, **deprecated, ) for d in additional_summary.details: @@ -370,7 +391,10 @@ def _test_in_env( "test", str(source), f"--summary-path={summary_path}", + f"--determinism={determinism}", ] + + ([f"--expected-type={expected_type}"] if expected_type else []) + + (["--stop-early"] if stop_early else []) ) return ValidationSummary.model_validate_json(summary_path.read_bytes()) @@ -385,6 +409,7 @@ def load_description_and_test( determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, sha256: Optional[Sha256] = None, + stop_early: bool = False, **deprecated: Unpack[DeprecatedKwargs], ) -> Union[LatestResourceDescr, InvalidDescr]: ... 
@@ -399,6 +424,7 @@ def load_description_and_test( determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, sha256: Optional[Sha256] = None, + stop_early: bool = False, **deprecated: Unpack[DeprecatedKwargs], ) -> Union[ResourceDescr, InvalidDescr]: ... @@ -412,9 +438,18 @@ def load_description_and_test( determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, sha256: Optional[Sha256] = None, + stop_early: bool = False, **deprecated: Unpack[DeprecatedKwargs], ) -> Union[ResourceDescr, InvalidDescr]: - """Test RDF dynamically, e.g. model inference of test inputs""" + """Test a bioimage.io resource dynamically, + for example run prediction of test tensors for models. + + See `test_description` for more details. + + Returns: + A (possibly invalid) resource description object + with a populated `.validation_summary` attribute. + """ if isinstance(source, ResourceDescrBase): root = source.root file_name = source.file_name @@ -477,11 +512,15 @@ def load_description_and_test( enable_determinism(determinism, weight_formats=weight_formats) for w in weight_formats: _test_model_inference(rd, w, devices, **deprecated) - if ( - not isinstance(rd, v0_4.ModelDescr) - and rd.validation_summary.status == "passed" - ): - _test_model_inference_parametrized(rd, w, devices) + if stop_early and rd.validation_summary.status != "passed": + break + + if not isinstance(rd, v0_4.ModelDescr): + _test_model_inference_parametrized( + rd, w, devices, stop_early=stop_early + ) + if stop_early and rd.validation_summary.status != "passed": + break # TODO: add execution of jupyter notebooks # TODO: add more tests @@ -631,6 +670,8 @@ def _test_model_inference_parametrized( model: v0_5.ModelDescr, weight_format: SupportedWeightsFormat, devices: Optional[Sequence[str]], + *, + stop_early: bool, ) -> None: if not any( isinstance(a.size, v0_5.ParameterizedSize) @@ -772,6 +813,8 @@ def get_ns(n: int): ), ) ) + if stop_early and 
error is not None: + break except Exception as e: if validation_context_var.get().raise_errors: raise e diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index a7fc42e4..50f0b3f4 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -185,6 +185,11 @@ class TestCmd(CmdBase, WithSource, WithSummaryLogging): determinism: Literal["seed_only", "full"] = "seed_only" """Modes to improve reproducibility of test outputs.""" + stop_early: bool = Field( + False, alias="stop-early", validation_alias=AliasChoices("stop-early", "x") + ) + """Do not run further subtests after a failed one.""" + def run(self): sys.exit( test( diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 1cc8132a..2e03f152 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -63,7 +63,7 @@ def test_rdf( from bioimageio.core import load_description_and_test - descr = load_description_and_test(descr_url, sha256=sha) + descr = load_description_and_test(descr_url, sha256=sha, stop_early=True) assert not isinstance(descr, InvalidDescr) assert ( descr.validation_summary.status == "passed" From d1a27cd599599aa1d9cd14ac5eb569eaa36d7b24 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 24 Mar 2025 14:40:02 +0100 Subject: [PATCH 139/187] improve error messages --- bioimageio/core/_resource_tests.py | 2 +- bioimageio/core/backends/pytorch_backend.py | 7 ++- bioimageio/core/digest_spec.py | 53 +++++++++++++++++++-- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index f9980601..11ae2daf 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -642,7 +642,7 @@ def _test_model_inference( raise e error = str(e) - tb = traceback.format_tb(e.__traceback__) + tb = traceback.format_exception(type(e), e, e.__traceback__, chain=True) model.validation_summary.add_detail( ValidationDetail( diff --git 
a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index 0a1c01e2..c4d6d184 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -110,7 +110,12 @@ def load_torch_model( if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) else weight_spec.architecture.kwargs ) - network = arch(**model_kwargs) + try: + # calling custom user code + network = arch(**model_kwargs) + except Exception as e: + raise RuntimeError("Failed to initialize PyTorch model") from e + if not isinstance(network, nn.Module): raise ValueError( f"calling {weight_spec.architecture.callable_name if isinstance(weight_spec.architecture, (v0_4.CallableFromFile, v0_4.CallableFromDepencency)) else weight_spec.architecture.callable} did not return a torch.nn.Module" diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 1071370c..44d83a85 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -2,6 +2,7 @@ import collections.abc import importlib.util +import sys from itertools import chain from pathlib import Path from typing import ( @@ -24,7 +25,8 @@ from numpy.typing import NDArray from typing_extensions import Unpack, assert_never -from bioimageio.spec._internal.io import HashKwargs, resolve +from bioimageio.spec import get_validation_context +from bioimageio.spec._internal.io import HashKwargs from bioimageio.spec.common import FileDescr, FileSource, ZipPath from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_4 import CallableFromDepencency, CallableFromFile @@ -84,10 +86,51 @@ def import_callable( def _import_from_file_impl( source: FileSource, callable_name: str, **kwargs: Unpack[HashKwargs] ): - code = resolve(source, **kwargs).path.read_text(encoding="utf-8") - module_globals: Dict[str, Any] = {} - exec(code, module_globals) - return module_globals[callable_name] + with 
get_validation_context().replace(perform_io_checks=True): + src_descr = FileDescr(source=source, **kwargs) + assert src_descr.sha256 is not None + + local_source = src_descr.download() + source_code = local_source.path.read_text(encoding="utf-8") + + module_name = local_source.original_file_name.replace("-", "_") + if module_name.endswith(".py"): + module_name = module_name[:-3] + + # make sure we have a unique module name to avoid conflicts and confusion + module_name = f"{module_name}_{src_descr.sha256}" + + # make sure we have a valid module name + if not module_name.isidentifier(): + module_name = f"custom_module_{src_descr.sha256}" + assert module_name.isidentifier(), module_name + + module = sys.modules.get(module_name) + if module is None: + try: + module_spec = importlib.util.spec_from_loader(module_name, loader=None) + assert module_spec is not None + module = importlib.util.module_from_spec(module_spec) + exec(source_code, module.__dict__) + sys.modules[module_spec.name] = module # cache this module + except Exception as e: + raise ImportError( + f"Failed to import {module_name[:-58]}... 
from {source}" + ) from e + + try: + callable_attr = getattr(module, callable_name) + except AttributeError as e: + raise AttributeError( + f"Imported custom module `{module_name[:-58]}...` has no `{callable_name}` attribute" + ) from e + except Exception as e: + raise AttributeError( + f"Failed to access `{callable_name}` attribute from imported custom module `{module_name[:-58]}...`" + ) from e + + else: + return callable_attr def get_axes_infos( From f22438f59dce97912b8b44275324591ee21a6f8c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Mar 2025 14:19:42 +0100 Subject: [PATCH 140/187] call validate_sha256 directly --- bioimageio/core/digest_spec.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 44d83a85..f318c5ce 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -25,7 +25,6 @@ from numpy.typing import NDArray from typing_extensions import Unpack, assert_never -from bioimageio.spec import get_validation_context from bioimageio.spec._internal.io import HashKwargs from bioimageio.spec.common import FileDescr, FileSource, ZipPath from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 @@ -86,9 +85,10 @@ def import_callable( def _import_from_file_impl( source: FileSource, callable_name: str, **kwargs: Unpack[HashKwargs] ): - with get_validation_context().replace(perform_io_checks=True): - src_descr = FileDescr(source=source, **kwargs) - assert src_descr.sha256 is not None + src_descr = FileDescr(source=source, **kwargs) + # ensure sha is valid even if perform_io_checks=False + src_descr.validate_sha256() + assert src_descr.sha256 is not None local_source = src_descr.download() source_code = local_source.path.read_text(encoding="utf-8") From 545ac5423f93df557f0406e0a41ced267878d4df Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Mar 2025 15:03:19 +0100 Subject: [PATCH 141/187] improve test error handling --- 
bioimageio/core/_resource_tests.py | 57 ++++++++++----------- bioimageio/core/backends/_model_adapter.py | 30 +++++------ bioimageio/core/backends/pytorch_backend.py | 32 ++++++------ bioimageio/core/digest_spec.py | 13 ++--- 4 files changed, 66 insertions(+), 66 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 11ae2daf..046a4be8 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -2,7 +2,6 @@ import os import platform import subprocess -import traceback import warnings from io import StringIO from itertools import product @@ -47,7 +46,10 @@ MismatchedElementsPerMillion, RelativeTolerance, ) -from bioimageio.spec._internal.validation_context import validation_context_var +from bioimageio.spec._internal.validation_context import ( + get_validation_context, + validation_context_var, +) from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource, Sha256 from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat @@ -589,8 +591,17 @@ def _test_model_inference( ) -> None: test_name = f"Reproduce test outputs from test inputs ({weight_format})" logger.debug("starting '{}'", test_name) - error: Optional[str] = None - tb: List[str] = [] + errors: List[ErrorEntry] = [] + + def add_error_entry(msg: str, with_traceback: bool = False): + errors.append( + ErrorEntry( + loc=("weights", weight_format), + msg=msg, + type="bioimageio.core", + with_traceback=with_traceback, + ) + ) try: inputs = get_test_inputs(model) @@ -602,13 +613,15 @@ def _test_model_inference( results = prediction_pipeline.predict_sample_without_blocking(inputs) if len(results.members) != len(expected.members): - error = f"Expected {len(expected.members)} outputs, but got {len(results.members)}" + add_error_entry( + f"Expected {len(expected.members)} outputs, but got {len(results.members)}" + ) else: for m, expected in expected.members.items(): actual = 
results.members.get(m) if actual is None: - error = "Output tensors for test case may not be None" + add_error_entry("Output tensors for test case may not be None") break rtol, atol, mismatched_tol = _get_tolerance( @@ -627,7 +640,7 @@ def _test_model_inference( a_max = abs_diff[a_max_idx].item() a_actual = actual[a_max_idx].item() a_expected = expected[a_max_idx].item() - error = ( + add_error_entry( f"Output '{m}' disagrees with {mismatched_elements} of" + f" {expected.size} expected values." + f"\n Max relative difference: {r_max:.2e}" @@ -638,30 +651,18 @@ def _test_model_inference( ) break except Exception as e: - if validation_context_var.get().raise_errors: + if get_validation_context().raise_errors: raise e - error = str(e) - tb = traceback.format_exception(type(e), e, e.__traceback__, chain=True) + add_error_entry(str(e), with_traceback=True) model.validation_summary.add_detail( ValidationDetail( name=test_name, loc=("weights", weight_format), - status="passed" if error is None else "failed", + status="failed" if errors else "passed", recommended_env=get_conda_env(entry=dict(model.weights)[weight_format]), - errors=( - [] - if error is None - else [ - ErrorEntry( - loc=("weights", weight_format), - msg=error, - type="bioimageio.core", - traceback=tb, - ) - ] - ), + errors=errors, ) ) @@ -816,11 +817,9 @@ def get_ns(n: int): if stop_early and error is not None: break except Exception as e: - if validation_context_var.get().raise_errors: + if get_validation_context().raise_errors: raise e - error = str(e) - tb = traceback.format_tb(e.__traceback__) model.validation_summary.add_detail( ValidationDetail( name=f"Run {weight_format} inference for parametrized inputs", @@ -829,9 +828,9 @@ def get_ns(n: int): errors=[ ErrorEntry( loc=("weights", weight_format), - msg=error, + msg=str(e), type="bioimageio.core", - traceback=tb, + with_traceback=True, ) ], ) @@ -854,7 +853,7 @@ def _test_expected_resource_type( ErrorEntry( loc=("type",), type="type", - 
msg=f"expected type {expected_type}, found {rd.type}", + msg=f"Expected type {expected_type}, found {rd.type}", ) ] ), diff --git a/bioimageio/core/backends/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py index db4ff949..db4d44e9 100644 --- a/bioimageio/core/backends/_model_adapter.py +++ b/bioimageio/core/backends/_model_adapter.py @@ -1,3 +1,4 @@ +import sys import warnings from abc import ABC, abstractmethod from typing import ( @@ -87,7 +88,7 @@ def create( ) weights = model_description.weights - errors: List[Tuple[SupportedWeightsFormat, Exception]] = [] + errors: List[Exception] = [] weight_format_priority_order = ( DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER if weight_format_priority_order is None @@ -112,7 +113,7 @@ def create( model_description=model_description, devices=devices ) except Exception as e: - errors.append((wf, e)) + errors.append(e) elif wf == "tensorflow_saved_model_bundle": assert weights.tensorflow_saved_model_bundle is not None try: @@ -122,7 +123,7 @@ def create( model_description=model_description, devices=devices ) except Exception as e: - errors.append((wf, e)) + errors.append(e) elif wf == "onnx": assert weights.onnx is not None try: @@ -132,7 +133,7 @@ def create( model_description=model_description, devices=devices ) except Exception as e: - errors.append((wf, e)) + errors.append(e) elif wf == "torchscript": assert weights.torchscript is not None try: @@ -142,7 +143,7 @@ def create( model_description=model_description, devices=devices ) except Exception as e: - errors.append((wf, e)) + errors.append(e) elif wf == "keras_hdf5": assert weights.keras_hdf5 is not None # keras can either be installed as a separate package or used as part of tensorflow @@ -158,27 +159,24 @@ def create( model_description=model_description, devices=devices ) except Exception as e: - errors.append((wf, e)) + errors.append(e) else: assert_never(wf) assert errors if len(weight_format_priority_order) == 1: assert len(errors) == 1 - wf, e = errors[0] - 
raise ValueError( - f"The '{wf}' model adapter could not be created" - + f" in this environment:\n{e.__class__.__name__}({e}).\n\n" - ) from e + raise errors[0] else: - error_list = "\n - ".join( - f"{wf}: {e.__class__.__name__}({e})" for wf, e in errors - ) - raise ValueError( + msg = ( "None of the weight format specific model adapters could be created" - + f" in this environment. Errors are:\n\n{error_list}.\n\n" + + " in this environment." ) + if sys.version_info[:2] >= (3, 11): + raise ExceptionGroup(msg, errors) + else: + raise ValueError(msg) from Exception(errors) @final def load(self, *, devices: Optional[Sequence[str]] = None) -> None: diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index c4d6d184..54983700 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -97,7 +97,7 @@ def load_torch_model( load_state: bool = True, devices: Optional[Sequence[Union[str, torch.device]]] = None, ) -> nn.Module: - arch = import_callable( + custom_callable = import_callable( weight_spec.architecture, sha256=( weight_spec.architecture_sha256 @@ -110,27 +110,29 @@ def load_torch_model( if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) else weight_spec.architecture.kwargs ) - try: - # calling custom user code - network = arch(**model_kwargs) - except Exception as e: - raise RuntimeError("Failed to initialize PyTorch model") from e - - if not isinstance(network, nn.Module): - raise ValueError( - f"calling {weight_spec.architecture.callable_name if isinstance(weight_spec.architecture, (v0_4.CallableFromFile, v0_4.CallableFromDepencency)) else weight_spec.architecture.callable} did not return a torch.nn.Module" - ) + torch_model = custom_callable(**model_kwargs) + + if not isinstance(torch_model, nn.Module): + if isinstance( + weight_spec.architecture, + (v0_4.CallableFromFile, v0_4.CallableFromDepencency), + ): + callable_name = 
weight_spec.architecture.callable_name + else: + callable_name = weight_spec.architecture.callable + + raise ValueError(f"Calling {callable_name} did not return a torch.nn.Module.") if load_state or devices: use_devices = get_devices(devices) - network = network.to(use_devices[0]) + torch_model = torch_model.to(use_devices[0]) if load_state: - network = load_torch_state_dict( - network, + torch_model = load_torch_state_dict( + torch_model, path=download(weight_spec).path, devices=use_devices, ) - return network + return torch_model def load_torch_state_dict( diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index f318c5ce..fc601c2b 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -111,22 +111,23 @@ def _import_from_file_impl( module_spec = importlib.util.spec_from_loader(module_name, loader=None) assert module_spec is not None module = importlib.util.module_from_spec(module_spec) - exec(source_code, module.__dict__) + source_compiled = compile( + source_code, str(local_source.path), "exec" + ) # compile source to attach file name + exec(source_compiled, module.__dict__) sys.modules[module_spec.name] = module # cache this module except Exception as e: - raise ImportError( - f"Failed to import {module_name[:-58]}... from {source}" - ) from e + raise ImportError(f"Failed to import {source} .") from e try: callable_attr = getattr(module, callable_name) except AttributeError as e: raise AttributeError( - f"Imported custom module `{module_name[:-58]}...` has no `{callable_name}` attribute" + f"Imported custom module from {source} has no `{callable_name}` attribute." ) from e except Exception as e: raise AttributeError( - f"Failed to access `{callable_name}` attribute from imported custom module `{module_name[:-58]}...`" + f"Failed to access `{callable_name}` attribute from custom module imported from {source} ." 
) from e else: From a255f25b76d023e369cb7b9e88f8cb2541d614a4 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Mar 2025 15:07:23 +0100 Subject: [PATCH 142/187] ValidationSummary.log -> save --- bioimageio/core/cli.py | 4 ++-- bioimageio/core/commands.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 50f0b3f4..0b5fa9e5 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -106,7 +106,7 @@ class WithSummaryLogging(ArgMixin): """ def log(self, descr: Union[ResourceDescr, InvalidDescr]): - _ = descr.validation_summary.log(self.summary) + _ = descr.validation_summary.save(self.summary) class WithSource(ArgMixin): @@ -735,7 +735,7 @@ def run(self): if updated_model_descr is None: return - _ = updated_model_descr.validation_summary.log() + _ = updated_model_descr.validation_summary.save() JSON_FILE = "bioimageio-cli.json" diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 37d5ef87..df98c51e 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -46,7 +46,7 @@ def test( runtime_env=runtime_env, determinism=determinism, ) - _ = test_summary.log(summary) + _ = test_summary.save(summary) return 0 if test_summary.status == "passed" else 1 @@ -60,7 +60,7 @@ def validate_format( Args: descr: a bioimageio resource description """ - _ = descr.validation_summary.log(summary) + _ = descr.validation_summary.save(summary) return 0 if descr.validation_summary.status == "passed" else 1 @@ -82,7 +82,7 @@ def package( weight-format: include only this single weight-format (if not 'all'). """ if isinstance(descr, InvalidDescr): - logged = descr.validation_summary.log() + logged = descr.validation_summary.save() msg = f"Invalid {descr.type} description." if logged: msg += f" Details saved to {logged}." 
From 5402b6a8744f033a49ab39cdb30274ff3e2a5231 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 25 Mar 2025 22:05:35 +0100 Subject: [PATCH 143/187] CLI render -> display --- bioimageio/core/cli.py | 32 +++++++++++++++++++++++--------- bioimageio/core/commands.py | 26 ++++++++++++++------------ 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 0b5fa9e5..090559bb 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -97,16 +97,25 @@ class ArgMixin(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True class WithSummaryLogging(ArgMixin): - summary: Union[Path, Sequence[Path]] = Field( - (), examples=[Path("summary.md"), Path("bioimageio_summaries/")] + summary: Union[ + Literal["display"], Path, Sequence[Union[Literal["display"], Path]] + ] = Field( + "display", + examples=[ + "display", + Path("summary.md"), + Path("bioimageio_summaries/"), + ["display", Path("summary.md")], + ], ) - """Save the validation summary as JSON, Markdown or HTML. + """Display the validation summary or save it as JSON, Markdown or HTML. The format is chosen based on the suffix: `.json`, `.md`, `.html`. If a folder is given (path w/o suffix) the summary is saved in all formats. + Choose/add `"display"` to render the validation summary to the terminal. """ def log(self, descr: Union[ResourceDescr, InvalidDescr]): - _ = descr.validation_summary.save(self.summary) + _ = descr.validation_summary.log(self.summary) class WithSource(ArgMixin): @@ -267,8 +276,13 @@ def _get_stat( class UpdateCmdBase(CmdBase, WithSource, ABC): - output: Union[Literal["render", "stdout"], Path] = "render" - """Output updated bioimageio.yaml to the terminal or write to a file.""" + output: Union[Literal["display", "stdout"], Path] = "display" + """Output updated bioimageio.yaml to the terminal or write to a file. + Notes: + - `"display"`: Render to the terminal with syntax highlighting. 
+    - `"stdout"`: Write to sys.stdout without syntax highlighting.
+      (More convenient for copying the updated bioimageio.yaml from the terminal.)
+    """
 
     diff: Union[bool, Path] = Field(True, alias="diff")
     """Output a diff of original and updated bioimageio.yaml.
@@ -318,7 +332,7 @@ def run(self):
         if isinstance(self.output, Path):
             _ = self.output.write_text(updated_yaml, encoding="utf-8")
             logger.info(f"written updated description to {self.output}")
-        elif self.output == "render":
+        elif self.output == "display":
             updated_md = f"```yaml\n{updated_yaml}\n```"
             rich.console.Console().print(rich.markdown.Markdown(updated_md))
         elif self.output == "stdout":
@@ -705,7 +719,7 @@ def input_dataset(stat: Stat):
         save_sample(sp_out, sample_out)
 
 
-class AddWeightsCmd(CmdBase, WithSource):
+class AddWeightsCmd(CmdBase, WithSource, WithSummaryLogging):
     output: CliPositionalArg[Path]
     """The path to write the updated model package to."""
 
@@ -735,7 +749,7 @@ def run(self):
         if updated_model_descr is None:
             return
 
-        _ = updated_model_descr.validation_summary.save()
+        self.log(updated_model_descr)
 
 
 JSON_FILE = "bioimageio-cli.json"
diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py
index df98c51e..a6c97139 100644
--- a/bioimageio/core/commands.py
+++ b/bioimageio/core/commands.py
@@ -25,7 +25,9 @@ def test(
     *,
     weight_format: WeightFormatArgAll = "all",
     devices: Optional[Union[str, Sequence[str]]] = None,
-    summary: Union[Path, Sequence[Path]] = (),
+    summary: Union[
+        Literal["display"], Path, Sequence[Union[Literal["display"], Path]]
+    ] = "display",
     runtime_env: Union[
         Literal["currently-active", "as-described"], Path
     ] = "currently-active",
@@ -36,17 +38,17 @@
     Arguments as described in `bioimageio.core.cli.TestCmd`
     """
     if isinstance(descr, InvalidDescr):
-        descr.validation_summary.display()
-        return 1
-
-    test_summary = test_description(
-        descr,
-        weight_format=None if weight_format == "all" else weight_format,
-        devices=[devices] if isinstance(devices, str) 
else devices, - runtime_env=runtime_env, - determinism=determinism, - ) - _ = test_summary.save(summary) + test_summary = descr.validation_summary + else: + test_summary = test_description( + descr, + weight_format=None if weight_format == "all" else weight_format, + devices=[devices] if isinstance(devices, str) else devices, + runtime_env=runtime_env, + determinism=determinism, + ) + + _ = test_summary.log(summary) return 0 if test_summary.status == "passed" else 1 From c0ff94be7b1d66b62aafb0c608eb171d8acc283a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 10:31:31 +0100 Subject: [PATCH 144/187] fix traceback for custom module from zipfile --- bioimageio/core/digest_spec.py | 49 +++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index fc601c2b..700cfbd6 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -1,6 +1,7 @@ from __future__ import annotations import collections.abc +import hashlib import importlib.util import sys from itertools import chain @@ -36,6 +37,7 @@ ) from bioimageio.spec.utils import download, load_array +from ._settings import settings from .axis import Axis, AxisId, AxisInfo, AxisLike, PerAxis from .block_meta import split_multiple_shapes_into_blocks from .common import Halo, MemberId, PerMember, SampleId, TotalNumberOfBlocks @@ -91,33 +93,50 @@ def _import_from_file_impl( assert src_descr.sha256 is not None local_source = src_descr.download() - source_code = local_source.path.read_text(encoding="utf-8") - module_name = local_source.original_file_name.replace("-", "_") - if module_name.endswith(".py"): - module_name = module_name[:-3] + source_bytes = local_source.path.read_bytes() + assert isinstance(source_bytes, bytes) + source_sha = hashlib.sha256(source_bytes).hexdigest() - # make sure we have a unique module name to avoid conflicts and confusion - module_name = 
f"{module_name}_{src_descr.sha256}" + # make sure we have unique module name + module_name = f"{local_source.path.stem}_{source_sha}" # make sure we have a valid module name if not module_name.isidentifier(): - module_name = f"custom_module_{src_descr.sha256}" + module_name = f"custom_module_{source_sha}" assert module_name.isidentifier(), module_name module = sys.modules.get(module_name) if module is None: try: - module_spec = importlib.util.spec_from_loader(module_name, loader=None) - assert module_spec is not None - module = importlib.util.module_from_spec(module_spec) - source_compiled = compile( - source_code, str(local_source.path), "exec" - ) # compile source to attach file name - exec(source_compiled, module.__dict__) - sys.modules[module_spec.name] = module # cache this module + if isinstance(local_source.path, Path): + module_path = local_source.path + elif isinstance(local_source.path, ZipPath): + # save extract source to cache + # loading from a file from disk ensure we get readable tracebacks + # if any errors occur + module_path = ( + settings.cache_path / f"{source_sha}-{local_source.path.name}" + ) + _ = module_path.write_bytes(source_bytes) + else: + assert_never(local_source.path) + + importlib_spec = importlib.util.spec_from_file_location( + module_name, module_path + ) + + if importlib_spec is None: + raise ImportError(f"Failed to import {source}.") + + module = importlib.util.module_from_spec(importlib_spec) + assert importlib_spec.loader is not None + importlib_spec.loader.exec_module(module) + except Exception as e: raise ImportError(f"Failed to import {source} .") from e + else: + sys.modules[module_name] = module # cache this module try: callable_attr = getattr(module, callable_name) From bf5b247a9228dda6c0c317231e8b0c81c564bf6d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 14:40:49 +0100 Subject: [PATCH 145/187] adapt for new status valid-format --- bioimageio/core/_resource_tests.py | 7 +++++-- bioimageio/core/cli.py | 6 
+++++- bioimageio/core/commands.py | 2 +- tests/test_any_model_fixture.py | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 046a4be8..3311381b 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -514,19 +514,22 @@ def load_description_and_test( enable_determinism(determinism, weight_formats=weight_formats) for w in weight_formats: _test_model_inference(rd, w, devices, **deprecated) - if stop_early and rd.validation_summary.status != "passed": + if stop_early and rd.validation_summary.status == "failed": break if not isinstance(rd, v0_4.ModelDescr): _test_model_inference_parametrized( rd, w, devices, stop_early=stop_early ) - if stop_early and rd.validation_summary.status != "passed": + if stop_early and rd.validation_summary.status == "failed": break # TODO: add execution of jupyter notebooks # TODO: add more tests + if rd.validation_summary.status == "valid-format": + rd.validation_summary.status = "passed" + return rd diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 090559bb..7378f138 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -162,7 +162,11 @@ def descr(self): def run(self): self.log(self.descr) - sys.exit(0 if self.descr.validation_summary.status == "passed" else 1) + sys.exit( + 0 + if self.descr.validation_summary.status in ("valid-format", "passed") + else 1 + ) class TestCmd(CmdBase, WithSource, WithSummaryLogging): diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index a6c97139..7184014c 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -63,7 +63,7 @@ def validate_format( descr: a bioimageio resource description """ _ = descr.validation_summary.save(summary) - return 0 if descr.validation_summary.status == "passed" else 1 + return 0 if descr.validation_summary.status in ("valid-format", "passed") else 1 # TODO: absorb into 
`save_bioimageio_package` diff --git a/tests/test_any_model_fixture.py b/tests/test_any_model_fixture.py index 92701109..77225f18 100644 --- a/tests/test_any_model_fixture.py +++ b/tests/test_any_model_fixture.py @@ -3,4 +3,4 @@ def test_model(any_model: str): summary = load_description_and_validate_format_only(any_model) - assert summary.status == "passed", summary.display() + assert summary.status == "valid-format", summary.display() From f10fcc47e29e68f936a1f1c4c2640465ced80865 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 14:45:26 +0100 Subject: [PATCH 146/187] WIP fix tests --- pyproject.toml | 3 ++- tests/test_bioimageio_collection.py | 31 +++++++++++++++++++++-------- tests/test_proc_ops.py | 2 +- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c7dae606..5633c6b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ preview = true exclude = [ "**/__pycache__", "**/node_modules", + "dogfood", "presentations", "scripts/pdoc/original.py", "scripts/pdoc/patched.py", @@ -39,7 +40,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first" +addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood" [tool.ruff] line-length = 88 diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 2e03f152..8ba9c3f2 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -4,7 +4,7 @@ import requests from pydantic import HttpUrl -from bioimageio.spec import InvalidDescr +from bioimageio.spec import InvalidDescr, ValidationContext from bioimageio.spec.common import Sha256 from tests.utils import ParameterSet, expensive_test @@ -39,15 +39,28 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: KNOWN_INVALID: Collection[str] = 
{ - "stupendous-sheep/1.2", - "wild-rhino/0.1.0", # requires careamics - "dazzling-spider/0.1.0", # requires careamics - "humorous-fox/0.1.0", # requires careamics + "affable-shark/1.1", # onnx weights expect fixed input shape + "affectionate-cow/0.1.0", # custom dependencies "ambitious-sloth/1.2", # requires inferno + "dazzling-spider/0.1.0", # requires careamics "dynamic-t-rex/1", # model.v0_4.ScaleLinearKwargs with axes + "efficient-chipmunk/1", # needs plantseg "famous-fish/0.1.0", # list index out of range `fl[3]` + "greedy-whale/1", # batch size is actually limited to 1 "happy-elephant/0.1.0", # list index out of range `fl[3]` - "affectionate-cow/0.1.0", # custom dependencies + "humorous-crab/1", # batch size is actually limited to 1 + "humorous-fox/0.1.0", # requires careamics + "humorous-owl/1", # error deserializing GlorotUniform + "noisy-ox/1", # batch size is actually limited to 1 + "stupendous-sheep/1.2", + "wild-rhino/0.1.0", # requires careamics + "idealistic-turtle/0.1.0", # requires biapy + "intelligent-lion/0.1.0", # requires biapy + "merry-water-buffalo/0.1.0", # requires biapy + "venomous-swan/0.1.0", # requires biapy + "heroic-otter/0.1.0", # requires biapy + "stupendous-sheep/1.1", # requires relativ import of attachment + "commited-turkey/1.2", # error deserializng VarianceScaling } @@ -63,8 +76,10 @@ def test_rdf( from bioimageio.core import load_description_and_test - descr = load_description_and_test(descr_url, sha256=sha, stop_early=True) + with ValidationContext(): + descr = load_description_and_test(descr_url, sha256=sha, stop_early=True) + assert not isinstance(descr, InvalidDescr) assert ( descr.validation_summary.status == "passed" - ), descr.validation_summary.format() + ), descr.validation_summary.display() diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 0b93f08b..0587d898 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -268,7 +268,7 @@ def test_scale_mean_variance_per_channel(tid: MemberId, 
axes_str: Optional[str]) axes = None if axes_str is None else tuple(map(AxisId, axes_str)) shape = (3, 32, 46) - ipt_axes = ("c", "y", "x") + ipt_axes = ("channel", "y", "x") np_data = np.random.rand(*shape) ipt_data = xr.DataArray(np_data, dims=ipt_axes) From dbefc67700c2824c534693143dd959189e697a31 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:23:49 +0100 Subject: [PATCH 147/187] use get_validation_context --- bioimageio/core/_resource_tests.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 3311381b..dd5fc09e 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -46,10 +46,7 @@ MismatchedElementsPerMillion, RelativeTolerance, ) -from bioimageio.spec._internal.validation_context import ( - get_validation_context, - validation_context_var, -) +from bioimageio.spec._internal.validation_context import get_validation_context from bioimageio.spec.common import BioimageioYamlContent, PermissiveFileSource, Sha256 from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat @@ -480,7 +477,7 @@ def load_description_and_test( rd = source elif isinstance(source, dict): # check context for a given root; default to root of source - context = validation_context_var.get( + context = get_validation_context( ValidationContext(root=root, file_name=file_name) ).replace( perform_io_checks=True # make sure we perform io checks though From 1f741d9247ca85b32f58cf657e75618a4032688a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:27:21 +0100 Subject: [PATCH 148/187] add underscore weight-format aliases --- bioimageio/core/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 7378f138..d3cf1560 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -85,6 +85,8 @@ WEIGHT_FORMAT_ALIASES = AliasChoices( 
"weight-format", "weights-format", + "weight_format", + "weights_format", ) From d8eb5dcde747e3a8e404cc25404210aa2effbfd5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:27:51 +0100 Subject: [PATCH 149/187] add source default --- bioimageio/core/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index d3cf1560..37d45b99 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -121,8 +121,8 @@ def log(self, descr: Union[ResourceDescr, InvalidDescr]): class WithSource(ArgMixin): - source: CliPositionalArg[str] - """Url/path to a bioimageio.yaml/rdf.yaml file + source: CliPositionalArg[str] = "." + """Url/path to a (folder with a) bioimageio.yaml/rdf.yaml file or a bioimage.io resource identifier, e.g. 'affable-shark'""" @cached_property From 25ff83204bf885ce5d9b0433c981ad1feac0e7b6 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:28:24 +0100 Subject: [PATCH 150/187] add sections to console output for update commands --- bioimageio/core/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 37d45b99..32dd2ff3 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -332,14 +332,15 @@ def run(self): if isinstance(self.diff, Path): _ = self.diff.write_text(diff, encoding="utf-8") elif self.diff: - diff_md = f"````````diff\n{diff}\n````````" - rich.console.Console().print(rich.markdown.Markdown(diff_md)) + console = rich.console.Console() + diff_md = f"## Diff\n\n````````diff\n{diff}\n````````" + console.print(rich.markdown.Markdown(diff_md)) if isinstance(self.output, Path): _ = self.output.write_text(updated_yaml, encoding="utf-8") logger.info(f"written updated description to {self.output}") elif self.output == "display": - updated_md = f"```yaml\n{updated_yaml}\n```" + updated_md = f"## Updated bioimageio.yaml\n\n```yaml\n{updated_yaml}\n```" 
rich.console.Console().print(rich.markdown.Markdown(updated_md)) elif self.output == "stdout": print(updated_yaml) From 339d21625754b648eb20ce19a26de01bc2508ffb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:28:56 +0100 Subject: [PATCH 151/187] set default exclude_defaults=True for update-format --- bioimageio/core/cli.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 32dd2ff3..43a20282 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -355,6 +355,13 @@ def run(self): class UpdateFormatCmd(UpdateCmdBase): """Update the metadata format to the latest format version.""" + exclude_defaults: bool = Field(True, alias="exclude-defaults") + """Exclude fields that have the default value (even if set explicitly). + + Note: + The update process sets most unset fields explicitly with their default value. + """ + perform_io_checks: bool = Field( settings.perform_io_checks, alias="perform-io-checks" ) From ecd2aab574f7e159b873d7fda1cd6e9a07812582 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:29:34 +0100 Subject: [PATCH 152/187] WIP fix tests --- bioimageio/core/digest_spec.py | 4 ++-- tests/test_bioimageio_collection.py | 28 ++++++++++++++++++---------- tests/test_proc_ops.py | 29 ++++++++++++++++------------- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index 700cfbd6..fb0462f5 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -127,14 +127,14 @@ def _import_from_file_impl( ) if importlib_spec is None: - raise ImportError(f"Failed to import {source}.") + raise ImportError(f"Failed to import {source}") module = importlib.util.module_from_spec(importlib_spec) assert importlib_spec.loader is not None importlib_spec.loader.exec_module(module) except Exception as e: - raise ImportError(f"Failed to import {source} .") from e + raise ImportError(f"Failed 
to import {source}") from e else: sys.modules[module_name] = module # cache this module diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 8ba9c3f2..ce652545 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -4,7 +4,7 @@ import requests from pydantic import HttpUrl -from bioimageio.spec import InvalidDescr, ValidationContext +from bioimageio.spec import InvalidDescr from bioimageio.spec.common import Sha256 from tests.utils import ParameterSet, expensive_test @@ -42,25 +42,34 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: "affable-shark/1.1", # onnx weights expect fixed input shape "affectionate-cow/0.1.0", # custom dependencies "ambitious-sloth/1.2", # requires inferno + "committed-turkey/1.2", # error deserializing VarianceScaling + "creative-panda/1", # error deserializing Conv2D "dazzling-spider/0.1.0", # requires careamics - "dynamic-t-rex/1", # model.v0_4.ScaleLinearKwargs with axes + "discreete-rooster/1", # error deserializing VarianceScaling + "dynamic-t-rex/1", # needs update to 0.5 for scale_linear with axes processing + "easy-going-sauropod/1", # CPU implementation of Conv3D currently only supports the NHWC tensor format. 
"efficient-chipmunk/1", # needs plantseg "famous-fish/0.1.0", # list index out of range `fl[3]` "greedy-whale/1", # batch size is actually limited to 1 "happy-elephant/0.1.0", # list index out of range `fl[3]` + "happy-honeybee/0.1.0", # requires biapy + "heroic-otter/0.1.0", # requires biapy "humorous-crab/1", # batch size is actually limited to 1 "humorous-fox/0.1.0", # requires careamics "humorous-owl/1", # error deserializing GlorotUniform - "noisy-ox/1", # batch size is actually limited to 1 - "stupendous-sheep/1.2", - "wild-rhino/0.1.0", # requires careamics "idealistic-turtle/0.1.0", # requires biapy + "impartial-shark/1", # error deserializing VarianceScaling "intelligent-lion/0.1.0", # requires biapy + "joyful-deer/1", # needs update to 0.5 for scale_linear with axes processing "merry-water-buffalo/0.1.0", # requires biapy - "venomous-swan/0.1.0", # requires biapy - "heroic-otter/0.1.0", # requires biapy + "naked-microbe/1", # unknown layer Convolution2D + "noisy-ox/1", # batch size is actually limited to 1 + "non-judgemental-eagle/1", # error deserializing GlorotUniform + "straightforward-crocodile/1", # needs update to 0.5 for scale_linear with axes processing "stupendous-sheep/1.1", # requires relativ import of attachment - "commited-turkey/1.2", # error deserializng VarianceScaling + "stupendous-sheep/1.2", + "venomous-swan/0.1.0", # requires biapy + "wild-rhino/0.1.0", # requires careamics } @@ -76,8 +85,7 @@ def test_rdf( from bioimageio.core import load_description_and_test - with ValidationContext(): - descr = load_description_and_test(descr_url, sha256=sha, stop_early=True) + descr = load_description_and_test(descr_url, sha256=sha, stop_early=True) assert not isinstance(descr, InvalidDescr) assert ( diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 0587d898..382083f7 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -21,15 +21,17 @@ def tid(): def test_scale_linear(tid: MemberId): from bioimageio.core.proc_ops 
import ScaleLinear - offset = xr.DataArray([1, 2, 42], dims=("c")) - gain = xr.DataArray([1, 2, 3], dims=("c")) - data = xr.DataArray(np.arange(6).reshape((1, 2, 3)), dims=("x", "y", "c")) + offset = xr.DataArray([1, 2, 42], dims=("channel",)) + gain = xr.DataArray([1, 2, 3], dims=("channel",)) + data = xr.DataArray(np.arange(6).reshape((1, 2, 3)), dims=("x", "y", "channel")) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) op = ScaleLinear(input=tid, output=tid, offset=offset, gain=gain) op(sample) - expected = xr.DataArray(np.array([[[1, 4, 48], [4, 10, 57]]]), dims=("x", "y", "c")) + expected = xr.DataArray( + np.array([[[1, 4, 48], [4, 10, 57]]]), dims=("x", "y", "channel") + ) xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) @@ -84,10 +86,10 @@ def test_zero_mean_unit_variance_fixed(tid: MemberId): op = FixedZeroMeanUnitVariance( tid, tid, - mean=xr.DataArray([3, 4, 5], dims=("c")), - std=xr.DataArray([2.44948974, 2.44948974, 2.44948974], dims=("c")), + mean=xr.DataArray([3, 4, 5], dims=("channel",)), + std=xr.DataArray([2.44948974, 2.44948974, 2.44948974], dims=("channel",)), ) - data = xr.DataArray(np.arange(9).reshape((1, 3, 3)), dims=("b", "c", "x")) + data = xr.DataArray(np.arange(9).reshape((1, 3, 3)), dims=("b", "channel", "x")) expected = xr.DataArray( np.array( [ @@ -124,7 +126,7 @@ def test_zero_mean_unit_variance_fixed2(tid: MemberId): def test_zero_mean_unit_across_axes(tid: MemberId): from bioimageio.core.proc_ops import ZeroMeanUnitVariance - data = xr.DataArray(np.arange(18).reshape((2, 3, 3)), dims=("c", "x", "y")) + data = xr.DataArray(np.arange(18).reshape((2, 3, 3)), dims=("channel", "x", "y")) op = ZeroMeanUnitVariance( tid, @@ -136,7 +138,8 @@ def test_zero_mean_unit_across_axes(tid: MemberId): sample.stat = compute_measures(op.required_measures, [sample]) expected = xr.concat( - [(data[i : i + 1] - data[i].mean()) / data[i].std() for i in range(2)], dim="c" + [(data[i : i + 1] 
- data[i].mean()) / data[i].std() for i in range(2)], + dim="channel", ) op(sample) xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) @@ -146,7 +149,7 @@ def test_binarize(tid: MemberId): from bioimageio.core.proc_ops import Binarize op = Binarize(tid, tid, threshold=14) - data = xr.DataArray(np.arange(30).reshape((2, 3, 5)), dims=("x", "y", "c")) + data = xr.DataArray(np.arange(30).reshape((2, 3, 5)), dims=("x", "y", "channel")) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) expected = xr.zeros_like(data) expected[{"x": slice(1, None)}] = 1 @@ -158,7 +161,7 @@ def test_binarize2(tid: MemberId): from bioimageio.core.proc_ops import Binarize shape = (3, 32, 32) - axes = ("c", "y", "x") + axes = ("channel", "y", "x") np_data = np.random.rand(*shape) data = xr.DataArray(np_data, dims=axes) @@ -188,7 +191,7 @@ def test_clip(tid: MemberId): def test_combination_of_op_steps_with_dims_specified(tid: MemberId): from bioimageio.core.proc_ops import ZeroMeanUnitVariance - data = xr.DataArray(np.arange(18).reshape((2, 3, 3)), dims=("c", "x", "y")) + data = xr.DataArray(np.arange(18).reshape((2, 3, 3)), dims=("channel", "x", "y")) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) op = ZeroMeanUnitVariance( tid, @@ -239,7 +242,7 @@ def test_scale_mean_variance(tid: MemberId, axes: Optional[Tuple[AxisId, ...]]): from bioimageio.core.proc_ops import ScaleMeanVariance shape = (3, 32, 46) - ipt_axes = ("c", "y", "x") + ipt_axes = ("channel", "y", "x") np_data = np.random.rand(*shape) ipt_data = xr.DataArray(np_data, dims=ipt_axes) ref_data = xr.DataArray((np_data * 2) + 3, dims=ipt_axes) From 3b23c7c6e4d0c6c85c967c47fe0b44ae0312786e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 26 Mar 2025 22:35:14 +0100 Subject: [PATCH 153/187] bump spec version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 187cb3eb..af0dbb64 100644 --- a/setup.py 
+++ b/setup.py @@ -30,7 +30,7 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.4.0", + "bioimageio.spec ==0.5.4.1", "h5py", "imagecodecs", "imageio>=2.10", From 300af3722e8574d66459a7d6d3749453186093e7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 08:01:03 +0100 Subject: [PATCH 154/187] fix tests --- tests/test_bioimageio_collection.py | 1 + tests/test_proc_ops.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index ce652545..f2f89ca0 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -45,6 +45,7 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: "committed-turkey/1.2", # error deserializing VarianceScaling "creative-panda/1", # error deserializing Conv2D "dazzling-spider/0.1.0", # requires careamics + "discreet-rooster/1", # error deserializing VarianceScaling "discreete-rooster/1", # error deserializing VarianceScaling "dynamic-t-rex/1", # needs update to 0.5 for scale_linear with axes processing "easy-going-sauropod/1", # CPU implementation of Conv3D currently only supports the NHWC tensor format. 
diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 382083f7..2a68309e 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -100,7 +100,7 @@ def test_zero_mean_unit_variance_fixed(tid: MemberId): ] ] ), - dims=("b", "c", "x"), + dims=("b", "channel", "x"), ) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) op(sample) @@ -222,7 +222,7 @@ def test_combination_of_op_steps_with_dims_specified(tid: MemberId): ], ] ), - dims=("c", "x", "y"), + dims=("channel", "x", "y"), ) op(sample) @@ -337,7 +337,7 @@ def test_scale_range_axes(tid: MemberId): op = ScaleRange(tid, tid, lower_quantile, upper_quantile, eps=eps) np_data = np.arange(18).reshape((2, 3, 3)).astype("float32") - data = Tensor.from_xarray(xr.DataArray(np_data, dims=("c", "x", "y"))) + data = Tensor.from_xarray(xr.DataArray(np_data, dims=("channel", "x", "y"))) sample = Sample(members={tid: data}, stat={}, id=None) p_low_direct = lower_quantile.compute(sample) @@ -355,7 +355,7 @@ def test_scale_range_axes(tid: MemberId): np.testing.assert_allclose(p_up_expected.squeeze(), sample.stat[upper_quantile]) exp_data = (np_data - p_low_expected) / (p_up_expected - p_low_expected + eps) - expected = xr.DataArray(exp_data, dims=("c", "x", "y")) + expected = xr.DataArray(exp_data, dims=("channel", "x", "y")) op(sample) # NOTE xarray.testing.assert_allclose compares irrelavant properties here and fails although the result is correct @@ -366,7 +366,7 @@ def test_sigmoid(tid: MemberId): from bioimageio.core.proc_ops import Sigmoid shape = (3, 32, 32) - axes = ("c", "y", "x") + axes = ("channel", "y", "x") np_data = np.random.rand(*shape) data = xr.DataArray(np_data, dims=axes) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) From 7c5b52828bf05d374a8cce2aa5e2f77048202b13 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 08:10:44 +0100 Subject: [PATCH 155/187] update dev envs --- dev/env-gpu.yaml | 51 
++++++++++++++++++++++++++++++++++++++++++ dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 2 +- dev/env-wo-python.yaml | 2 +- dev/env.yaml | 2 +- 5 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 dev/env-gpu.yaml diff --git a/dev/env-gpu.yaml b/dev/env-gpu.yaml new file mode 100644 index 00000000..c2ac2903 --- /dev/null +++ b/dev/env-gpu.yaml @@ -0,0 +1,51 @@ +name: core-gpu +channels: + - conda-forge + - nodefaults +dependencies: + - bioimageio.spec==0.5.4.1 + - black + - cellpose # for model testing + # - crick # currently requires python<=3.9 + - h5py + - imagecodecs + - imageio>=2.5 + - jupyter + - jupyter-black + - keras>=3.0,<4 + - loguru + - matplotlib + - monai # for model testing + - numpy + - onnx + - packaging>=17.0 + - pdoc + - pip + - pre-commit + - psutil + - pydantic<2.9 + - pydantic-settings + - pyright + - pytest + - pytest-cov + - python=3.11 + - requests + - rich + - ruff + - ruyaml + - segment-anything # for model testing + - timm # for model testing + - tqdm + - typing-extensions + - xarray + - pip: + # - tf2onnx # TODO: add tf2onnx + - --extra-index-url https://download.pytorch.org/whl/cu126 + - careamics # TODO: add careamics for model testing (currently pins pydantic to <2.9) + - git+https://github.com/ChaoningZhang/MobileSAM.git # for model testing + - onnxruntime-gpu + - tensorflow + - torch + - torchaudio + - torchvision>=0.21 + - -e .. 
diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 5ec6199c..8bddaa28 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -5,7 +5,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.4.0 + - bioimageio.spec==0.5.4.1 - black - crick # uncommented - h5py diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 595665e8..783b0aaa 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -5,7 +5,7 @@ channels: - nodefaults # - pytroch # removed dependencies: - - bioimageio.spec>=0.5.4.0 + - bioimageio.spec==0.5.4.1 - black # - crick # currently requires python<=3.9 - h5py diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index 69013085..9790c4a0 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -5,7 +5,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.4.0 + - bioimageio.spec==0.5.4.1 - black # - crick # currently requires python<=3.9 - h5py diff --git a/dev/env.yaml b/dev/env.yaml index 7aaa1fed..cf7a4015 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -4,7 +4,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec>=0.5.4.0 + - bioimageio.spec==0.5.4.1 - black # - careamics # TODO: add careamics for model testing (currently pins pydantic to <2.9) - cellpose # for model testing From 5d68384930573581a2c68bf4eada15c306441646 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 14:47:28 +0100 Subject: [PATCH 156/187] update build workflow --- .github/workflows/build.yaml | 283 +++++++----------- bioimageio/core/stat_calculators.py | 2 +- .../core/weight_converters/pytorch_to_onnx.py | 2 +- dev/{env.yaml => env-full.yaml} | 2 +- dev/env-py38.yaml | 4 +- 5 files changed, 121 insertions(+), 172 deletions(-) rename dev/{env.yaml => env-full.yaml} (93%) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index aac82160..cef25743 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -8,7 +8,7 @@ on: defaults: run: - shell: 
micromamba-shell {0} + shell: bash -el {0} jobs: black: @@ -26,42 +26,60 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] include: - - python-version: '3.12' + - python-version: '3.8' + conda-env: py38 + - python-version: '3.9' + conda-env: wo-python + - python-version: '3.9' + conda-env: tf + - python-version: '3.10' + conda-env: wo-python + - python-version: '3.11' + conda-env: full run-expensive-tests: true + - python-version: '3.12' + conda-env: wo-python + - python-version: '3.13' + conda-env: wo-python steps: + - id: setup + run: | + echo "env-name=conda-${{ matrix.conda-env }}-${{ matrix.python-version }}" + echo "env-name=conda-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT + echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" + echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" >> $GITHUB_OUTPUT + [ ! -f dev/env-${{ matrix.conda-env }}.yaml ] && exit 1 - uses: actions/checkout@v4 - - name: Install Conda environment with Micromamba - if: matrix.python-version != '3.8' - uses: mamba-org/setup-micromamba@v1 + - uses: conda-incubator/setup-miniconda@v3 with: - cache-downloads: true - cache-environment: true - environment-file: dev/env-wo-python.yaml - create-args: >- - python=${{ matrix.python-version }} - post-cleanup: 'all' - env: - PIP_NO_DEPS: true - - name: Install py3.8 environment - if: matrix.python-version == '3.8' - uses: mamba-org/setup-micromamba@v1 - with: - cache-downloads: true - cache-environment: true - environment-file: dev/env-py38.yaml - post-cleanup: 'all' - env: - PIP_NO_DEPS: true - - name: additional setup - run: pip install --no-deps -e . 
+ auto-update-conda: true + auto-activate-base: true + activate-environment: ${{steps.setup.outputs.env-name}} + channel-priority: strict + miniforge-version: latest - name: Get Date id: get-date run: | echo "date=$(date +'%Y-%b')" echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT - shell: bash + echo "today=$(date -u '+%Y%m%d')" + echo "today=$(date -u '+%Y%m%d')" >> $GITHUB_OUTPUT + - name: Cache env + uses: actions/cache@v4 + with: + path: ${{ env.CONDA }}/envs/${{steps.setup.outputs.env-name}} + key: >- + conda-${{ runner.os }}-${{ runner.arch }}- + -${{steps.get-date.outputs.today }}- + -${{ hashFiles(matrix.conda-env) }}- + -${{env.CACHE_NUMBER }} + env: + CACHE_NUMBER: 0 + id: cache + - name: Update env + run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.output.env-file}} python=${{matrix.python-version}} + if: steps.cache.outputs.cache-hit != 'true' - uses: actions/cache@v4 with: path: bioimageio_cache @@ -77,148 +95,82 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.12'] include: - - python-version: '3.12' + - python-version: '3.8' + conda-env: py3.8 + - python-version: '3.9' + conda-env: tf + - python-version: '3.11' + conda-env: full report-coverage: true run-expensive-tests: true - steps: - - uses: actions/checkout@v4 - - name: Install Conda environment with Micromamba - if: matrix.python-version != '3.8' - uses: mamba-org/setup-micromamba@v1 - with: - cache-downloads: true - cache-environment: true - environment-file: dev/env-wo-python.yaml - create-args: >- - python=${{ matrix.python-version }} - post-cleanup: 'all' - env: - PIP_NO_DEPS: true - - name: Install py3.8 environment - if: matrix.python-version == '3.8' - uses: mamba-org/setup-micromamba@v1 - with: - cache-downloads: true - cache-environment: true - environment-file: dev/env-py38.yaml - post-cleanup: 'all' - env: - PIP_NO_DEPS: true - - name: additional setup spec - run: | - conda remove --yes --force bioimageio.spec || true # 
allow failure for cached env - pip install --no-deps git+https://github.com/bioimage-io/spec-bioimage-io - - name: additional setup core - run: pip install --no-deps -e . - - name: Get Date - id: get-date - run: | - echo "date=$(date +'%Y-%b')" - echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT - shell: bash - - uses: actions/cache@v4 - with: - path: bioimageio_cache - key: "test-spec-main-${{ steps.get-date.outputs.date }}" - - name: pytest-spec-main - run: pytest --disable-pytest-warnings - env: - BIOIMAGEIO_CACHE_PATH: bioimageio_cache - RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} - - if: matrix.report-coverage && github.event_name == 'pull_request' - uses: orgoro/coverage@v3.2 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} - - if: matrix.report-coverage && github.ref == 'refs/heads/main' - run: | - pip install genbadge[coverage] - genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg - coverage html -d dist/coverage - - if: matrix.report-coverage && github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 - with: - name: coverage - retention-days: 1 - path: dist - - - test-spec-main-tf: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.9', '3.12'] - steps: - - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@v1 - with: - cache-downloads: true - cache-environment: true - environment-file: dev/env-tf.yaml - condarc: | - channel-priority: flexible - create-args: >- - python=${{ matrix.python-version }} - post-cleanup: 'all' - env: - PIP_NO_DEPS: true - - name: additional setup spec - run: | - conda remove --yes --force bioimageio.spec || true # allow failure for cached env - pip install --no-deps git+https://github.com/bioimage-io/spec-bioimage-io - - name: additional setup core - run: pip install --no-deps -e . 
- - name: Get Date - id: get-date - run: | - echo "date=$(date +'%Y-%b')" - echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT - shell: bash - - uses: actions/cache@v4 - with: - path: bioimageio_cache - key: "test-spec-main-tf-${{ steps.get-date.outputs.date }}" - - run: pytest --disable-pytest-warnings - env: - BIOIMAGEIO_CACHE_PATH: bioimageio_cache + - python-version: '3.12' + conda-env: tf - test-spec-conda-tf: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.9', '3.12'] steps: - - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@v1 - with: - cache-downloads: true - cache-environment: true - environment-file: dev/env-tf.yaml - condarc: | - channel-priority: flexible - create-args: >- - python=${{ matrix.python-version }} - post-cleanup: 'all' - env: - PIP_NO_DEPS: true - - name: additional setup - run: pip install --no-deps -e . - - name: Get Date - id: get-date - run: | - echo "date=$(date +'%Y-%b')" - echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT - shell: bash - - uses: actions/cache@v4 - with: - path: bioimageio_cache - key: "test-spec-conda-tf-${{ steps.get-date.outputs.date }}" - - name: pytest-spec-tf - run: pytest --disable-pytest-warnings - env: - BIOIMAGEIO_CACHE_PATH: bioimageio_cache + - id: setup + run: | + echo "env-name=main-${{ matrix.conda-env }}-${{ matrix.python-version }}" + echo "env-name=main-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + auto-activate-base: true + activate-environment: ${{steps.setup.outputs.env-name}} + channel-priority: strict + miniforge-version: latest + - name: Get Date + id: get-date + run: | + echo "date=$(date +'%Y-week%U')" + echo "date=$(date +'%Y-week%U')" >> $GITHUB_OUTPUT + echo "today=$(date -u '+%Y%m%d')" + echo "today=$(date -u '+%Y%m%d')" >> $GITHUB_OUTPUT + - name: Cache env + uses: actions/cache@v4 + with: + path: ${{ env.CONDA 
}}/envs/${{steps.setup.outputs.env-name}} + key: >- + conda-${{ runner.os }}-${{ runner.arch }} + -${{steps.get-date.outputs.today }} + -${{ hashFiles(matrix.conda-env) }} + -${{env.CACHE_NUMBER }} + env: + CACHE_NUMBER: 0 + id: cache + - name: Update env + run: | + conda env update --name=${{steps.setup.outputs.env-name}} --file=dev/env-${{matrix.conda-env}}.yaml python=${{matrix.python-version}} + conda remove --yes --force bioimageio.spec + if: steps.cache.outputs.cache-hit != 'true' + - name: Install spec from main branch + run: pip install --no-deps git+https://github.com/bioimage-io/spec-bioimage-io + - uses: actions/cache@v4 + with: + path: bioimageio_cache + key: "test-spec-main-${{ steps.get-date.outputs.date }}" + - name: pytest-spec-main + run: pytest --disable-pytest-warnings + env: + BIOIMAGEIO_CACHE_PATH: bioimageio_cache + RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} + - if: matrix.report-coverage && github.event_name == 'pull_request' + uses: orgoro/coverage@v3.2 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + - if: matrix.report-coverage && github.ref == 'refs/heads/main' + run: | + pip install genbadge[coverage] + genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg + coverage html -d dist/coverage + - if: matrix.report-coverage && github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: coverage + retention-days: 1 + path: dist conda-build: needs: test-spec-conda @@ -253,9 +205,6 @@ jobs: needs: [test-spec-main] if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index efff5b63..95d3f729 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -1,6 +1,6 @@ from __future__ import 
annotations -import collections.abc +import collections import warnings from itertools import product from typing import ( diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py index cc3ed75b..72d819b1 100644 --- a/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -1,6 +1,6 @@ from pathlib import Path -import torch.jit +import torch from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr diff --git a/dev/env.yaml b/dev/env-full.yaml similarity index 93% rename from dev/env.yaml rename to dev/env-full.yaml index cf7a4015..bd045c5d 100644 --- a/dev/env.yaml +++ b/dev/env-full.yaml @@ -1,4 +1,4 @@ -name: full +name: core-full channels: - conda-forge - nodefaults diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 8bddaa28..30c9dd6b 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -1,5 +1,5 @@ -# manipulated copy of env.yaml wo dependencies 'for model testing' -name: core38 +# manipulated copy of env-full.yaml wo dependencies 'for model testing' +name: core-py38 channels: - conda-forge - nodefaults From 0464851a83e0f11efbc0ef03f01c744a7e171267 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 15:21:01 +0100 Subject: [PATCH 157/187] merge conda and main test jobs --- .github/workflows/build.yaml | 133 +++++++++++------------------------ 1 file changed, 42 insertions(+), 91 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index cef25743..a6dab51a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -22,35 +22,49 @@ jobs: jupyter: true version: "24.3" - test-spec-conda: + test: runs-on: ubuntu-latest strategy: matrix: include: - python-version: '3.8' conda-env: py38 + spec: conda + - python-version: '3.8' + conda-env: py38 + spec: main - python-version: '3.9' conda-env: wo-python + spec: conda - python-version: '3.9' conda-env: tf + spec: main 
- python-version: '3.10' conda-env: wo-python + spec: conda - python-version: '3.11' conda-env: full + spec: main run-expensive-tests: true + report-coverage: true - python-version: '3.12' conda-env: wo-python + spec: conda + - python-version: '3.12' + conda-env: tf + spec: conda - python-version: '3.13' conda-env: wo-python + spec: main steps: + - uses: actions/checkout@v4 - id: setup run: | - echo "env-name=conda-${{ matrix.conda-env }}-${{ matrix.python-version }}" - echo "env-name=conda-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT + echo "env-name=${{ matrix.spec }}-${{ matrix.conda-env }}-${{ matrix.python-version }}" + echo "env-name=${{ matrix.spec }}-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" >> $GITHUB_OUTPUT [ ! -f dev/env-${{ matrix.conda-env }}.yaml ] && exit 1 - - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true @@ -78,102 +92,39 @@ jobs: CACHE_NUMBER: 0 id: cache - name: Update env - run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.output.env-file}} python=${{matrix.python-version}} + run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.outputs.env-file}} python=${{matrix.python-version}} if: steps.cache.outputs.cache-hit != 'true' + - run: | + pyright --version + pyright - uses: actions/cache@v4 with: path: bioimageio_cache - key: "test-spec-conda-${{ steps.get-date.outputs.date }}" - - name: pytest-spec-conda + key: "test-${{matrix.spec}}-${{ steps.get-date.outputs.date }}" + - name: pytest run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} - - - test-spec-main: - runs-on: ubuntu-latest - strategy: - matrix: - include: - - python-version: '3.8' - conda-env: py3.8 - - 
python-version: '3.9' - conda-env: tf - - python-version: '3.11' - conda-env: full - report-coverage: true - run-expensive-tests: true - - python-version: '3.12' - conda-env: tf - - steps: - - id: setup - run: | - echo "env-name=main-${{ matrix.conda-env }}-${{ matrix.python-version }}" - echo "env-name=main-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT - - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - auto-activate-base: true - activate-environment: ${{steps.setup.outputs.env-name}} - channel-priority: strict - miniforge-version: latest - - name: Get Date - id: get-date - run: | - echo "date=$(date +'%Y-week%U')" - echo "date=$(date +'%Y-week%U')" >> $GITHUB_OUTPUT - echo "today=$(date -u '+%Y%m%d')" - echo "today=$(date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - - name: Cache env - uses: actions/cache@v4 - with: - path: ${{ env.CONDA }}/envs/${{steps.setup.outputs.env-name}} - key: >- - conda-${{ runner.os }}-${{ runner.arch }} - -${{steps.get-date.outputs.today }} - -${{ hashFiles(matrix.conda-env) }} - -${{env.CACHE_NUMBER }} - env: - CACHE_NUMBER: 0 - id: cache - - name: Update env - run: | - conda env update --name=${{steps.setup.outputs.env-name}} --file=dev/env-${{matrix.conda-env}}.yaml python=${{matrix.python-version}} - conda remove --yes --force bioimageio.spec - if: steps.cache.outputs.cache-hit != 'true' - - name: Install spec from main branch - run: pip install --no-deps git+https://github.com/bioimage-io/spec-bioimage-io - - uses: actions/cache@v4 - with: - path: bioimageio_cache - key: "test-spec-main-${{ steps.get-date.outputs.date }}" - - name: pytest-spec-main - run: pytest --disable-pytest-warnings - env: - BIOIMAGEIO_CACHE_PATH: bioimageio_cache - RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} - - if: matrix.report-coverage && github.event_name == 'pull_request' - uses: orgoro/coverage@v3.2 - with: - coverageFile: coverage.xml - token: 
${{ secrets.GITHUB_TOKEN }} - - if: matrix.report-coverage && github.ref == 'refs/heads/main' - run: | - pip install genbadge[coverage] - genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg - coverage html -d dist/coverage - - if: matrix.report-coverage && github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 - with: - name: coverage - retention-days: 1 - path: dist + - if: matrix.report-coverage && github.event_name == 'pull_request' + uses: orgoro/coverage@v3.2 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + - if: matrix.report-coverage && github.ref == 'refs/heads/main' + run: | + pip install genbadge[coverage] + genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg + coverage html -d dist/coverage + - if: matrix.report-coverage && github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: coverage + retention-days: 1 + path: dist conda-build: - needs: test-spec-conda + needs: test runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -202,7 +153,7 @@ jobs: conda-build -c conda-forge conda-recipe --no-test --output-folder ./pkgs docs: - needs: [test-spec-main] + needs: test if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest steps: From 621b2dba963befa578d032a5dcfe0c524eb04946 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 15:22:44 +0100 Subject: [PATCH 158/187] merge conda and main test jobs --- .github/workflows/build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a6dab51a..d39d6e56 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -97,6 +97,7 @@ jobs: - run: | pyright --version pyright + if: matrix.run-expensive-tests - uses: actions/cache@v4 with: path: bioimageio_cache From 6411e6faa73ad2e6580d2be02c62f4b90970f476 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 15:28:46 
+0100 Subject: [PATCH 159/187] remove tf env (tf is in env-full) --- .github/workflows/build.yaml | 14 +++----- dev/{env-wo-python.yaml => env-dev.yaml} | 2 +- dev/env-gpu.yaml | 1 + dev/env-py38.yaml | 2 +- dev/env-tf.yaml | 44 ------------------------ 5 files changed, 7 insertions(+), 56 deletions(-) rename dev/{env-wo-python.yaml => env-dev.yaml} (85%) delete mode 100644 dev/env-tf.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d39d6e56..fd00f7b4 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -34,13 +34,10 @@ jobs: conda-env: py38 spec: main - python-version: '3.9' - conda-env: wo-python + conda-env: dev spec: conda - - python-version: '3.9' - conda-env: tf - spec: main - python-version: '3.10' - conda-env: wo-python + conda-env: dev spec: conda - python-version: '3.11' conda-env: full @@ -48,13 +45,10 @@ jobs: run-expensive-tests: true report-coverage: true - python-version: '3.12' - conda-env: wo-python - spec: conda - - python-version: '3.12' - conda-env: tf + conda-env: dev spec: conda - python-version: '3.13' - conda-env: wo-python + conda-env: dev spec: main steps: - uses: actions/checkout@v4 diff --git a/dev/env-wo-python.yaml b/dev/env-dev.yaml similarity index 85% rename from dev/env-wo-python.yaml rename to dev/env-dev.yaml index 9790c4a0..08f3e3cc 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-dev.yaml @@ -1,4 +1,4 @@ -# modified copy of env.yaml wo dependencies 'for model testing' +# modified copy of env-full.yaml wo dependencies 'for model testing' name: core channels: - conda-forge diff --git a/dev/env-gpu.yaml b/dev/env-gpu.yaml index c2ac2903..ebc4b737 100644 --- a/dev/env-gpu.yaml +++ b/dev/env-gpu.yaml @@ -1,3 +1,4 @@ +# version of enf-full for running on GPU name: core-gpu channels: - conda-forge diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 30c9dd6b..0f8ebf7f 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -1,4 +1,4 @@ -# manipulated copy of 
env-full.yaml wo dependencies 'for model testing' +# manipulated copy of env-full.yaml wo dependencies 'for model testing' for python 3.8 name: core-py38 channels: - conda-forge diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml deleted file mode 100644 index 783b0aaa..00000000 --- a/dev/env-tf.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# modified copy of env.yaml wo dependencies 'for model testing' -name: core-tf # changed -channels: - - conda-forge - - nodefaults - # - pytroch # removed -dependencies: - - bioimageio.spec==0.5.4.1 - - black - # - crick # currently requires python<=3.9 - - h5py - - imagecodecs - - imageio>=2.5 - - jupyter - - jupyter-black - - keras>=2.15 # changed - - loguru - - matplotlib - - numpy - - onnx - - onnxruntime - - packaging>=17.0 - - pdoc - - pip - - pre-commit - - psutil - - pydantic - - pydantic-settings - - pyright - - pytest - - pytest-cov - # - python=3.9 # removed - # - pytorch>=2.1,<3 # removed - - requests - - rich - # - ruff # removed - - ruyaml - - tensorflow>=2.15 # added - # - torchvision # removed - - tqdm - - typing-extensions - - xarray - - pip: - - -e .. From e48fbda602c812eb1efddd3ede7795694fec326a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 15:36:31 +0100 Subject: [PATCH 160/187] search for env-file --- .github/workflows/build.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index fd00f7b4..bc3b6e67 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -58,7 +58,17 @@ jobs: echo "env-name=${{ matrix.spec }}-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" >> $GITHUB_OUTPUT - [ ! 
-f dev/env-${{ matrix.conda-env }}.yaml ] && exit 1 + - shell: python + run: | + from pathlib import Path + from pprint import pprint + if not (env_path:=Path("${{steps.setup.outputs.env-file}}")).exists(): + if env_path.parent.exists(): + pprint(env_path.parent.glob("*")) + else: + pprint(Path().glob("*")) + raise FileNotFoundError(f"{env_path} does not exist") + - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true From 58dcc92cd62991411d1749917efa31591f298bd5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 15:42:53 +0100 Subject: [PATCH 161/187] fix env-py38.yaml --- .github/workflows/build.yaml | 3 ++- dev/env-dev.yaml | 2 +- dev/env-py38.yaml | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bc3b6e67..77d3c4b0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -58,7 +58,8 @@ jobs: echo "env-name=${{ matrix.spec }}-${{ matrix.conda-env }}-${{ matrix.python-version }}" >> $GITHUB_OUTPUT echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" echo "env-file=dev/env-${{ matrix.conda-env }}.yaml" >> $GITHUB_OUTPUT - - shell: python + - name: check on env-file + shell: python run: | from pathlib import Path from pprint import pprint diff --git a/dev/env-dev.yaml b/dev/env-dev.yaml index 08f3e3cc..7bccb459 100644 --- a/dev/env-dev.yaml +++ b/dev/env-dev.yaml @@ -29,7 +29,7 @@ dependencies: - pyright - pytest - pytest-cov - # - python=3.9 # removed + # - python=3.11 # removed - pytorch>=2.1,<3 - requests - rich diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 0f8ebf7f..e095bd4d 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -13,7 +13,7 @@ dependencies: - imageio>=2.5 - jupyter - jupyter-black - - # keras>=3.0,<4 # removed + # - keras>=3.0,<4 # removed - loguru - matplotlib - numpy From f050601e8e0afbe89cacc28c2d9d1c6253b4f700 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 16:08:10 +0100 Subject: 
[PATCH 162/187] add tests.__init__.py for tests.utils --- tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b From e8286e2cbc0fb81a2bcd3bb84a2f10aa44fa0d9d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 16:23:02 +0100 Subject: [PATCH 163/187] only save bioimageio_cache once --- .github/workflows/build.yaml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 77d3c4b0..2da955ba 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,6 +44,7 @@ jobs: spec: main run-expensive-tests: true report-coverage: true + save-cache: true - python-version: '3.12' conda-env: dev spec: conda @@ -87,12 +88,12 @@ jobs: - name: Cache env uses: actions/cache@v4 with: - path: ${{ env.CONDA }}/envs/${{steps.setup.outputs.env-name}} + path: ${{env.CONDA}}/envs/${{steps.setup.outputs.env-name}} key: >- - conda-${{ runner.os }}-${{ runner.arch }}- - -${{steps.get-date.outputs.today }}- - -${{ hashFiles(matrix.conda-env) }}- - -${{env.CACHE_NUMBER }} + conda-${{runner.os}}-${{runner.arch}}- + -${{steps.get-date.outputs.today}}- + -${{hashFiles(matrix.conda-env)}}- + -${{env.CACHE_NUMBER}} env: CACHE_NUMBER: 0 id: cache @@ -104,9 +105,16 @@ jobs: pyright if: matrix.run-expensive-tests - uses: actions/cache@v4 + if: matrix.save-cache with: path: bioimageio_cache - key: "test-${{matrix.spec}}-${{ steps.get-date.outputs.date }}" + key: "test-${{matrix.spec}}-${{steps.get-date.outputs.date}}" + - uses: actions/cache/restore@v4 + if: ${{!matrix.save-cache}} + with: + path: bioimageio_cache + key: "test-${{matrix.spec}}-${{steps.get-date.outputs.date}}" + fail-on-cache-miss: true - name: pytest run: pytest --disable-pytest-warnings env: @@ -116,7 +124,7 @@ jobs: uses: orgoro/coverage@v3.2 with: 
coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{secrets.GITHUB_TOKEN}} - if: matrix.report-coverage && github.ref == 'refs/heads/main' run: | pip install genbadge[coverage] From f68edf5339cadb38afb717920289f22391ec6abb Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 18:11:06 +0100 Subject: [PATCH 164/187] puplulate cache before testing --- .github/workflows/build.yaml | 45 +++++++++++++++++++++-------- tests/test_bioimageio_collection.py | 19 ++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2da955ba..8110ab90 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -22,7 +22,36 @@ jobs: jupyter: true version: "24.3" + populate-cache: + runs-on: ubuntu-latest + outputs: + cache-key: ${{steps.cache-key.outputs.cache-key}} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + - name: Get Date + id: get-date + run: | + echo "date=$(date +'%Y-%b')" + echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT + - id: cache-key + run: echo "cache-key=test-${{steps.get-date.outputs.date}}" >> $GITHUB_OUTPUT + - uses: actions/cache@v4 + with: + path: bioimageio_cache + key: steps.get-cache-key.outputs.cache-key + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e .[dev] + - run: pytest --disable-pytest-warnings tests/test_bioimageio_collection.py::test_rdf_format_to_populate_cache + env: + BIOIMAGEIO_POPULATE_CACHE: '1' test: + needs: populate-cache runs-on: ubuntu-latest strategy: matrix: @@ -44,7 +73,6 @@ jobs: spec: main run-expensive-tests: true report-coverage: true - save-cache: true - python-version: '3.12' conda-env: dev spec: conda @@ -81,8 +109,6 @@ jobs: - name: Get Date id: get-date run: | - echo "date=$(date +'%Y-%b')" - echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT echo "today=$(date -u '+%Y%m%d')" echo "today=$(date 
-u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache env @@ -96,24 +122,19 @@ jobs: -${{env.CACHE_NUMBER}} env: CACHE_NUMBER: 0 - id: cache + id: cache-env - name: Update env run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.outputs.env-file}} python=${{matrix.python-version}} - if: steps.cache.outputs.cache-hit != 'true' + if: steps.cache-env.outputs.cache-hit != 'true' - run: | pyright --version pyright if: matrix.run-expensive-tests - - uses: actions/cache@v4 - if: matrix.save-cache - with: - path: bioimageio_cache - key: "test-${{matrix.spec}}-${{steps.get-date.outputs.date}}" - uses: actions/cache/restore@v4 - if: ${{!matrix.save-cache}} + id: bioimageio-cache with: path: bioimageio_cache - key: "test-${{matrix.spec}}-${{steps.get-date.outputs.date}}" + key: ${{needs.populate-cache.outputs.cache-key}} fail-on-cache-miss: true - name: pytest run: pytest --disable-pytest-warnings diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index f2f89ca0..26587dcb 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -1,3 +1,4 @@ +import os from typing import Any, Collection, Dict, Iterable, Mapping, Tuple import pytest @@ -74,6 +75,24 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: } +@pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls())) +def test_rdf_format_to_populate_cache( + descr_url: HttpUrl, + sha: Sha256, + key: str, +): + """this test is redundant if `test_rdf` runs, but is used in the CI to populate the cache""" + if os.environ.get("BIOIMAGEIO_POPULATE_CACHE") != "1": + pytest.skip("only runs in CI to populate cache") + + if key in KNOWN_INVALID: + pytest.skip("known failure") + + from bioimageio.core import load_description + + _ = load_description(descr_url, sha256=sha, perform_io_checks=True) + + @expensive_test @pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls())) def test_rdf( From 
6e2cd00a3ec8e12bf82b69cc3316f868e6b8b200 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 20:30:53 +0100 Subject: [PATCH 165/187] actually write to the cache! --- .github/workflows/build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 8110ab90..518ecc64 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -50,6 +50,7 @@ jobs: - run: pytest --disable-pytest-warnings tests/test_bioimageio_collection.py::test_rdf_format_to_populate_cache env: BIOIMAGEIO_POPULATE_CACHE: '1' + BIOIMAGEIO_CACHE_PATH: bioimageio_cache test: needs: populate-cache runs-on: ubuntu-latest From f1bf29bde4908073ba62e8f1d521899a51b95d9a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 20:39:14 +0100 Subject: [PATCH 166/187] fix populating cache --- .github/workflows/build.yaml | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 518ecc64..59c61796 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,11 +27,6 @@ jobs: outputs: cache-key: ${{steps.cache-key.outputs.cache-key}} steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - cache: 'pip' - name: Get Date id: get-date run: | @@ -40,14 +35,30 @@ jobs: - id: cache-key run: echo "cache-key=test-${{steps.get-date.outputs.date}}" >> $GITHUB_OUTPUT - uses: actions/cache@v4 + id: look-up with: path: bioimageio_cache - key: steps.get-cache-key.outputs.cache-key + key: ${{steps.cache-key.outputs.cache-key}} + lookup-only: true + - uses: actions/checkout@v4 + if: steps.look-up.outputs.cache-hit != 'true' + - uses: actions/cache@v4 + if: steps.look-up.outputs.cache-hit != 'true' + with: + path: bioimageio_cache + key: ${{steps.cache-key.outputs.cache-key}} + - uses: actions/setup-python@v5 + if: steps.look-up.outputs.cache-hit != 'true' + with: + 
python-version: '3.12' + cache: 'pip' - name: Install dependencies + if: steps.look-up.outputs.cache-hit != 'true' run: | pip install --upgrade pip pip install -e .[dev] - run: pytest --disable-pytest-warnings tests/test_bioimageio_collection.py::test_rdf_format_to_populate_cache + if: steps.look-up.outputs.cache-hit != 'true' env: BIOIMAGEIO_POPULATE_CACHE: '1' BIOIMAGEIO_CACHE_PATH: bioimageio_cache From a7dc02dd6c4db69a1f44dc15c2dfcc4e9ae36f7a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 21:47:28 +0100 Subject: [PATCH 167/187] point pyright to pyproject.toml --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 59c61796..a843219f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -140,7 +140,7 @@ jobs: if: steps.cache-env.outputs.cache-hit != 'true' - run: | pyright --version - pyright + pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }} if: matrix.run-expensive-tests - uses: actions/cache/restore@v4 id: bioimageio-cache From e427e40b8afd6267b2ac10997fe46537e8244be9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 21:51:22 +0100 Subject: [PATCH 168/187] use cache/restore for cache look-up --- .github/workflows/build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a843219f..f09a2589 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -35,6 +35,7 @@ jobs: - id: cache-key run: echo "cache-key=test-${{steps.get-date.outputs.date}}" >> $GITHUB_OUTPUT - uses: actions/cache@v4 + - uses: actions/cache/restore@v4 id: look-up with: path: bioimageio_cache From 5fefb844ce7102e67470da9d83fe8e8cbb173a4d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 22:15:23 +0100 Subject: [PATCH 169/187] fix env cache key --- .github/workflows/build.yaml | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f09a2589..645d13e9 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -129,9 +129,9 @@ jobs: with: path: ${{env.CONDA}}/envs/${{steps.setup.outputs.env-name}} key: >- - conda-${{runner.os}}-${{runner.arch}}- - -${{steps.get-date.outputs.today}}- - -${{hashFiles(matrix.conda-env)}}- + conda-${{runner.os}}-${{runner.arch}} + -${{steps.get-date.outputs.today}} + -${{hashFiles(steps.setup.outputs.env-file)}} -${{env.CACHE_NUMBER}} env: CACHE_NUMBER: 0 From 1fdb9e88b02d8ec7f5818611e9d76b878ee880d1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 22:54:25 +0100 Subject: [PATCH 170/187] pyright fixes --- .github/workflows/build.yaml | 1 - bioimageio/core/backends/keras_backend.py | 2 ++ bioimageio/core/backends/onnx_backend.py | 9 +++++---- bioimageio/core/backends/pytorch_backend.py | 10 ++++++++-- bioimageio/core/backends/torchscript_backend.py | 1 + 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 645d13e9..141a6795 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -34,7 +34,6 @@ jobs: echo "date=$(date +'%Y-%b')" >> $GITHUB_OUTPUT - id: cache-key run: echo "cache-key=test-${{steps.get-date.outputs.date}}" >> $GITHUB_OUTPUT - - uses: actions/cache@v4 - uses: actions/cache/restore@v4 id: look-up with: diff --git a/bioimageio/core/backends/keras_backend.py b/bioimageio/core/backends/keras_backend.py index ef65b8ea..afbcf62f 100644 --- a/bioimageio/core/backends/keras_backend.py +++ b/bioimageio/core/backends/keras_backend.py @@ -27,6 +27,8 @@ except Exception: import keras + tf_version = None + class KerasModelAdapter(ModelAdapter): def __init__( diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index b66c32fe..635a822e 100644 --- 
a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -1,3 +1,4 @@ +# pyright: reportUnknownVariableType=false import warnings from typing import Any, List, Optional, Sequence, Union @@ -25,8 +26,8 @@ def __init__( local_path = download(model_description.weights.onnx.source).path self._session = rt.InferenceSession(local_path.read_bytes()) - onnx_inputs = self._session.get_inputs() # type: ignore - self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] # type: ignore + onnx_inputs = self._session.get_inputs() + self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] if devices is not None: warnings.warn( @@ -40,11 +41,11 @@ def _forward_impl( None, dict(zip(self._input_names, input_arrays)) ) if is_list(result) or is_tuple(result): - result_seq = result + result_seq = list(result) else: result_seq = [result] - return result_seq # pyright: ignore[reportReturnType] + return result_seq def unload(self) -> None: warnings.warn( diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index 54983700..af1ea85d 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -146,10 +146,16 @@ def load_torch_state_dict( state = torch.load(f, map_location=devices[0], weights_only=True) incompatible = model.load_state_dict(state) - if incompatible is not None and incompatible.missing_keys: + if ( + incompatible is not None # pyright: ignore[reportUnnecessaryComparison] + and incompatible.missing_keys + ): logger.warning("Missing state dict keys: {}", incompatible.missing_keys) - if incompatible is not None and incompatible.unexpected_keys: + if ( + incompatible is not None # pyright: ignore[reportUnnecessaryComparison] + and incompatible.unexpected_keys + ): logger.warning("Unexpected state dict keys: {}", incompatible.unexpected_keys) return model diff --git a/bioimageio/core/backends/torchscript_backend.py 
b/bioimageio/core/backends/torchscript_backend.py index cb153a49..ce3ba131 100644 --- a/bioimageio/core/backends/torchscript_backend.py +++ b/bioimageio/core/backends/torchscript_backend.py @@ -1,3 +1,4 @@ +# pyright: reportUnknownVariableType=false import gc import warnings from typing import Any, List, Optional, Sequence, Union From 953c39346988bff4bc2a968002f00a140f1acc35 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 23:30:44 +0100 Subject: [PATCH 171/187] add bioimageio_cache --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1ed5e10a..688e4a88 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ dist/ docs/ dogfood/ typings/pooch/ +bioimageio_cache/ From 6582ca0a9a76139d7f9f71bd720c76d61357d65a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 23:46:43 +0100 Subject: [PATCH 172/187] limit pytest testfolders --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5633c6b8..9353fd4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ useLibraryCodeForTypes = true [tool.pytest.ini_options] addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood" +testpaths = ["bioimageio/core", "tests"] [tool.ruff] line-length = 88 From 659c395f90aa0700857bdc5000d517b1774770b5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 27 Mar 2025 23:49:12 +0100 Subject: [PATCH 173/187] save env cache earlier --- .github/workflows/build.yaml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 141a6795..f9ac9a70 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -124,7 +124,7 @@ jobs: echo "today=$(date -u '+%Y%m%d')" echo "today=$(date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache env - uses: actions/cache@v4 + uses: actions/cache/restore@v4 with: path: 
${{env.CONDA}}/envs/${{steps.setup.outputs.env-name}} key: >- @@ -138,6 +138,18 @@ jobs: - name: Update env run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.outputs.env-file}} python=${{matrix.python-version}} if: steps.cache-env.outputs.cache-hit != 'true' + - name: Update cached env + if: steps.cache-env.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: ${{env.CONDA}}/envs/${{steps.setup.outputs.env-name}} + key: >- + conda-${{runner.os}}-${{runner.arch}} + -${{steps.get-date.outputs.today}} + -${{hashFiles(steps.setup.outputs.env-file)}} + -${{env.CACHE_NUMBER}} + env: + CACHE_NUMBER: 0 - run: | pyright --version pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }} From f3af7663ae89456dc2a8ca1554d14a28d2727946 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 09:42:39 +0100 Subject: [PATCH 174/187] ignore docstring tests in backends for their optional dependencies --- .github/workflows/build.yaml | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f9ac9a70..552acf90 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -135,10 +135,10 @@ jobs: env: CACHE_NUMBER: 0 id: cache-env - - name: Update env + - name: Restore cached env run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.outputs.env-file}} python=${{matrix.python-version}} if: steps.cache-env.outputs.cache-hit != 'true' - - name: Update cached env + - name: Cache env if: steps.cache-env.outputs.cache-hit != 'true' uses: actions/cache/save@v4 with: diff --git a/pyproject.toml b/pyproject.toml index 9353fd4a..72748107 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules 
--failed-first --ignore=dogfood" +addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood --ignore=bioimageio/core/backends" testpaths = ["bioimageio/core", "tests"] [tool.ruff] From f21ebed8bc456630a7ba910db0633af4c21660ae Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 09:47:49 +0100 Subject: [PATCH 175/187] fix step names --- .github/workflows/build.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 552acf90..5705c23c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -123,7 +123,7 @@ jobs: run: | echo "today=$(date -u '+%Y%m%d')" echo "today=$(date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - - name: Cache env + - name: Restore cached env uses: actions/cache/restore@v4 with: path: ${{env.CONDA}}/envs/${{steps.setup.outputs.env-name}} @@ -135,7 +135,7 @@ jobs: env: CACHE_NUMBER: 0 id: cache-env - - name: Restore cached env + - name: Install env run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.outputs.env-file}} python=${{matrix.python-version}} if: steps.cache-env.outputs.cache-hit != 'true' - name: Cache env @@ -154,7 +154,8 @@ jobs: pyright --version pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }} if: matrix.run-expensive-tests - - uses: actions/cache/restore@v4 + - name: Restore bioimageio cache + uses: actions/cache/restore@v4 id: bioimageio-cache with: path: bioimageio_cache From b21deb16ef115dffb289efa2eb99f7c581ef136d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 09:50:25 +0100 Subject: [PATCH 176/187] ignore docstring tests in weight_converters for their optional dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 72748107..8b64e7ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ 
typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood --ignore=bioimageio/core/backends" +addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood --ignore=bioimageio/core/backends --ignore=bioimageio/core/weight_converters" testpaths = ["bioimageio/core", "tests"] [tool.ruff] From fec762ef860092dfa3d1cceace8730254e7ea3d7 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 10:44:54 +0100 Subject: [PATCH 177/187] try to ignore more explicitly --- .github/workflows/build.yaml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 5705c23c..0182a0f3 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -162,7 +162,7 @@ jobs: key: ${{needs.populate-cache.outputs.cache-key}} fail-on-cache-miss: true - name: pytest - run: pytest --disable-pytest-warnings + run: pytest --disable-pytest-warnings --cov=bioimageio --cov-report=xml --cov-append --capture=no --failed-first --doctest-modules --ignore=bioimageio/core/backends --ignore=bioimageio/core/weight_converters env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} diff --git a/pyproject.toml b/pyproject.toml index 8b64e7ab..4d5c4083 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ useLibraryCodeForTypes = true [tool.pytest.ini_options] addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood --ignore=bioimageio/core/backends --ignore=bioimageio/core/weight_converters" +# FIXME: addopts is somehow not respected in the build.yaml workflow, so these opts are copied there! 
testpaths = ["bioimageio/core", "tests"] [tool.ruff] From 677298cc3caf2fb3b813380306953eb0287fa51a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 13:55:50 +0100 Subject: [PATCH 178/187] add conda list step --- .github/workflows/build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0182a0f3..1d6e4980 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -150,6 +150,7 @@ jobs: -${{env.CACHE_NUMBER}} env: CACHE_NUMBER: 0 + - run: conda list - run: | pyright --version pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }} From fab4c7f3adee9552ef8b0bfb807432dfd0b1d011 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 14:12:59 +0100 Subject: [PATCH 179/187] use long options instead --- .github/workflows/build.yaml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 1d6e4980..def9720b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -163,7 +163,7 @@ jobs: key: ${{needs.populate-cache.outputs.cache-key}} fail-on-cache-miss: true - name: pytest - run: pytest --disable-pytest-warnings --cov=bioimageio --cov-report=xml --cov-append --capture=no --failed-first --doctest-modules --ignore=bioimageio/core/backends --ignore=bioimageio/core/weight_converters + run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} diff --git a/pyproject.toml b/pyproject.toml index 4d5c4083..5d58fe72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,8 +40,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml --cov-append --capture=no --doctest-modules --failed-first --ignore=dogfood --ignore=bioimageio/core/backends 
--ignore=bioimageio/core/weight_converters" -# FIXME: addopts is somehow not respected in the build.yaml workflow, so these opts are copied there! +addopts = "--cov bioimageio --cov-report xml --cov-append --capture no --doctest-modules --failed-first --ignore dogfood --ignore bioimageio/core/backends --ignore bioimageio/core/weight_converters" testpaths = ["bioimageio/core", "tests"] [tool.ruff] From 33fb00c821ed8a35f97cdd1a38c30d72dc16b37b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 14:35:45 +0100 Subject: [PATCH 180/187] pin xarray --- dev/env-dev.yaml | 2 +- dev/env-full.yaml | 2 +- dev/env-gpu.yaml | 2 +- dev/env-py38.yaml | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/env-dev.yaml b/dev/env-dev.yaml index 7bccb459..13378376 100644 --- a/dev/env-dev.yaml +++ b/dev/env-dev.yaml @@ -39,6 +39,6 @@ dependencies: - torchvision - tqdm - typing-extensions - - xarray + - xarray>=2024.01,<2025.3.0 - pip: - -e .. diff --git a/dev/env-full.yaml b/dev/env-full.yaml index bd045c5d..a9dc0132 100644 --- a/dev/env-full.yaml +++ b/dev/env-full.yaml @@ -43,7 +43,7 @@ dependencies: - torchvision>=0.21 - tqdm - typing-extensions - - xarray + - xarray>=2024.01,<2025.3.0 - pip: - git+https://github.com/ChaoningZhang/MobileSAM.git # for model testing - -e .. diff --git a/dev/env-gpu.yaml b/dev/env-gpu.yaml index ebc4b737..7fc2123c 100644 --- a/dev/env-gpu.yaml +++ b/dev/env-gpu.yaml @@ -38,7 +38,7 @@ dependencies: - timm # for model testing - tqdm - typing-extensions - - xarray + - xarray>=2024.01,<2025.3.0 - pip: # - tf2onnx # TODO: add tf2onnx - --extra-index-url https://download.pytorch.org/whl/cu126 diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index e095bd4d..911c27d0 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -39,6 +39,6 @@ dependencies: - torchvision - tqdm - typing-extensions - - xarray + - xarray>=2024.01,<2025.3.0 - pip: - -e .. 
diff --git a/setup.py b/setup.py index af0dbb64..d2587e2a 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "ruyaml", "tqdm", "typing-extensions", - "xarray", + "xarray>=2024.01,<2025.3.0", ], include_package_data=True, extras_require={ From 820e3c283af9cf42b0d58af00f4d180be5b73ba9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 14:46:51 +0100 Subject: [PATCH 181/187] improve caching --- .github/workflows/build.yaml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index def9720b..c75aa055 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -85,12 +85,15 @@ jobs: spec: main run-expensive-tests: true report-coverage: true + save-cache: true - python-version: '3.12' conda-env: dev spec: conda - python-version: '3.13' conda-env: dev spec: main + save-cache: true + steps: - uses: actions/checkout@v4 - id: setup @@ -160,13 +163,18 @@ jobs: id: bioimageio-cache with: path: bioimageio_cache - key: ${{needs.populate-cache.outputs.cache-key}} - fail-on-cache-miss: true + key: ${{needs.populate-cache.outputs.cache-key}}${{matrix.run-expensive-tests && '' || '-light'}} - name: pytest run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} + - name: Save updated bioimageio cache + if: matrix.save-cache + uses: actions/cache/save@v4 + with: + path: bioimageio_cache + key: ${{needs.populate-cache.outputs.cache-key}}${{matrix.run-expensive-tests && '' || '-light'}} - if: matrix.report-coverage && github.event_name == 'pull_request' uses: orgoro/coverage@v3.2 with: From 51f163bcc830b9cf98fc65c66798bcba1a0aee86 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 14:55:33 +0100 Subject: [PATCH 182/187] xarray 2023 compatibility --- bioimageio/core/_resource_tests.py | 4 ++-- bioimageio/core/backends/keras_backend.py | 6 +++--- 
bioimageio/core/backends/onnx_backend.py | 2 +- bioimageio/core/backends/tensorflow_backend.py | 2 +- bioimageio/core/io.py | 2 +- bioimageio/core/stat_calculators.py | 10 +++++++++- .../core/weight_converters/keras_to_tensorflow.py | 4 ++-- dev/env-py38.yaml | 2 +- scripts/show_diff.py | 2 +- setup.py | 2 +- 10 files changed, 22 insertions(+), 14 deletions(-) diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index dd5fc09e..327e540a 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -133,7 +133,7 @@ def enable_determinism( try: os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0" try: - import tensorflow as tf + import tensorflow as tf # pyright: ignore[reportMissingTypeStubs] except ImportError: pass else: @@ -147,7 +147,7 @@ def enable_determinism( if weight_formats is None or "keras_hdf5" in weight_formats: try: try: - import keras + import keras # pyright: ignore[reportMissingTypeStubs] except ImportError: pass else: diff --git a/bioimageio/core/backends/keras_backend.py b/bioimageio/core/backends/keras_backend.py index afbcf62f..1c10da7d 100644 --- a/bioimageio/core/backends/keras_backend.py +++ b/bioimageio/core/backends/keras_backend.py @@ -18,14 +18,14 @@ # by default, we use the keras integrated with tensorflow # TODO: check if we should prefer keras try: - import tensorflow as tf - from tensorflow import ( + import tensorflow as tf # pyright: ignore[reportMissingTypeStubs] + from tensorflow import ( # pyright: ignore[reportMissingTypeStubs] keras, # pyright: ignore[reportUnknownVariableType,reportAttributeAccessIssue] ) tf_version = Version(tf.__version__) except Exception: - import keras + import keras # pyright: ignore[reportMissingTypeStubs] tf_version = None diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index 635a822e..d5b89152 100644 --- a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -2,7 
+2,7 @@ import warnings from typing import Any, List, Optional, Sequence, Union -import onnxruntime as rt +import onnxruntime as rt # pyright: ignore[reportMissingTypeStubs] from numpy.typing import NDArray from bioimageio.spec._internal.type_guards import is_list, is_tuple diff --git a/bioimageio/core/backends/tensorflow_backend.py b/bioimageio/core/backends/tensorflow_backend.py index 83fa4813..99efe9ef 100644 --- a/bioimageio/core/backends/tensorflow_backend.py +++ b/bioimageio/core/backends/tensorflow_backend.py @@ -2,7 +2,7 @@ from typing import Any, Optional, Sequence, Union import numpy as np -import tensorflow as tf +import tensorflow as tf # pyright: ignore[reportMissingTypeStubs] from loguru import logger from numpy.typing import NDArray diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index 81ac60c4..dc5b70db 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -14,7 +14,7 @@ Union, ) -import h5py +import h5py # pyright: ignore[reportMissingTypeStubs] import numpy as np from imageio.v3 import imread, imwrite # type: ignore from loguru import logger diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index 95d3f729..515fe843 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -139,7 +139,15 @@ def compute( else: n = int(np.prod([tensor.sizes[d] for d in self._axes])) - var = xr.dot(c, c, dim=self._axes) / n + if xr.__version__.startswith("2023"): + var = ( # pyright: ignore[reportUnknownVariableType] + xr.dot(c, c, dims=self._axes) / n + ) + else: + var = ( # pyright: ignore[reportUnknownVariableType] + xr.dot(c, c, dim=self._axes) / n + ) + assert isinstance(var, xr.DataArray) std = np.sqrt(var) assert isinstance(std, xr.DataArray) diff --git a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py index 261e335c..ac8886e1 100644 --- a/bioimageio/core/weight_converters/keras_to_tensorflow.py +++ 
b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -4,7 +4,7 @@ from typing import Union, no_type_check from zipfile import ZipFile -import tensorflow +import tensorflow # pyright: ignore[reportMissingTypeStubs] from bioimageio.spec._internal.io import download from bioimageio.spec._internal.version_type import Version @@ -24,7 +24,7 @@ from tensorflow import keras # type: ignore except Exception: # if the above fails try to export with the standalone keras - import keras + import keras # pyright: ignore[reportMissingTypeStubs] def convert( diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 911c27d0..6fc6597a 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -39,6 +39,6 @@ dependencies: - torchvision - tqdm - typing-extensions - - xarray>=2024.01,<2025.3.0 + - xarray>=2023.01,<2025.3.0 - pip: - -e .. diff --git a/scripts/show_diff.py b/scripts/show_diff.py index 8889437c..3e273d79 100644 --- a/scripts/show_diff.py +++ b/scripts/show_diff.py @@ -2,7 +2,7 @@ from pathlib import Path from tempfile import TemporaryDirectory -import pooch +import pooch # pyright: ignore[reportMissingTypeStubs] from bioimageio.core import load_description, save_bioimageio_yaml_only diff --git a/setup.py b/setup.py index d2587e2a..0755ff2d 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "ruyaml", "tqdm", "typing-extensions", - "xarray>=2024.01,<2025.3.0", + "xarray>=2023.01,<2025.3.0", ], include_package_data=True, extras_require={ From 6f78383228c8ae12dc1070eb112be43570ca5ae5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 15:56:51 +0100 Subject: [PATCH 183/187] test_zero_mean_unit_variance_fixed --- tests/test_proc_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index 2a68309e..be87f54b 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -89,7 +89,7 @@ def test_zero_mean_unit_variance_fixed(tid: MemberId): mean=xr.DataArray([3, 4, 5], 
dims=("channel",)), std=xr.DataArray([2.44948974, 2.44948974, 2.44948974], dims=("channel",)), ) - data = xr.DataArray(np.arange(9).reshape((1, 3, 3)), dims=("b", "channel", "x")) + data = xr.DataArray(np.arange(9).reshape((1, 3, 3)), dims=("batch", "channel", "x")) expected = xr.DataArray( np.array( [ @@ -100,7 +100,7 @@ def test_zero_mean_unit_variance_fixed(tid: MemberId): ] ] ), - dims=("b", "channel", "x"), + dims=("batch", "channel", "x"), ) sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) op(sample) From e2596d1db0e5d9bc55497de58648bbef1a0a17be Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 15:57:04 +0100 Subject: [PATCH 184/187] name pyright step --- .github/workflows/build.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c75aa055..2b1da585 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -154,7 +154,8 @@ jobs: env: CACHE_NUMBER: 0 - run: conda list - - run: | + - name: Pyright + run: | pyright --version pyright -p pyproject.toml --pythonversion ${{ matrix.python-version }} if: matrix.run-expensive-tests From 6a248ca5924f9cbbb8b715d311406e35168de142 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 15:57:16 +0100 Subject: [PATCH 185/187] remove default for source: CliPositionalArg --- bioimageio/core/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 43a20282..8e62239d 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -121,7 +121,7 @@ def log(self, descr: Union[ResourceDescr, InvalidDescr]): class WithSource(ArgMixin): - source: CliPositionalArg[str] = "." + source: CliPositionalArg[str] """Url/path to a (folder with a) bioimageio.yaml/rdf.yaml file or a bioimage.io resource identifier, e.g. 
'affable-shark'""" From 2ef774f062bd3b91e0cad970aaabd67ac8d94597 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 16:37:47 +0100 Subject: [PATCH 186/187] install uncached pip deps --- .github/workflows/build.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2b1da585..8ce50a1a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -141,6 +141,14 @@ jobs: - name: Install env run: conda env update --name=${{steps.setup.outputs.env-name}} --file=${{steps.setup.outputs.env-file}} python=${{matrix.python-version}} if: steps.cache-env.outputs.cache-hit != 'true' + - name: Install uncached pip dependencies + run: | + pip install --upgrade pip + pip install --no-deps -e . + - name: Install uncached pip dependencies for 'full' environment + if: matrix.conda-env == 'full' + run: | + pip install git+https://github.com/ChaoningZhang/MobileSAM.git - name: Cache env if: steps.cache-env.outputs.cache-hit != 'true' uses: actions/cache/save@v4 From 7e9888a9dd213ef6d66a2633ed1c60e0b6d7e88e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 28 Mar 2025 20:44:27 +0100 Subject: [PATCH 187/187] display validation summary on test failure --- tests/test_bioimageio_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index 26587dcb..92f3dd5c 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -107,7 +107,7 @@ def test_rdf( descr = load_description_and_test(descr_url, sha256=sha, stop_early=True) - assert not isinstance(descr, InvalidDescr) + assert not isinstance(descr, InvalidDescr), descr.validation_summary.display() assert ( descr.validation_summary.status == "passed" ), descr.validation_summary.display()