Merge pull request #229 from bioimage-io/update-build-model

constantinpape · web-flow · commit 504d845a9b6e · 2022-02-25T22:01:09.000+01:00
Update build model interface
diff --git a/bioimageio/core/build_spec/build_model.py b/bioimageio/core/build_spec/build_model.py
@@ -217,7 +217,7 @@ def _get_input_tensor(path, name, step, min_shape, data_range, axes, preprocessi
     data_range = _get_data_range(data_range, test_in.dtype)
     kwargs = {}
     if preprocessing is not None:
-        kwargs["preprocessing"] = [{"name": k, "kwargs": v} for k, v in preprocessing.items()]
+        kwargs["preprocessing"] = preprocessing
 
     inputs = model_spec.raw_nodes.InputTensor(
         name="input" if name is None else name,
@@ -245,7 +245,7 @@ def _get_output_tensor(path, name, reference_tensor, scale, offset, axes, data_r
     data_range = _get_data_range(data_range, test_out.dtype)
     kwargs = {}
     if postprocessing is not None:
-        kwargs["postprocessing"] = [{"name": k, "kwargs": v} for k, v in postprocessing.items()]
+        kwargs["postprocessing"] = postprocessing
     if halo is not None:
         kwargs["halo"] = halo
 
@@ -260,9 +260,16 @@ def _get_output_tensor(path, name, reference_tensor, scale, offset, axes, data_r
     return outputs
 
 
-# TODO The citation entry should be improved so that we can properly derive doi vs. url
-def _build_cite(cite: Dict[str, str]):
-    citation_list = [spec.rdf.raw_nodes.CiteEntry(text=k, url=v) for k, v in cite.items()]
+def _build_cite(cite: List[Dict[str, str]]):
+    citation_list = []
+    for entry in cite:
+        if "doi" in entry:
+            spec_entry = spec.rdf.raw_nodes.CiteEntry(text=entry["text"], doi=entry["doi"])
+        elif "url" in entry:
+            spec_entry = spec.rdf.raw_nodes.CiteEntry(text=entry["text"], url=entry["url"])
+        else:
+            raise ValueError(f"Expect one of doi or url in citation enrty {entry}")
+        citation_list.append(spec_entry)
     return citation_list
 
 
@@ -346,7 +353,7 @@ def _get_deepimagej_config(
     if any(preproc is not None for preproc in preprocessing):
         assert len(preprocessing) == 1
         preprocess_ij = [
-            _get_deepimagej_macro(name, kwargs, export_folder) for name, kwargs in preprocessing[0].items()
+            _get_deepimagej_macro(preproc["name"], preproc["kwargs"], export_folder) for preproc in preprocessing[0]
         ]
         attachments = [preproc["kwargs"] for preproc in preprocess_ij]
     else:
@@ -356,7 +363,7 @@ def _get_deepimagej_config(
     if any(postproc is not None for postproc in postprocessing):
         assert len(postprocessing) == 1
         postprocess_ij = [
-            _get_deepimagej_macro(name, kwargs, export_folder) for name, kwargs in postprocessing[0].items()
+            _get_deepimagej_macro(postproc["name"], postproc["kwargs"], export_folder) for postproc in postprocessing[0]
         ]
         if attachments is None:
             attachments = [postproc["kwargs"] for postproc in postprocess_ij]
@@ -595,7 +602,7 @@ def build_model(
     authors: List[Dict[str, str]],
     tags: List[Union[str, Path]],
     documentation: Union[str, Path],
-    cite: Dict[str, str],
+    cite: List[Dict[str, str]],
     output_path: Union[str, Path],
     # model specific optional
     architecture: Optional[str] = None,
@@ -614,8 +621,8 @@ def build_model(
     output_offset: Optional[List[List[int]]] = None,
     output_data_range: Optional[List[List[Union[int, str]]]] = None,
     halo: Optional[List[List[int]]] = None,
-    preprocessing: Optional[List[Dict[str, Dict[str, Union[int, float, str]]]]] = None,
-    postprocessing: Optional[List[Dict[str, Dict[str, Union[int, float, str]]]]] = None,
+    preprocessing: Optional[List[List[Dict[str, Dict[str, Union[int, float, str]]]]]] = None,
+    postprocessing: Optional[List[List[Dict[str, Dict[str, Union[int, float, str]]]]]] = None,
     pixel_sizes: Optional[List[Dict[str, float]]] = None,
     # general optional
     maintainers: Optional[List[Dict[str, str]]] = None,
@@ -625,7 +632,7 @@ def build_model(
     attachments: Optional[Dict[str, Union[str, List[str]]]] = None,
     packaged_by: Optional[List[str]] = None,
     run_mode: Optional[str] = None,
-    parent: Optional[Tuple[str, str]] = None,
+    parent: Optional[Dict[str, str]] = None,
     config: Optional[Dict[str, Any]] = None,
     dependencies: Optional[Union[Path, str]] = None,
     links: Optional[List[str]] = None,
@@ -655,7 +662,7 @@ def build_model(
         tags=["segmentation", "light sheet data"],
         license="CC-BY-4.0",
         documentation="./documentation.md",
-        cite={"Architecture": "https://my_architecture.com"},
+        cite=[{"text": "Ronneberger et al. U-Net", "doi": "10.1007/978-3-319-24574-4_28"}],
         output_path="my-model.zip"
     )
     ```
@@ -671,7 +678,7 @@ def build_model(
         authors: the authors of this model.
         tags: list of tags for this model.
         documentation: relative file path to markdown documentation for this model.
-        cite: citations for this model.
+        cite: references for this model, as a list of dicts with "text" and either "doi" or "url".
         output_path: where to save the zipped model package.
         architecture: the file with the source code for the model architecture and the corresponding class.
             Only required for models with pytorch_state_dict weight format.
@@ -701,7 +708,7 @@ def build_model(
         attachments: list of additional files to package with the model.
         packaged_by: list of authors that have packaged this model.
         run_mode: custom run mode for this model.
-        parent: id of the parent model from which this model is derived and sha256 of the corresponding weight file.
+        parent: dict with "uri" (id of the parent model this model is derived from) and "sha256" (of its rdf file).
         config: custom configuration for this model.
         dependencies: relative path to file with dependencies for this model.
         root: optional root path for relative paths. This can be helpful when building a spec from another model spec.
@@ -882,7 +889,7 @@ def build_model(
         kwargs["maintainers"] = [model_spec.raw_nodes.Maintainer(**m) for m in maintainers]
     if parent is not None:
         assert len(parent) == 2
-        kwargs["parent"] = {"uri": parent[0], "sha256": parent[1]}
+        kwargs["parent"] = parent
 
     try:
         model = model_spec.raw_nodes.Model(
diff --git a/example/bioimageio-core-usage.ipynb b/example/bioimageio-core-usage.ipynb
@@ -68,11 +68,12 @@
     "# the model can be loaded using different representations:\n",
     "\n",
     "# the doi of the zenodo entry corresponding to the model\n",
-    "rdf_doi = \"10.5072/zenodo.934248\"\n",
+    "rdf_doi = \"10.5281/zenodo.6287342\"\n",
     "\n",
     "# the url of the yaml file containing the model resource description\n",
-    "rdf_url = \"https://sandbox.zenodo.org/record/934248/files/rdf.yaml\"\n",
+    "rdf_url = \"https://zenodo.org/record/6287342/files/rdf.yaml\"\n",
     "\n",
+    "# FIXME the model currently does not show up on the website\n",
     "# filepath to the downloaded model (either zipped package or yaml)\n",
     "# (go to https://bioimage.io/#/?id=10.5072%2Fzenodo.881940, select the download icon and select \"ilastik\")\n",
     "rdf_path = \"/home/pape/Downloads/dsb-nuclei-boundarymodelnew_pytorch_state_dict.zip\""
@@ -383,24 +384,22 @@
     "np.save(new_output_path, new_output)\n",
     "\n",
     "# add thresholding as post-processing procedure to our model\n",
-    "preprocessing = [\n",
-    "    {prep.name: prep.kwargs for prep in model_resource.inputs[0].preprocessing}\n",
-    "]\n",
-    "postprocessing = [{\"binarize\": {\"threshold\": threshold}}]\n",
+    "preprocessing = [[{\"name\": prep.name, \"kwargs\": prep.kwargs} for prep in inp.preprocessing] for inp in model_resource.inputs]\n",
+    "postprocessing = [[{\"name\": \"binarize\", \"kwargs\": {\"threshold\": threshold}}]]\n",
     "\n",
     "# get the model architecture\n",
     "# note that this is only necessary for pytorch state dict models\n",
     "model_source = get_architecture_source(rdf_doi)\n",
     "\n",
     "# we use the `parent` field to indicate that the new model is created based on\n",
     "# the nucleus segmentation model we have obtained from bioimage.io\n",
-    "# this field is optional and only needs to be given for models that are created based on\n",
-    "# other models from bioimage.io\n",
+    "# this field is optional and only needs to be given for models that are created based on other models from bioimage.io\n",
     "# the parent is specified via it's doi and the hash of its rdf file\n",
-    "rdf_file = os.path.join(model_resource.root_path, \"rdf.yaml\")\n",
+    "model_root_folder = os.path.split(model_resource.weights[\"pytorch_state_dict\"].source)[0]\n",
+    "rdf_file = os.path.join(model_root_folder, \"rdf.yaml\")\n",
     "with open(rdf_file, \"rb\") as f:\n",
-    "    weight_hash = hashlib.sha256(f.read()).hexdigest()\n",
-    "parent = (rdf_doi, weight_hash)\n",
+    "    rdf_hash = hashlib.sha256(f.read()).hexdigest()\n",
+    "parent = {\"uri\": rdf_doi, \"sha256\": rdf_hash}\n",
     "\n",
     "# the name of the new model and where to save the zipped model package\n",
     "name = \"new-model1\"\n",
@@ -410,7 +409,7 @@
     "# all this additional information is passed as plain python types and will be converted into the bioimageio representation internally  \n",
     "# for more informantion, check out the function signature\n",
     "# https://github.com/bioimage-io/core-bioimage-io-python/blob/main/bioimageio/core/build_spec/build_model.py#L252\n",
-    "cite = {cite_entry.text: cite_entry.url for cite_entry in model_resource.cite}\n",
+    "cite = [{\"text\": cite_entry.text, \"url\": cite_entry.url} for cite_entry in model_resource.cite]\n",
     "\n",
     "# the axes descriptions for the inputs / outputs\n",
     "input_axes = [\"bcyx\"]\n",
@@ -516,9 +515,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "python3.bkp"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -530,7 +529,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.11"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/tests/build_spec/test_build_spec.py b/tests/build_spec/test_build_spec.py
@@ -25,7 +25,17 @@ def _test_build_spec(
     assert isinstance(model_spec, spec.model.raw_nodes.Model)
     weight_source = model_spec.weights[weight_type].source
 
-    cite = {entry.text: entry.doi if entry.url is missing else entry.url for entry in model_spec.cite}
+    cite = []
+    for entry in model_spec.cite:
+        entry_ = {"text": entry.text}
+        has_url = entry.url is not missing
+        has_doi = entry.doi is not missing
+        assert has_url != has_doi
+        if has_doi:
+            entry_["doi"] = entry.doi
+        else:
+            entry_["url"] = entry.url
+        cite.append(entry_)
 
     weight_spec = model_spec.weights[weight_type]
     dep_file = None if weight_spec.dependencies is missing else resolve_source(weight_spec.dependencies.file, root)
@@ -52,11 +62,15 @@ def _test_build_spec(
     input_axes = [input_.axes for input_ in model_spec.inputs]
     output_axes = [output.axes for output in model_spec.outputs]
     preprocessing = [
-        None if input_.preprocessing == missing else {preproc.name: preproc.kwargs for preproc in input_.preprocessing}
+        None
+        if input_.preprocessing is missing
+        else [{"name": preproc.name, "kwargs": preproc.kwargs} for preproc in input_.preprocessing]
         for input_ in model_spec.inputs
     ]
     postprocessing = [
-        None if output.postprocessing == missing else {preproc.name: preproc.kwargs for preproc in output.preprocessing}
+        None
+        if output.postprocessing is missing
+        else [{"name": preproc.name, "kwargs": preproc.kwargs} for preproc in output.preprocessing]
         for output in model_spec.outputs
     ]