|
1 |
| -import warnings |
2 |
| -from collections import defaultdict |
3 |
| -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Type |
4 |
| - |
5 |
| -import xarray as xr |
| 1 | +from typing import List, Optional, Sequence, Union |
6 | 2 |
|
7 | 3 | from bioimageio.core.resource_io import nodes
|
8 |
| -from bioimageio.core.statistical_measures import Mean, Measure, Percentile, Std |
9 |
| -from bioimageio.spec.model.raw_nodes import PostprocessingName, PreprocessingName |
10 |
| -from ._processing import ( |
11 |
| - Binarize, |
12 |
| - Clip, |
13 |
| - EnsureDtype, |
14 |
| - Processing, |
15 |
| - ScaleLinear, |
16 |
| - ScaleMeanVariance, |
17 |
| - ScaleRange, |
18 |
| - Sigmoid, |
19 |
| - ZeroMeanUnitVariance, |
20 |
| -) |
| 4 | +from ._processing import EnsureDtype, KNOWN_PROCESSING, Processing |
| 5 | +from ._utils import ComputedMeasures, PER_DATASET, PER_SAMPLE, RequiredMeasures, Sample |
21 | 6 |
|
22 | 7 | try:
|
23 | 8 | from typing import Literal
|
24 | 9 | except ImportError:
|
25 | 10 | from typing_extensions import Literal # type: ignore
|
26 | 11 |
|
27 |
| -KNOWN_PREPROCESSING: Dict[PreprocessingName, Type[Processing]] = { |
28 |
| - "binarize": Binarize, |
29 |
| - "clip": Clip, |
30 |
| - "scale_linear": ScaleLinear, |
31 |
| - "scale_range": ScaleRange, |
32 |
| - "sigmoid": Sigmoid, |
33 |
| - "zero_mean_unit_variance": ZeroMeanUnitVariance, |
34 |
| -} |
35 |
| - |
36 |
| -KNOWN_POSTPROCESSING: Dict[PostprocessingName, Type[Processing]] = { |
37 |
| - "binarize": Binarize, |
38 |
| - "clip": Clip, |
39 |
| - "scale_linear": ScaleLinear, |
40 |
| - "scale_mean_variance": ScaleMeanVariance, |
41 |
| - "scale_range": ScaleRange, |
42 |
| - "sigmoid": Sigmoid, |
43 |
| - "zero_mean_unit_variance": ZeroMeanUnitVariance, |
44 |
| -} |
45 |
| - |
46 |
| - |
47 |
| -Scope = Literal["sample", "dataset"] |
48 |
| -SAMPLE: Literal["sample"] = "sample" |
49 |
| -DATASET: Literal["dataset"] = "dataset" |
50 |
| -SCOPES: Set[Scope] = {SAMPLE, DATASET} |
51 |
| - |
52 | 12 |
|
class CombinedProcessing:
    """Combine all pre- *or* postprocessing steps declared by a model's tensor specs.

    Builds the chain of ``Processing`` operations from a list of input tensor specs
    (preprocessing) or output tensor specs (postprocessing) and collects the
    statistical measures those operations require (``self.required_measures``).
    """

    def __init__(self, tensor_specs: Union[List[nodes.InputTensor], List[nodes.OutputTensor]]):
        """
        Args:
            tensor_specs: all input tensor specs *or* all output tensor specs of a
                model. Mixing input and output specs in one list is rejected,
                because pre- and postprocessing have different dtype contracts
                (see comment below).

        Raises:
            ValueError: if ``tensor_specs`` mixes input and output tensor specs.
            NotImplementedError: for unknown spec types, or if per-dataset measures
                are required for output tensors (not yet supported).
        """
        PRE: Literal["pre"] = "pre"
        POST: Literal["post"] = "post"
        proc_prefix: Optional[Literal["pre", "post"]] = None
        self._procs: List[Processing] = []
        for t in tensor_specs:
            if isinstance(t, nodes.InputTensor):
                steps = t.preprocessing or []
                expected_prefix: Literal["pre", "post"] = PRE
            elif isinstance(t, nodes.OutputTensor):
                steps = t.postprocessing or []
                expected_prefix = POST
            else:
                raise NotImplementedError(t)

            # all specs must be of the same kind; reject mixed input/output lists
            if proc_prefix is not None and proc_prefix != expected_prefix:
                raise ValueError(f"Invalid mixed input/output tensor specs: {tensor_specs}")

            proc_prefix = expected_prefix
            for step in steps:
                self._procs.append(KNOWN_PROCESSING[proc_prefix][step.name](tensor_name=t.name, **step.kwargs))

        # There is a difference between pre-and-postprocessing:
        # Pre-processing always returns float32, because its output is consumed by the model.
        # Post-processing, however, should return the dtype that is specified in the model spec.
        # todo: cast dtype for inputs before preprocessing? or check dtype?
        if proc_prefix == POST:
            for t in tensor_specs:
                self._procs.append(EnsureDtype(tensor_name=t.name, dtype=t.data_type))

        self.required_measures: RequiredMeasures = self._collect_required_measures(self._procs)
        if proc_prefix == POST and self.required_measures[PER_DATASET]:
            raise NotImplementedError("computing statistics for output tensors per dataset is not yet implemented")

        self.tensor_names = [t.name for t in tensor_specs]

    def apply(self, sample: Sample, computed_measures: ComputedMeasures) -> None:
        """Apply all processing steps to ``sample`` in place.

        Args:
            sample: tensors by tensor name; each processed entry is replaced with
                its processed version.
            computed_measures: the measures listed in ``self.required_measures``,
                handed to every processing step before it runs.
        """
        for proc in self._procs:
            proc.set_computed_measures(computed_measures)
            sample[proc.tensor_name] = proc.apply(sample[proc.tensor_name])

    @staticmethod
    def _collect_required_measures(proc: Sequence[Processing]) -> RequiredMeasures:
        """Union the required measures of all processing steps, keyed by mode and tensor name."""
        ret: RequiredMeasures = {PER_SAMPLE: {}, PER_DATASET: {}}
        for p in proc:
            for mode, ms_per_mode in p.get_required_measures().items():
                for tn, ms_per_tn in ms_per_mode.items():
                    # setdefault replaces the manual "if tn not in ret[mode]" dance
                    ret[mode].setdefault(tn, set()).update(ms_per_tn)

        return ret
0 commit comments