fix: complement with max distance (#469)

OpheliaMiralles · pre-commit-ci[bot] · web-flow · commit a8666fa26f97 · 2025-11-25T15:38:59.000Z
## Description
- `max_distance` could not be passed as an argument to `complement`.
- `complement` with `max_distance` did not work: for a target point with no source point within `max_distance`, the cKDTree query reports an infinite distance and a nearest-point index equal to the number of source points, which is out of range when indexing the source data.

## What problem does this change solve?
Makes `complement` usable with `max_distance`.

## What issue or task does this change relate to?

## Additional notes ##

***As a contributor to the Anemoi framework, please ensure that your changes include unit tests, updates to any affected dependencies and documentation, and have been tested in a parallel setting (i.e., with multiple GPUs). As a reviewer, you are also responsible for verifying these aspects and requesting changes if they are not adequately addressed. For guidelines about those please refer to https://anemoi.readthedocs.io/en/latest/***

By opening this pull request, I affirm that all authors agree to the [Contributor License Agreement.](https://github.com/ecmwf/codex/blob/main/Legal/contributor_license_agreement.md)

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/src/anemoi/datasets/data/complement.py b/src/anemoi/datasets/data/complement.py
@@ -293,21 +293,29 @@ def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
         index, previous = update_tuple(index, variable_index, slice(None))
         source_index = [self._source.name_to_index[x] for x in self.variables[previous]]
         source_data = self._source[index[0], source_index, index[2], ...]
-        target_data = source_data[..., self._nearest_grid_points]
-
-        epsilon = 1e-8  # prevent division by zero
-        weights = 1.0 / (self._distances + epsilon)
-        weights = weights.astype(target_data.dtype)
-        weights /= weights.sum(axis=1, keepdims=True)  # normalize
-
-        # Reshape weights to broadcast correctly
-        # Add leading singleton dimensions so it matches target_data shape
-        while weights.ndim < target_data.ndim:
-            weights = np.expand_dims(weights, axis=0)
-
-        # Compute weighted average along the last dimension
-        final_point = np.sum(target_data * weights, axis=-1)
-        result = final_point[..., index[3]]
+        if any(self._nearest_grid_points >= source_data.shape[-1]):
+            target_shape = source_data.shape[:-1] + self._target.shape[-1:]
+            target_data = np.full(target_shape, np.nan, dtype=self._target.dtype)
+            cond = self._nearest_grid_points < source_data.shape[-1]
+            reachable = np.where(cond)[0]
+            nearest_reachable = self._nearest_grid_points[cond]
+            target_data[..., reachable] = source_data[..., nearest_reachable]
+            result = target_data[..., index[3]]
+        else:
+            target_data = source_data[..., self._nearest_grid_points]
+            epsilon = 1e-8  # prevent division by zero
+            weights = 1.0 / (self._distances + epsilon)
+            weights = weights.astype(target_data.dtype)
+            weights /= weights.sum(axis=1, keepdims=True)  # normalize
+
+            # Reshape weights to broadcast correctly
+            # Add leading singleton dimensions so it matches target_data shape
+            while weights.ndim < target_data.ndim:
+                weights = np.expand_dims(weights, axis=0)
+
+            # Compute weighted average along the last dimension
+            final_point = np.sum(target_data * weights, axis=-1)
+            result = final_point[..., index[3]]
 
         return apply_index_to_slices_changes(result, changes)
 
@@ -353,8 +361,9 @@ def complement_factory(args: tuple, kwargs: dict) -> Dataset:
     }[interpolation]
 
     if interpolation == "nearest":
-        k = kwargs.pop("k", "1")
-        complement = Class(target=target, source=source, k=k)._subset(**kwargs)
+        k = kwargs.pop("k", 1)
+        max_distance = kwargs.pop("max_distance", None)
+        complement = Class(target=target, source=source, k=k, max_distance=max_distance)._subset(**kwargs)
 
     else:
         complement = Class(target=target, source=source)._subset(**kwargs)