diff --git a/.gitignore b/.gitignore
index e12dc8bf8..c2006db35 100644
--- a/.gitignore
+++ b/.gitignore
@@ -91,3 +91,7 @@ venv.bak/
 
 # Mac OS Stuff
 .DS_Store
+
+# Running local tests
+/tmp
+/pysteps/tests/tmp/
diff --git a/ci/ci_test_env.yml b/ci/ci_test_env.yml
index 4c0b8732f..7857de61b 100644
--- a/ci/ci_test_env.yml
+++ b/ci/ci_test_env.yml
@@ -18,6 +18,7 @@ dependencies:
   - pillow
   - pyproj
   - scipy
+  - xarray
   # Optional dependencies
   - dask
   - pyfftw
diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py
index d6f868ef4..b2dc6843b 100644
--- a/pysteps/blending/steps.py
+++ b/pysteps/blending/steps.py
@@ -42,10 +42,9 @@ calculate_weights_spn
 blend_means_sigmas
 """
-
 import math
 import time
-from copy import deepcopy
+from copy import copy, deepcopy
 from functools import partial
 from multiprocessing.pool import ThreadPool
@@ -65,1635 +64,2771 @@ except ImportError:
     DASK_IMPORTED = False
 
+from dataclasses import dataclass, field
+from typing import Any, Callable
+
-def forecast(
-    precip,
-    precip_models,
-    velocity,
-    velocity_models,
-    timesteps,
-    timestep,
-    issuetime,
-    n_ens_members,
-    n_cascade_levels=6,
-    blend_nwp_members=False,
-    precip_thr=None,
-    norain_thr=0.0,
-    kmperpixel=None,
-    extrap_method="semilagrangian",
-    decomp_method="fft",
-    bandpass_filter_method="gaussian",
-    noise_method="nonparametric",
-    noise_stddev_adj=None,
-    ar_order=2,
-    vel_pert_method="bps",
-    weights_method="bps",
-    conditional=False,
-    probmatching_method="cdf",
-    mask_method="incremental",
-    resample_distribution=True,
-    smooth_radar_mask_range=0,
-    callback=None,
-    return_output=True,
-    seed=None,
-    num_workers=1,
-    fft_method="numpy",
-    domain="spatial",
-    outdir_path_skill="./tmp/",
-    extrap_kwargs=None,
-    filter_kwargs=None,
-    noise_kwargs=None,
-    vel_pert_kwargs=None,
-    clim_kwargs=None,
-    mask_kwargs=None,
-    measure_time=False,
-):
-    """
-    Generate a blended nowcast ensemble by using the Short-Term Ensemble
-    Prediction System (STEPS) method.
-
-    Parameters
-    ----------
-    precip: array-like
-      Array of shape (ar_order+1,m,n) containing the input precipitation fields
-      ordered by timestamp from oldest to newest. The time steps between the
-      inputs are assumed to be regular.
-    precip_models: array-like
-      Either raw (NWP) model forecast data or decomposed (NWP) model forecast data.
-      If you supply decomposed data, it needs to be an array of shape
-      (n_models,timesteps+1) containing, per timestep (t=0 to lead time here) and
-      per (NWP) model or model ensemble member, a dictionary with a list of cascades
-      obtained by calling a method implemented in :py:mod:`pysteps.cascade.decomposition`.
-      If you supply the original (NWP) model forecast data, it needs to be an array of shape
-      (n_models,timestep+1,m,n) containing precipitation (or other) fields, which will
-      then be decomposed in this function.
-
-      Depending on your use case it can be advantageous to decompose the model
-      forecasts outside beforehand, as this slightly reduces calculation times.
-      This is possible with :py:func:`pysteps.blending.utils.decompose_NWP`,
-      :py:func:`pysteps.blending.utils.compute_store_nwp_motion`, and
-      :py:func:`pysteps.blending.utils.load_NWP`. However, if you have a lot of (NWP) model
-      members (e.g. 1 model member per nowcast member), this can lead to excessive memory
-      usage.
-
-      To further reduce memory usage, both this array and the ``velocity_models`` array
-      can be given as float32. They will then be converted to float64 before computations
-      to minimize loss in precision.
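The (removed) paragraph above describes pre-decomposing the NWP forecasts. A minimal sketch of what that preparation can look like, using only the cascade calls that also appear in the new code further down in this patch; the shapes and the random test field are illustrative assumptions, not part of the patch:

    import numpy as np
    from pysteps import cascade

    # Illustrative shapes; real data would come from an NWP archive
    n_models, n_leadtimes, m, n = 1, 13, 256, 256
    precip_models = np.random.exponential(0.1, (n_models, n_leadtimes, m, n))

    decompose, _ = cascade.get_method("fft")  # (decomposition, recomposition) pair
    bandpass_filter = cascade.get_method("gaussian")((m, n), 6)

    # One cascade dictionary per model member and lead time, matching the
    # decomposed precip_models input described in the docstring above
    precip_models_cascades = np.empty((n_models, n_leadtimes), dtype=object)
    for i in range(n_models):
        for t in range(n_leadtimes):
            precip_models_cascades[i, t] = decompose(
                precip_models[i, t],
                bandpass_filter,
                normalize=True,
                compute_stats=True,
                compact_output=True,
            )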
+# TODO: compare old and new versions of the code, run a benchmark to compare the two
+# TODO: look at the documentation and try to improve it, lots of things are now combined together
-
-      In case of one (deterministic) model as input, add an extra dimension to make sure
-      precip_models is four dimensional prior to calling this function.
-    velocity: array-like
-      Array of shape (2,m,n) containing the x- and y-components of the advection
-      field. The velocities are assumed to represent one time step between the
-      inputs. All values are required to be finite.
-    velocity_models: array-like
-      Array of shape (n_models,timestep,2,m,n) containing the x- and y-components
-      of the advection field for the (NWP) model field per forecast lead time.
-      All values are required to be finite.
-      To reduce memory usage, this array
-      can be given as float32. They will then be converted to float64 before computations
-      to minimize loss in precision.
-    timesteps: int or list of floats
-      Number of time steps to forecast or a list of time steps for which the
-      forecasts are computed (relative to the input time step). The elements of
-      the list are required to be in ascending order.
+
+@dataclass(frozen=True)
+class StepsBlendingConfig:
+    precip_threshold: float | None
+    norain_threshold: float
+    kmperpixel: float
     timestep: float
-      Time step of the motion vectors (minutes). Required if vel_pert_method is
-      not None or mask_method is 'incremental'.
-    issuetime: datetime
-      Datetime object containing the date and time for which the forecast
-      is issued.
     n_ens_members: int
-      The number of ensemble members to generate. This number should always be
-      equal to or larger than the number of NWP ensemble members / number of
-      NWP models.
-    n_cascade_levels: int, optional
-      The number of cascade levels to use. Defaults to 6,
-      see issue #385 on GitHub.
+    n_cascade_levels: int
     blend_nwp_members: bool
-      Check if NWP models/members should be used individually, or if all of
-      them are blended together per nowcast ensemble member. Standard set to
-      false.
-    precip_thr: float, optional
-      Specifies the threshold value for minimum observable precipitation
-      intensity. Required if mask_method is not None or conditional is True.
-    norain_thr: float, optional
-      Specifies the threshold value for the fraction of rainy (see above) pixels
-      in the radar rainfall field below which we consider there to be no rain.
-      Depends on the amount of clutter typically present.
-      Standard set to 0.0
-    kmperpixel: float, optional
-      Spatial resolution of the input data (kilometers/pixel). Required if
-      vel_pert_method is not None or mask_method is 'incremental'.
-    extrap_method: str, optional
-      Name of the extrapolation method to use. See the documentation of
-      :py:mod:`pysteps.extrapolation.interface`.
-    decomp_method: {'fft'}, optional
-      Name of the cascade decomposition method to use. See the documentation
-      of :py:mod:`pysteps.cascade.interface`.
-    bandpass_filter_method: {'gaussian', 'uniform'}, optional
-      Name of the bandpass filter method to use with the cascade decomposition.
-      See the documentation of :py:mod:`pysteps.cascade.interface`.
-    noise_method: {'parametric','nonparametric','ssft','nested',None}, optional
-      Name of the noise generator to use for perturbating the precipitation
-      field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None,
-      no noise is generated.
-    noise_stddev_adj: {'auto','fixed',None}, optional
-      Optional adjustment for the standard deviations of the noise fields added
-      to each cascade level. This is done to compensate incorrect std. dev.
-      estimates of casace levels due to presence of no-rain areas. 'auto'=use
-      the method implemented in :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`.
-      'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable
-      noise std. dev adjustment.
-    ar_order: int, optional
-      The order of the autoregressive model to use. Must be >= 1.
-    vel_pert_method: {'bps',None}, optional
-      Name of the noise generator to use for perturbing the advection field. See
-      the documentation of :py:mod:`pysteps.noise.interface`. If set to None, the advection
-      field is not perturbed.
-    weights_method: {'bps','spn'}, optional
-      The calculation method of the blending weights. Options are the method
-      by :cite:`BPS2006` and the covariance-based method by :cite:`SPN2013`.
-      Defaults to bps.
-    conditional: bool, optional
-      If set to True, compute the statistics of the precipitation field
-      conditionally by excluding pixels where the values are below the threshold
-      precip_thr.
-    probmatching_method: {'cdf','mean',None}, optional
-      Method for matching the statistics of the forecast field with those of
-      the most recently observed one. 'cdf'=map the forecast CDF to the observed
-      one, 'mean'=adjust only the conditional mean value of the forecast field
-      in precipitation areas, None=no matching applied. Using 'mean' requires
-      that mask_method is not None.
-    mask_method: {'obs','incremental',None}, optional
-      The method to use for masking no precipitation areas in the forecast field.
-      The masked pixels are set to the minimum value of the observations.
-      'obs' = apply precip_thr to the most recently observed precipitation intensity
-      field, 'incremental' = iteratively buffer the mask with a certain rate
-      (currently it is 1 km/min), None=no masking.
-    resample_distribution: bool, optional
-      Method to resample the distribution from the extrapolation and NWP cascade as input
-      for the probability matching. Not resampling these distributions may lead to losing
-      some extremes when the weight of both the extrapolation and NWP cascade is similar.
-      Defaults to True.
-    smooth_radar_mask_range: int, Default is 0.
-      Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise
-      blend near the edge of the radar domain (radar mask), where the radar data is either
-      not present anymore or is not reliable. If set to 0 (grid cells), this generates a
-      normal forecast without smoothing. To create a smooth mask, this range should be a
-      positive value, representing a buffer band of a number of pixels by which the mask
-      is cropped and smoothed. The smooth radar mask removes the hard edges between NWP
-      and radar in the final blended product. Typically, a value between 50 and 100 km
-      can be used. 80 km generally gives good results.
-    callback: function, optional
-      Optional function that is called after computation of each time step of
-      the nowcast. The function takes one argument: a three-dimensional array
-      of shape (n_ens_members,h,w), where h and w are the height and width
-      of the input field precip, respectively. This can be used, for instance,
-      writing the outputs into files.
-    return_output: bool, optional
-      Set to False to disable returning the outputs as numpy arrays. This can
-      save memory if the intermediate results are written to output files using
-      the callback function.
-    seed: int, optional
-      Optional seed number for the random generators.
-    num_workers: int, optional
-      The number of workers to use for parallel computation. Applicable if dask
-      is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it
-      is advisable to disable OpenMP by setting the environment variable
-      OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous
-      threads.
-    fft_method: str, optional
-      A string defining the FFT method to use (see FFT methods in
-      :py:func:`pysteps.utils.interface.get_method`).
-      Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed,
-      the recommended method is 'pyfftw'.
-    domain: {"spatial", "spectral"}
-      If "spatial", all computations are done in the spatial domain (the
-      classical STEPS model). If "spectral", the AR(2) models and stochastic
-      perturbations are applied directly in the spectral domain to reduce
-      memory footprint and improve performance :cite:`PCH2019b`.
-    outdir_path_skill: string, optional
-      Path to folder where the historical skill are stored. Defaults to
-      path_workdir from rcparams. If no path is given, './tmp' will be used.
-    extrap_kwargs: dict, optional
-      Optional dictionary containing keyword arguments for the extrapolation
-      method. See the documentation of :py:func:`pysteps.extrapolation.interface`.
-    filter_kwargs: dict, optional
-      Optional dictionary containing keyword arguments for the filter method.
-      See the documentation of :py:mod:`pysteps.cascade.bandpass_filters`.
-    noise_kwargs: dict, optional
-      Optional dictionary containing keyword arguments for the initializer of
-      the noise generator. See the documentation of :py:mod:`pysteps.noise.fftgenerators`.
-    vel_pert_kwargs: dict, optional
-      Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for
-      the initializer of the velocity perturbator. The choice of the optimal
-      parameters depends on the domain and the used optical flow method.
-
-      Default parameters from :cite:`BPS2006`:
-      p_par  = [10.88, 0.23, -7.68]
-      p_perp = [5.76, 0.31, -2.72]
+    extrapolation_method: str
+    decomposition_method: str
+    bandpass_filter_method: str
+    noise_method: str | None
+    noise_stddev_adj: str | None
+    ar_order: int
+    velocity_perturbation_method: str | None
+    weights_method: str
+    conditional: bool
+    probmatching_method: str | None
+    mask_method: str | None
+    resample_distribution: bool
+    smooth_radar_mask_range: int
+    seed: int | None
+    num_workers: int
+    fft_method: str
+    domain: str
+    outdir_path_skill: str
+    extrapolation_kwargs: dict[str, Any] = field(default_factory=dict)
+    filter_kwargs: dict[str, Any] = field(default_factory=dict)
+    noise_kwargs: dict[str, Any] = field(default_factory=dict)
+    velocity_perturbation_kwargs: dict[str, Any] = field(default_factory=dict)
+    climatology_kwargs: dict[str, Any] = field(default_factory=dict)
+    mask_kwargs: dict[str, Any] = field(default_factory=dict)
+    measure_time: bool = False
+    callback: Any | None = None
+    return_output: bool = True
+
+
+@dataclass
+class StepsBlendingParams:
+    noise_std_coeffs: np.ndarray | None = None
+    bandpass_filter: Any | None = None
+    fft: Any | None = None
+    perturbation_generator: Callable[..., np.ndarray] | None = None
+    noise_generator: Callable[..., np.ndarray] | None = None
+    PHI: np.ndarray | None = None
+    extrapolation_method: Callable[..., Any] | None = None
+    decomposition_method: Callable[..., dict] | None = None
+    recomposition_method: Callable[..., np.ndarray] | None = None
+    velocity_perturbations: Any | None = None
+    generate_velocity_noise: Callable[[Any, float], np.ndarray] | None = None
+    velocity_perturbations_parallel: np.ndarray | None = None
+    velocity_perturbations_perpendicular: np.ndarray | None = None
+    fft_objs: list[Any] = field(default_factory=list)
+    mask_rim: int | None = None
+    struct: np.ndarray | None = None
+    time_steps_is_list: bool = False
+    precip_models_provided_is_cascade: bool = False
+    xy_coordinates: np.ndarray | None = None
+    precip_zerovalue: float | None = None
+    mask_threshold: np.ndarray | None = None
+    zero_precip_radar: bool = False
+    zero_precip_model_fields: bool = False
+    original_timesteps: list | np.ndarray | None = None
+    num_ensemble_workers: int | None = None
+    rho_nwp_models: np.ndarray | None = None
+    domain_mask: np.ndarray | None = None
+    filter_kwargs: dict | None = None
+    noise_kwargs: dict | None = None
+    velocity_perturbation_kwargs: dict | None = None
+    climatology_kwargs: dict | None = None
+    mask_kwargs: dict | None = None
+
+
+@dataclass
+class StepsBlendingState:
+    # Radar and noise states
+    precip_cascades: np.ndarray | None = None
+    precip_noise_input: np.ndarray | None = None
+    precip_noise_cascades: np.ndarray | None = None
+    precip_mean_noise: np.ndarray | None = None
+    precip_std_noise: np.ndarray | None = None
+
+    # Extrapolation states
+    mean_extrapolation: np.ndarray | None = None
+    std_extrapolation: np.ndarray | None = None
+    rho_extrap_cascade_prev: np.ndarray | None = None
+    rho_extrap_cascade: np.ndarray | None = None
+    precip_cascades_prev_subtimestep: np.ndarray | None = None
+    cascade_noise_prev_subtimestep: np.ndarray | None = None
+    precip_extrapolated_after_decomp: np.ndarray | None = None
+    noise_extrapolated_after_decomp: np.ndarray | None = None
+    precip_extrapolated_probability_matching: np.ndarray | None = None
+
+    # NWP model states
+    precip_models_cascades: np.ndarray | None = None
+    precip_models_cascades_timestep: np.ndarray | None = None
+    precip_models_timestep: np.ndarray | None = None
+    mean_models_timestep: np.ndarray | None = None
+    std_models_timestep: np.ndarray | None = None
+    velocity_models_timestep: np.ndarray | None = None
+
+    # Mapping from NWP members to ensemble members
+    mapping_list_NWP_member_to_ensemble_member: np.ndarray | None = None
+
+    # Random states for precipitation, motion and probmatching
+    randgen_precip: list[np.random.RandomState] | None = None
+    randgen_motion: list[np.random.RandomState] | None = None
+    randgen_probmatching: list[np.random.RandomState] | None = None
+
+    # Variables for final forecast computation
+    previous_displacement: list[Any] | None = None
+    previous_displacement_noise_cascade: list[Any] | None = None
+    previous_displacement_prob_matching: list[Any] | None = None
+    rho_final_blended_forecast: np.ndarray | None = None
+    final_blended_forecast_means: np.ndarray | None = None
+    final_blended_forecast_stds: np.ndarray | None = None
+    final_blended_forecast_means_mod_only: np.ndarray | None = None
+    final_blended_forecast_stds_mod_only: np.ndarray | None = None
+    final_blended_forecast_cascades: np.ndarray | None = None
+    final_blended_forecast_cascades_mod_only: np.ndarray | None = None
+    final_blended_forecast_recomposed: np.ndarray | None = None
+    final_blended_forecast_recomposed_mod_only: np.ndarray | None = None
+
+    # Final outputs
+    final_blended_forecast: np.ndarray | None = None
+    final_blended_forecast_non_perturbed: np.ndarray | None = None
+
+    # Timing and indexing
+    time_prev_timestep: list[float] | None = None
+    leadtime_since_start_forecast: list[float] | None = None
+    subtimesteps: list[float] | None = None
+    is_nowcast_time_step: bool | None = None
+    subtimestep_index: int | None = None
+
+    # Weights used for blending
+    weights: np.ndarray | None = None
+    weights_model_only: np.ndarray | None = None
+
+    # This is stored here as well because it is changed during the forecast loop and is thus no longer part of the config
+    extrapolation_kwargs: dict[str, Any] = field(default_factory=dict)
+
+
+class StepsBlendingNowcaster:
+    def __init__(
+        self,
+        precip,
+        precip_models,
+        velocity,
+        velocity_models,
+        time_steps,
+        issue_time,
+        steps_blending_config: StepsBlendingConfig,
+    ):
+        """Initializes the StepsBlendingNowcaster with inputs and configurations."""
+        # Store inputs
+        self.__precip = precip
+        self.__precip_models = precip_models
+        self.__velocity = velocity
+        self.__velocity_models = velocity_models
+        self.__timesteps = time_steps
+        self.__issuetime = issue_time
+
+        self.__config = steps_blending_config
+
+        # Initialize Params and State
+        self.__params = StepsBlendingParams()
+        self.__state = StepsBlendingState()
+
+        # Additional variables for time measurement
+        self.__start_time_init = None
+        self.__zero_precip_time = None
+        self.__init_time = None
+        self.__mainloop_time = None
+
+    def compute_forecast(self):
+        self.__check_inputs()
+        self.__print_forecast_info()
+        # Measure time for initialization
+        if self.__config.measure_time:
+            self.__start_time_init = time.time()
+
+        # Slice the precipitation field to only use the last ar_order + 1 fields
+        self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy()
+        self.__initialize_nowcast_components()
+        self.__prepare_radar_and_NWP_fields()
+
+        # Determine if rain is present in both radar and NWP fields
+        if self.__params.zero_precip_radar and self.__params.zero_precip_model_fields:
+            return self.__zero_precipitation_forecast()
+        else:
+            # Prepare the data for the zero precipitation radar case and initialize the noise correctly
+            if self.__params.zero_precip_radar:
+                self.__prepare_nowcast_for_zero_radar()
+            else:
+                self.__state.precip_noise_input = self.__precip.copy()
+            self.__initialize_noise()
+            self.__estimate_ar_parameters_radar()
+            self.__multiply_precip_cascade_to_match_ensemble_members()
+            self.__initialize_random_generators()
+            self.__prepare_forecast_loop()
+            self.__initialize_noise_cascades()
+            if self.__config.measure_time:
+                self.__init_time = self.__measure_time(
+                    "initialization", self.__start_time_init
+                )
-
-    Parameters fitted to the data (optical flow/domain):
+            self.__blended_nowcast_main_loop()
+            # Stack and return the forecast output
+            if self.__config.return_output:
+                self.__state.final_blended_forecast = np.stack(
+                    [
+                        np.stack(self.__state.final_blended_forecast[j])
+                        for j in range(self.__config.n_ens_members)
+                    ]
+                )
+                if self.__config.measure_time:
+                    return (
+                        self.__state.final_blended_forecast,
+                        self.__init_time,
+                        self.__mainloop_time,
+                    )
+                else:
+                    return self.__state.final_blended_forecast
+            else:
+                return None
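A minimal sketch of driving the refactored entry point directly. The input arrays (precip, precip_models, velocity, velocity_models) are assumed to be prepared exactly as the original forecast() docstring describes; all other values below are illustrative only, not defaults prescribed by the patch:

    from datetime import datetime

    config = StepsBlendingConfig(
        precip_threshold=-10.0,
        norain_threshold=0.0,
        kmperpixel=1.0,
        timestep=5.0,
        n_ens_members=4,
        n_cascade_levels=6,
        blend_nwp_members=False,
        extrapolation_method="semilagrangian",
        decomposition_method="fft",
        bandpass_filter_method="gaussian",
        noise_method="nonparametric",
        noise_stddev_adj="auto",
        ar_order=2,
        velocity_perturbation_method="bps",
        weights_method="bps",
        conditional=False,
        probmatching_method="cdf",
        mask_method="incremental",
        resample_distribution=True,
        smooth_radar_mask_range=0,
        seed=24,
        num_workers=4,
        fft_method="numpy",
        domain="spatial",
        outdir_path_skill="./tmp/",
    )
    nowcaster = StepsBlendingNowcaster(
        precip, precip_models, velocity, velocity_models,
        time_steps=12, issue_time=datetime(2024, 1, 1, 12, 0),
        steps_blending_config=config,
    )
    precip_forecast = nowcaster.compute_forecast()
    # shape: (n_ens_members, n_timesteps, m, n) when return_output is True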
-
-    darts/fmi:
-      p_par  = [13.71259667, 0.15658963, -16.24368207]
-      p_perp = [8.26550355, 0.17820458, -9.54107834]
+    def __blended_nowcast_main_loop(self):
+        """
+        Main nowcast loop that iterates through the ensemble members and time steps
+        to generate forecasts.
+        """
+        ###
+        # 8. Start the forecasting loop
+        ###
+        # Isolate the last time slice of observed precipitation
+        self.__precip = self.__precip[-1, :, :]
+        print("Starting blended nowcast computation.")
-
-    darts/mch:
-      p_par  = [24.27562298, 0.11297186, -27.30087471]
-      p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01]
+        if self.__config.measure_time:
+            starttime_mainloop = time.time()
+
+        self.__state.extrapolation_kwargs["return_displacement"] = True
-
-    darts/fmi+mch:
-      p_par  = [16.55447057, 0.14160448, -19.24613059]
-      p_perp = [14.75343395, 0.11785398, -16.26151612]
+        self.__state.precip_cascades_prev_subtimestep = deepcopy(
+            self.__state.precip_cascades
+        )
+        self.__state.cascade_noise_prev_subtimestep = deepcopy(
+            self.__state.precip_noise_cascades
+        )
-
-    lucaskanade/fmi:
-      p_par  = [2.20837526, 0.33887032, -2.48995355]
-      p_perp = [2.21722634, 0.32359621, -2.57402761]
+        self.__state.time_prev_timestep = [
+            0.0 for j in range(self.__config.n_ens_members)
+        ]
+        self.__state.leadtime_since_start_forecast = [
+            0.0 for j in range(self.__config.n_ens_members)
+        ]
-
-    lucaskanade/mch:
-      p_par  = [2.56338484, 0.3330941, -2.99714349]
-      p_perp = [1.31204508, 0.3578426, -1.02499891]
+        # iterate each time step
+        for t, subtimestep_idx in enumerate(self.__timesteps):
+            self.__determine_subtimesteps_and_nowcast_time_step(t, subtimestep_idx)
+            if self.__config.measure_time:
+                starttime = time.time()
+            self.__decompose_nwp_if_needed_and_fill_nans_in_nwp(t)
+            self.__find_nowcast_NWP_combination(t)
+            self.__determine_skill_for_current_timestep(t)
+            # the nowcast iteration for each ensemble member
+            final_blended_forecast_all_members_one_timestep = [
+                None for _ in range(self.__config.n_ens_members)
+            ]
-
-    lucaskanade/fmi+mch:
-      p_par  = [2.31970635, 0.33734287, -2.64972861]
-      p_perp = [1.90769947, 0.33446594, -2.06603662]
+            def worker(j):
+                worker_state = copy(self.__state)
+                self.__determine_NWP_skill_for_next_timestep(t, j, worker_state)
+                self.__determine_weights_per_component(worker_state)
+                self.__regress_extrapolation_and_noise_cascades(j, worker_state)
+                self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep(
+                    t, j, worker_state
+                )
+                # 8.5 Blend the cascades
+                final_blended_forecast_single_member = []
+                for t_sub in self.__state.subtimesteps:
+                    # TODO: does it make sense to use sub time steps - check if it works?
+                    if t_sub > 0:
+                        self.__blend_cascades(t_sub, j, worker_state)
+                        self.__recompose_cascade_to_rainfall_field(j, worker_state)
+                        final_blended_forecast_single_member = (
+                            self.__post_process_output(
+                                j, final_blended_forecast_single_member, worker_state
+                            )
+                        )
+                final_blended_forecast_all_members_one_timestep[j] = (
+                    final_blended_forecast_single_member
+                )
-
-    vet/fmi:
-      p_par  = [0.25337388, 0.67542291, 11.04895538]
-      p_perp = [0.02432118, 0.99613295, 7.40146505]
+            dask_worker_collection = []
+
+            if DASK_IMPORTED and self.__config.n_ens_members > 1:
+                for j in range(self.__config.n_ens_members):
+                    dask_worker_collection.append(dask.delayed(worker)(j))
+                dask.compute(
+                    *dask_worker_collection,
+                    num_workers=self.__params.num_ensemble_workers,
+                )
+            else:
+                for j in range(self.__config.n_ens_members):
+                    worker(j)
-
-    vet/mch:
-      p_par  = [0.5075159, 0.53895212, 7.90331791]
-      p_perp = [0.68025501, 0.41761289, 4.73793581]
+            dask_worker_collection = None
-
-    vet/fmi+mch:
-      p_par  = [0.29495222, 0.62429207, 8.6804131 ]
-      p_perp = [0.23127377, 0.59010281, 5.98180004]
+            if self.__state.is_nowcast_time_step:
+                if self.__config.measure_time:
+                    _ = self.__measure_time("subtimestep", starttime)
+                else:
+                    print("done.")
-
-    fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set
+            if self.__config.callback is not None:
+                precip_forecast_final = np.stack(
+                    final_blended_forecast_all_members_one_timestep
+                )
+                if precip_forecast_final.shape[1] > 0:
+                    self.__config.callback(precip_forecast_final.squeeze())
-
-    The above parameters have been fitten by using run_vel_pert_analysis.py
-    and fit_vel_pert_params.py located in the scripts directory.
+            if self.__config.return_output:
+                for j in range(self.__config.n_ens_members):
+                    self.__state.final_blended_forecast[j].extend(
+                        final_blended_forecast_all_members_one_timestep[j]
+                    )
-
-    See :py:mod:`pysteps.noise.motion` for additional documentation.
-    clim_kwargs: dict, optional
-      Optional dictionary containing keyword arguments for the climatological
-      skill file. Arguments can consist of: 'outdir_path', 'n_models'
-      (the number of NWP models) and 'window_length' (the minimum number of
-      days the clim file should have, otherwise the default is used).
-    mask_kwargs: dict
-      Optional dictionary containing mask keyword arguments 'mask_f' and
-      'mask_rim', the factor defining the the mask increment and the rim size,
-      respectively.
-      The mask increment is defined as mask_f*timestep/kmperpixel.
-    measure_time: bool
-      If set to True, measure, print and return the computation time.
+            final_blended_forecast_all_members_one_timestep = None
-
-    Returns
-    -------
-    out: ndarray
-      If return_output is True, a four-dimensional array of shape
-      (n_ens_members,num_timesteps,m,n) containing a time series of forecast
-      precipitation fields for each ensemble member. Otherwise, a None value
-      is returned. The time series starts from t0+timestep, where timestep is
-      taken from the input precipitation fields precip. If measure_time is True, the
-      return value is a three-element tuple containing the nowcast array, the
-      initialization time of the nowcast generator and the time used in the
-      main loop (seconds).
+        if self.__config.measure_time:
+            self.__mainloop_time = time.time() - starttime_mainloop
-
-    See also
-    --------
-    :py:mod:`pysteps.extrapolation.interface`, :py:mod:`pysteps.cascade.interface`,
-    :py:mod:`pysteps.noise.interface`, :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`
+    def __check_inputs(self):
+        """Validates the inputs and determines if the user provided raw forecasts or decomposed forecasts."""
+        # Check dimensions of precip
+        if self.__precip.ndim != 3:
+            raise ValueError(
+                "precip must be a three-dimensional array of shape (ar_order + 1, m, n)"
+            )
+        if self.__precip.shape[0] < self.__config.ar_order + 1:
+            raise ValueError(
+                f"precip must have at least {self.__config.ar_order + 1} time steps in the first dimension "
+                f"to match the autoregressive order (ar_order={self.__config.ar_order})"
+            )
-
-    References
-    ----------
-    :cite:`Seed2003`, :cite:`BPS2004`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b`
+        # Check dimensions of velocity
+        if self.__velocity.ndim != 3:
+            raise ValueError(
+                "velocity must be a three-dimensional array of shape (2, m, n)"
+            )
+        if self.__velocity_models.ndim != 5:
+            raise ValueError(
+                "velocity_models must be a five-dimensional array of shape (n_models, timestep, 2, m, n)"
+            )
+        if self.__velocity.shape[0] != 2 or self.__velocity_models.shape[2] != 2:
+            raise ValueError(
+                "velocity and velocity_models must have an x- and y-component, check the shape"
+            )
-
-    Notes
-    -----
-    1. The blending currently does not blend the beta-parameters in the parametric
-    noise method. It is recommended to use the non-parameteric noise method.
+        # Check that spatial dimensions match between precip and velocity
+        if self.__precip.shape[1:3] != self.__velocity.shape[1:3]:
+            raise ValueError(
+                f"Spatial dimensions of precip and velocity do not match: "
+                f"{self.__precip.shape[1:3]} vs {self.__velocity.shape[1:3]}"
+            )
+        # Check if the number of members in the precipitation models and velocity models match
+        if self.__precip_models.shape[0] != self.__velocity_models.shape[0]:
+            raise ValueError(
+                "The number of members in the precipitation models and velocity models must match"
+            )
-
-    2. If blend_nwp_members is True, the BPS2006 method for the weights is
-    suboptimal. It is recommended to use the SPN2013 method instead.
+        if isinstance(self.__timesteps, list):
+            self.__params.time_steps_is_list = True
+            if not sorted(self.__timesteps) == self.__timesteps:
+                raise ValueError(
+                    "timesteps is not in ascending order", self.__timesteps
+                )
+            if self.__precip_models.shape[1] != math.ceil(self.__timesteps[-1]) + 1:
+                raise ValueError(
+                    "precip_models does not contain sufficient lead times for this forecast"
+                )
+            self.__params.original_timesteps = [0] + list(self.__timesteps)
+            self.__timesteps = nowcast_utils.binned_timesteps(
+                self.__params.original_timesteps
+            )
+        else:
+            self.__params.time_steps_is_list = False
+            if self.__precip_models.shape[1] != self.__timesteps + 1:
+                raise ValueError(
+                    "precip_models does not contain sufficient lead times for this forecast"
+                )
+            self.__timesteps = list(range(self.__timesteps + 1))
-
-    3. Not yet implemented (and neither in the steps nowcasting module): The regression
-    of the lag-1 and lag-2 parameters to their climatological values. See also eq.
-    12 - 19 in :cite: `BPS2004`. By doing so, the Phi parameters change over time,
-    which enhances the AR process. This can become a future development if this
-    turns out to be a warranted functionality.
-    """
+        precip_nwp_dim = self.__precip_models.ndim
+        if precip_nwp_dim == 2:
+            if isinstance(self.__precip_models[0][0], dict):
+                # It's a 2D array of dictionaries with decomposed cascades
+                self.__params.precip_models_provided_is_cascade = True
+            else:
+                raise ValueError(
+                    "When precip_models has ndim == 2, it must contain dictionaries with decomposed cascades."
+                )
+        elif precip_nwp_dim == 4:
+            self.__params.precip_models_provided_is_cascade = False
+        else:
+            raise ValueError(
+                "precip_models must be either a two-dimensional array containing dictionaries with decomposed model fields"
+                "or a four-dimensional array containing the original (NWP) model forecasts"
+            )
-
-    # 0.1 Start with some checks
-    _check_inputs(precip, precip_models, velocity, velocity_models, timesteps, ar_order)
+        if self.__config.extrapolation_kwargs is None:
+            self.__state.extrapolation_kwargs = dict()
+        else:
+            self.__state.extrapolation_kwargs = deepcopy(
+                self.__config.extrapolation_kwargs
+            )
-
-    if extrap_kwargs is None:
-        extrap_kwargs = dict()
+        if self.__config.filter_kwargs is None:
+            self.__params.filter_kwargs = dict()
+        else:
+            self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs)
-
-    if filter_kwargs is None:
-        filter_kwargs = dict()
+        if self.__config.noise_kwargs is None:
+            self.__params.noise_kwargs = dict()
+        else:
+            self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs)
-
-    if noise_kwargs is None:
-        noise_kwargs = dict()
+        if self.__config.velocity_perturbation_kwargs is None:
+            self.__params.velocity_perturbation_kwargs = dict()
+        else:
+            self.__params.velocity_perturbation_kwargs = deepcopy(
+                self.__config.velocity_perturbation_kwargs
+            )
-
-    if vel_pert_kwargs is None:
-        vel_pert_kwargs = dict()
+        if self.__config.climatology_kwargs is None:
+            # Make sure clim_kwargs at least contains the number of models
+            self.__params.climatology_kwargs = dict(
+                {"n_models": self.__precip_models.shape[0]}
+            )
+        else:
+            self.__params.climatology_kwargs = deepcopy(
+                self.__config.climatology_kwargs
+            )
-
-    if clim_kwargs is None:
-        # Make sure clim_kwargs at least contains the number of models
-        clim_kwargs = dict({"n_models": precip_models.shape[0]})
+        if self.__config.mask_kwargs is None:
+            self.__params.mask_kwargs = dict()
+        else:
+            self.__params.mask_kwargs = deepcopy(self.__config.mask_kwargs)
-
-    if mask_kwargs is None:
-        mask_kwargs = dict()
+        if np.any(~np.isfinite(self.__velocity)):
+            raise ValueError("velocity contains non-finite values")
-
-    if np.any(~np.isfinite(velocity)):
-        raise ValueError("velocity contains non-finite values")
+        if self.__config.mask_method not in ["obs", "incremental", None]:
+            raise ValueError(
+                "unknown mask method %s: must be 'obs', 'incremental' or None"
+                % self.__config.mask_method
+            )
-
-    if mask_method not in ["obs", "incremental", None]:
-        raise ValueError(
-            "unknown mask method %s: must be 'obs', 'incremental' or None" % mask_method
-        )
+        if self.__config.conditional and self.__config.precip_threshold is None:
+            raise ValueError("conditional=True but precip_thr is not set")
-
-    if conditional and precip_thr is None:
-        raise ValueError("conditional=True but precip_thr is not set")
+        if (
+            self.__config.mask_method is not None
+            and self.__config.precip_threshold is None
+        ):
+            raise ValueError("mask_method!=None but precip_thr=None")
-
-    if mask_method is not None and precip_thr is None:
-        raise ValueError("mask_method!=None but precip_thr=None")
+        if self.__config.noise_stddev_adj not in ["auto", "fixed", None]:
+            raise ValueError(
+                "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None"
+                % self.__config.noise_stddev_adj
+            )
-
-    if noise_stddev_adj not in ["auto", "fixed", None]:
-        raise ValueError(
-            "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None"
-            % noise_stddev_adj
-        )
+        if self.__config.kmperpixel is None:
+            if self.__config.velocity_perturbation_method is not None:
+                raise ValueError(
+                    "velocity_perturbation_method is set but kmperpixel=None"
+                )
+            if self.__config.mask_method == "incremental":
+                raise ValueError("mask_method='incremental' but kmperpixel=None")
-
-    if kmperpixel is None:
-        if vel_pert_method is not None:
-            raise ValueError("vel_pert_method is set but kmperpixel=None")
-        if mask_method == "incremental":
-            raise ValueError("mask_method='incremental' but kmperpixel=None")
+        if self.__config.timestep is None:
+            if self.__config.velocity_perturbation_method is not None:
+                raise ValueError(
+                    "velocity_perturbation_method is set but timestep=None"
+                )
+            if self.__config.mask_method == "incremental":
+                raise ValueError("mask_method='incremental' but timestep=None")
-
-    if timestep is None:
-        if vel_pert_method is not None:
-            raise ValueError("vel_pert_method is set but timestep=None")
-        if mask_method == "incremental":
-            raise ValueError("mask_method='incremental' but timestep=None")
+    def __print_forecast_info(self):
+        print("STEPS blending")
+        print("==============")
+        print("")
+
+        print("Inputs")
+        print("------")
+        print(f"forecast issue time: {self.__issuetime.isoformat()}")
+        print(
+            f"input dimensions: {self.__precip.shape[1]}x{self.__precip.shape[2]}"
+        )
+        if self.__config.kmperpixel is not None:
+            print(f"km/pixel: {self.__config.kmperpixel}")
+        if self.__config.timestep is not None:
+            print(f"time step: {self.__config.timestep} minutes")
+        print("")
+
+        print("NWP and blending inputs")
+        print("-----------------------")
+        print(f"number of (NWP) models: {self.__precip_models.shape[0]}")
+        print(f"blend (NWP) model members: {self.__config.blend_nwp_members}")
+        print(
+            f"decompose (NWP) models: {'yes' if self.__precip_models.ndim == 4 else 'no'}"
+        )
+        print("")
+
+        print("Methods")
+        print("-------")
+        print(f"extrapolation: {self.__config.extrapolation_method}")
+        print(f"bandpass filter: {self.__config.bandpass_filter_method}")
+        print(f"decomposition: {self.__config.decomposition_method}")
+        print(f"noise generator: {self.__config.noise_method}")
+        print(
+            f"noise adjustment: {'yes' if self.__config.noise_stddev_adj else 'no'}"
+        )
+        print(
+            f"velocity perturbator: {self.__config.velocity_perturbation_method}"
+        )
+        print(f"blending weights method: {self.__config.weights_method}")
+        print(
+            f"conditional statistics: {'yes' if self.__config.conditional else 'no'}"
+        )
+        print(f"precip. mask method: {self.__config.mask_method}")
+        print(f"probability matching: {self.__config.probmatching_method}")
+        print(f"FFT method: {self.__config.fft_method}")
+        print(f"domain: {self.__config.domain}")
+        print("")
+
+        print("Parameters")
+        print("----------")
+        if isinstance(self.__timesteps, int):
+            print(f"number of time steps: {self.__timesteps}")
+        else:
+            print(f"time steps: {self.__timesteps}")
+        print(f"ensemble size: {self.__config.n_ens_members}")
+        print(f"parallel threads: {self.__config.num_workers}")
+        print(f"number of cascade levels: {self.__config.n_cascade_levels}")
+        print(f"order of the AR(p) model: {self.__config.ar_order}")
+        if self.__config.velocity_perturbation_method == "bps":
+            self.__params.velocity_perturbations_parallel = (
+                self.__params.velocity_perturbation_kwargs.get(
+                    "p_par", noise.motion.get_default_params_bps_par()
+                )
+            )
+            self.__params.velocity_perturbations_perpendicular = (
+                self.__params.velocity_perturbation_kwargs.get(
+                    "p_perp", noise.motion.get_default_params_bps_perp()
+                )
+            )
+            print(
+                f"vel. pert., parallel: {self.__params.velocity_perturbations_parallel[0]},{self.__params.velocity_perturbations_parallel[1]},{self.__params.velocity_perturbations_parallel[2]}"
+            )
+            print(
+                f"vel. pert., perpendicular: {self.__params.velocity_perturbations_perpendicular[0]},{self.__params.velocity_perturbations_perpendicular[1]},{self.__params.velocity_perturbations_perpendicular[2]}"
+            )
+        else:
+            (
+                self.__params.velocity_perturbations_parallel,
+                self.__params.velocity_perturbations_perpendicular,
+            ) = (None, None)
+
+        if self.__config.conditional or self.__config.mask_method is not None:
+            print(f"precip. intensity threshold: {self.__config.precip_threshold}")
+            print(f"no-rain fraction threshold for radar: {self.__config.norain_threshold}")
+        print("")
-
-    # 0.2 Log some settings
-    print("STEPS blending")
-    print("==============")
-    print("")
-
-    print("Inputs")
-    print("------")
-    print(f"forecast issue time: {issuetime.isoformat()}")
-    print(f"input dimensions: {precip.shape[1]}x{precip.shape[2]}")
-    if kmperpixel is not None:
-        print(f"km/pixel: {kmperpixel}")
-    if timestep is not None:
-        print(f"time step: {timestep} minutes")
-    print("")
-
-    print("NWP and blending inputs")
-    print("-----------------------")
-    print(f"number of (NWP) models: {precip_models.shape[0]}")
-    print(f"blend (NWP) model members: {blend_nwp_members}")
-    print(f"decompose (NWP) models: {'yes' if precip_models.ndim == 4 else 'no'}")
-    print("")
-
-    print("Methods")
-    print("-------")
-    print(f"extrapolation: {extrap_method}")
-    print(f"bandpass filter: {bandpass_filter_method}")
-    print(f"decomposition: {decomp_method}")
-    print(f"noise generator: {noise_method}")
-    print(f"noise adjustment: {'yes' if noise_stddev_adj else 'no'}")
-    print(f"velocity perturbator: {vel_pert_method}")
-    print(f"blending weights method: {weights_method}")
-    print(f"conditional statistics: {'yes' if conditional else 'no'}")
-    print(f"precip. mask method: {mask_method}")
-    print(f"probability matching: {probmatching_method}")
-    print(f"FFT method: {fft_method}")
-    print(f"domain: {domain}")
-    print("")
-
-    print("Parameters")
-    print("----------")
-    if isinstance(timesteps, int):
-        print(f"number of time steps: {timesteps}")
-    else:
-        print(f"time steps: {timesteps}")
-    print(f"ensemble size: {n_ens_members}")
-    print(f"parallel threads: {num_workers}")
-    print(f"number of cascade levels: {n_cascade_levels}")
-    print(f"order of the AR(p) model: {ar_order}")
-    if vel_pert_method == "bps":
-        vp_par = vel_pert_kwargs.get("p_par", noise.motion.get_default_params_bps_par())
-        vp_perp = vel_pert_kwargs.get(
-            "p_perp", noise.motion.get_default_params_bps_perp()
-        )
-        print(f"vel. pert., parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}")
-        print(f"vel. pert., perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}")
-    else:
-        vp_par, vp_perp = None, None
-
-    if conditional or mask_method is not None:
-        print(f"precip. intensity threshold: {precip_thr}")
-        print(f"no-rain fraction threshold for radar: {norain_thr}")
-    print("")
+    def __initialize_nowcast_components(self):
+        """
+        Initialize the FFT, bandpass filters, decomposition methods, and extrapolation method.
+        """
+        # Initialize number of ensemble workers
+        self.__params.num_ensemble_workers = min(
+            self.__config.n_ens_members, self.__config.num_workers
+        )
+
+        M, N = self.__precip.shape[1:]  # Extract the spatial dimensions (height, width)
+
+        # Initialize FFT method
+        self.__params.fft = utils.get_method(
+            self.__config.fft_method, shape=(M, N), n_threads=self.__config.num_workers
+        )
-
-    # 0.3 Get the methods that will be used
-    num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers
-
-    if measure_time:
-        starttime_init = time.time()
-
-    fft = utils.get_method(fft_method, shape=precip.shape[1:], n_threads=num_workers)
-
-    precip_shape = precip.shape[1:]
-
-    # initialize the band-pass filter
-    filter_method = cascade.get_method(bandpass_filter_method)
-    bp_filter = filter_method(precip_shape, n_cascade_levels, **filter_kwargs)
-
-    decompositor, recompositor = cascade.get_method(decomp_method)
+        # Initialize the band-pass filter for the cascade decomposition
+        filter_method = cascade.get_method(self.__config.bandpass_filter_method)
+        self.__params.bandpass_filter = filter_method(
+            (M, N),
+            self.__config.n_cascade_levels,
+            **(self.__params.filter_kwargs or {}),
+        )
+
+        # Get the decomposition method (e.g., FFT)
+        (
+            self.__params.decomposition_method,
+            self.__params.recomposition_method,
+        ) = cascade.get_method(self.__config.decomposition_method)
+
+        # Get the extrapolation method (e.g., semilagrangian)
+        self.__params.extrapolation_method = extrapolation.get_method(
+            self.__config.extrapolation_method
+        )
+
+        # Generate the mesh grid for spatial coordinates
+        x_values, y_values = np.meshgrid(np.arange(N), np.arange(M))
+        self.__params.xy_coordinates = np.stack([x_values, y_values])
+
+        self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy()
+        # Determine the domain mask from non-finite values in the precipitation data
+        self.__params.domain_mask = np.logical_or.reduce(
+            [~np.isfinite(self.__precip[i, :]) for i in range(self.__precip.shape[0])]
+        )
+
+        print("Blended nowcast components initialized successfully.")
+
+    def __prepare_radar_and_NWP_fields(self):
+        # determine the precipitation threshold mask
+        if self.__config.conditional:
+            # TODO: is this logical_and correct here? Now only those places where precip occurs in all images are saved?
+            self.__params.mask_threshold = np.logical_and.reduce(
+                [
+                    self.__precip[i, :, :] >= self.__config.precip_threshold
+                    for i in range(self.__precip.shape[0])
+                ]
+            )
+        else:
+            self.__params.mask_threshold = None
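The TODO above questions the np.logical_and.reduce call; what it computes is a mask of pixels that exceed the threshold in every one of the ar_order + 1 input fields. A small self-contained illustration (the values are made up):

    import numpy as np

    precip = np.array(
        [[[0.0, 2.0], [3.0, 4.0]],
         [[1.5, 0.0], [3.5, 4.5]],
         [[2.0, 2.5], [0.0, 5.0]]]
    )
    thr = 1.0
    mask = np.logical_and.reduce([precip[i] >= thr for i in range(3)])
    # mask == [[False, False],
    #          [False,  True]]: only the pixel wet in all three fields survives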
-
-    # determine the precipitation threshold mask
-    if conditional:
-        MASK_thr = np.logical_and.reduce(
-            [precip[i, :, :] >= precip_thr for i in range(precip.shape[0])]
-        )
-    else:
-        MASK_thr = None
+        # we need to know the zerovalue of precip to replace the mask when decomposing after
+        # extrapolation
+        self.__params.precip_zerovalue = np.nanmin(self.__precip)
-
-    # we need to know the zerovalue of precip to replace the mask when decomposing after extrapolation
-    zerovalue = np.nanmin(precip)
+        # 1. Start with the radar rainfall fields. We want the fields in a
+        # Lagrangian space
+
+        # Advect the previous precipitation fields to the same position with the
+        # most recent one (i.e. transform them into the Lagrangian coordinates).
+        self.__state.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates
+        res = []
-
-    # 1. Start with the radar rainfall fields. We want the fields in a
-    # Lagrangian space
-    precip = _transform_to_lagrangian(
-        precip, velocity, ar_order, xy_coords, extrapolator, extrap_kwargs, num_workers
-    )
+        # TODO: create better names here for this part, adapted from previous code which is now inlined (old function was called _transform_to_lagrangian)
+        def f(precip, i):
+            return self.__params.extrapolation_method(
+                precip[i, :, :],
+                self.__velocity,
+                self.__config.ar_order - i,
+                "min",
+                allow_nonfinite_values=True,
+                **self.__state.extrapolation_kwargs.copy(),
+            )[-1]
+
+        if not DASK_IMPORTED:
+            # Process each earlier precipitation field directly
+            for i in range(self.__config.ar_order):
+                self.__precip[i, :, :] = f(self.__precip, i)
+        else:
+            # Use Dask delayed for parallelization if DASK_IMPORTED is True
+            for i in range(self.__config.ar_order):
+                res.append(dask.delayed(f)(self.__precip, i))
+            num_workers_ = (
+                len(res)
+                if self.__config.num_workers > len(res)
+                else self.__config.num_workers
+            )
+            self.__precip = np.stack(
+                list(dask.compute(*res, num_workers=num_workers_))
+                + [self.__precip[-1, :, :]]
+            )
+
+        # Replace non-finite values with the minimum value for each field
+        self.__precip = self.__precip.copy()
+        for i in range(self.__precip.shape[0]):
+            self.__precip[i, ~np.isfinite(self.__precip[i, :])] = np.nanmin(
+                self.__precip[i, :]
+            )
-
-    # 2. Perform the cascade decomposition for the input precip fields and
-    # and, if necessary, for the (NWP) model fields
-    # 2.1 Compute the cascade decompositions of the input precipitation fields
-    (
-        precip_cascade,
-        mu_extrapolation,
-        sigma_extrapolation,
-    ) = _compute_cascade_decomposition_radar(
-        precip,
-        ar_order,
-        n_cascade_levels,
-        n_ens_members,
-        MASK_thr,
-        domain,
-        bp_filter,
-        decompositor,
-        fft,
-    )
+        # 2. Perform the cascade decomposition for the input precip fields and,
+        # if necessary, for the (NWP) model fields
+        # 2.1 Compute the cascade decompositions of the input precipitation fields
+        # Compute the cascade decompositions of the input precipitation fields.
+        precip_forecast_decomp = []
+        for i in range(self.__config.ar_order + 1):
+            precip_forecast = self.__params.decomposition_method(
+                self.__precip[i, :, :],
+                self.__params.bandpass_filter,
+                mask=self.__params.mask_threshold,
+                fft_method=self.__params.fft,
+                output_domain=self.__config.domain,
+                normalize=True,
+                compute_stats=True,
+                compact_output=True,
+            )
+            precip_forecast_decomp.append(precip_forecast)
+
+        # Rearrange the cascades into a four-dimensional array of shape
+        # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model
+        self.__state.precip_cascades = nowcast_utils.stack_cascades(
+            precip_forecast_decomp, self.__config.n_cascade_levels
+        )
+
+        precip_forecast_decomp = precip_forecast_decomp[-1]
+        self.__state.mean_extrapolation = np.array(precip_forecast_decomp["means"])
+        self.__state.std_extrapolation = np.array(precip_forecast_decomp["stds"])
-
-    # 2.2 If necessary, recompose (NWP) model forecasts
-    precip_models_cascade = None
-
-    if precip_models.ndim != 4:
-        precip_models_cascade = precip_models
-        precip_models = _compute_cascade_recomposition_nwp(precip_models, recompositor)
+        # 2.2 If necessary, recompose (NWP) model forecasts
+        self.__state.precip_models_cascades = None
+
+        if self.__params.precip_models_provided_is_cascade:
+            self.__state.precip_models_cascades = self.__precip_models
+            # Inline logic of _compute_cascade_recomposition_nwp
+            temp_precip_models = []
+            for i in range(self.__precip_models.shape[0]):
+                precip_model = []
+                for time_step in range(self.__precip_models.shape[1]):
+                    # Use the recomposition method to rebuild the rainfall fields
+                    recomposed = self.__params.recomposition_method(
+                        self.__precip_models[i, time_step]
+                    )
+                    precip_model.append(recomposed)
+                temp_precip_models.append(precip_model)
+
+            self.__precip_models = np.stack(temp_precip_models)
-
-    # 2.3 Check for zero input fields in the radar and NWP data.
-    zero_precip_radar = blending.utils.check_norain(precip, precip_thr, norain_thr)
-    # The norain fraction threshold used for nwp is the default value of 0.0,
-    # since nwp does not suffer from clutter.
-    zero_model_fields = blending.utils.check_norain(
-        precip_models, precip_thr, norain_thr
-    )
+        # 2.3 Check for zero input fields in the radar and NWP data.
+        self.__params.zero_precip_radar = blending.utils.check_norain(
+            self.__precip,
+            self.__config.precip_threshold,
+            self.__config.norain_threshold,
+        )
+        # The norain fraction threshold used for nwp is the default value of 0.0,
+        # since nwp does not suffer from clutter.
+        self.__params.zero_precip_model_fields = blending.utils.check_norain(
+            self.__precip_models,
+            self.__config.precip_threshold,
+            self.__config.norain_threshold,
+        )
-
-    if isinstance(timesteps, int):
-        timesteps = list(range(timesteps + 1))
-        timestep_type = "int"
-    else:
-        original_timesteps = [0] + list(timesteps)
-        timesteps = nowcast_utils.binned_timesteps(original_timesteps)
-        timestep_type = "list"
-
-    # 2.3.1 If precip is below the norain threshold and precip_models is zero,
-    # we consider it as no rain in the domain.
-    # The forecast will directly return an array filled with the minimum
-    # value present in precip (which equals zero rainfall in the used
-    # transformation)
-    if zero_precip_radar and zero_model_fields:
+    def __zero_precipitation_forecast(self):
         print(
             "No precipitation above the threshold found in both the radar and NWP fields"
         )
         print("The resulting forecast will contain only zeros")
         # Create the output list
-        R_f = [[] for j in range(n_ens_members)]
+        precip_forecast = [[] for j in range(self.__config.n_ens_members)]
 
         # Save per time step to ensure the array does not become too large if
         # no return_output is requested and callback is not None.
-        for t, subtimestep_idx in enumerate(timesteps):
+        for t, subtimestep_idx in enumerate(self.__timesteps):
             # If the timestep is not the first one, we need to provide the zero forecast
             if t > 0:
                 # Create an empty np array with shape [n_ens_members, rows, cols]
                 # and fill it with the minimum value from precip (corresponding to
                 # zero precipitation)
-                R_f_ = np.full(
-                    (n_ens_members, precip_shape[0], precip_shape[1]), np.nanmin(precip)
+                N, M = self.__precip.shape[1:]
+                precip_forecast_workers = np.full(
+                    (self.__config.n_ens_members, N, M), self.__params.precip_zerovalue
                 )
                 if subtimestep_idx:
-                    if callback is not None:
-                        if R_f_.shape[1] > 0:
-                            callback(R_f_.squeeze())
-                    if return_output:
-                        for j in range(n_ens_members):
-                            R_f[j].append(R_f_[j])
-
-                R_f_ = None
-
-        if measure_time:
-            zero_precip_time = time.time() - starttime_init
-
-        if return_output:
-            outarr = np.stack([np.stack(R_f[j]) for j in range(n_ens_members)])
-            if measure_time:
-                return outarr, zero_precip_time, zero_precip_time
+                    if self.__config.callback is not None:
+                        if precip_forecast_workers.shape[1] > 0:
+                            self.__config.callback(precip_forecast_workers.squeeze())
+                    if self.__config.return_output:
+                        for j in range(self.__config.n_ens_members):
+                            precip_forecast[j].append(precip_forecast_workers[j])
+                precip_forecast_workers = None
+
+        if self.__config.measure_time:
+            self.__zero_precip_time = time.time() - self.__start_time_init
+
+        if self.__config.return_output:
+            precip_forecast_all_members_all_times = np.stack(
+                [
+                    np.stack(precip_forecast[j])
+                    for j in range(self.__config.n_ens_members)
+                ]
+            )
+
+            if self.__config.measure_time:
+                return (
+                    precip_forecast_all_members_all_times,
+                    self.__zero_precip_time,
+                    self.__zero_precip_time,
+                )
             else:
-                return outarr
+                return precip_forecast_all_members_all_times
         else:
             return None
-    else:
+    def __prepare_nowcast_for_zero_radar(self):
         # 2.3.3 If zero_precip_radar, make sure that precip_cascade does not contain
         # only nans or infs. If so, fill it with the zero value.
-        if zero_precip_radar:
-            # Look for a timestep and member with rain so that we have a sensible decomposition
-            done = False
-            for t in timesteps:
-                if done:
-                    break
-                for j in range(precip_models.shape[0]):
-                    if not blending.utils.check_norain(
-                        precip_models[j, t], precip_thr, norain_thr
-                    ):
-                        if precip_models_cascade is not None:
-                            precip_cascade[~np.isfinite(precip_cascade)] = np.nanmin(
-                                precip_models_cascade[j, t]["cascade_levels"]
-                            )
-                            continue
-                        precip_models_cascade_temp = decompositor(
-                            precip_models[j, t, :, :],
-                            bp_filter=bp_filter,
-                            fft_method=fft,
-                            output_domain=domain,
-                            normalize=True,
-                            compute_stats=True,
-                            compact_output=True,
-                        )["cascade_levels"]
-                        precip_cascade[~np.isfinite(precip_cascade)] = np.nanmin(
-                            precip_models_cascade_temp
-                        )
-                        done = True
-                        break
+        # Look for a timestep and member with rain so that we have a sensible decomposition
+        done = False
+        for t in self.__timesteps:
+            if done:
+                break
+            for j in range(self.__precip_models.shape[0]):
+                if not blending.utils.check_norain(
+                    self.__precip_models[j, t],
+                    self.__config.precip_threshold,
+                    self.__config.norain_threshold,
+                ):
+                    if self.__state.precip_models_cascades is not None:
+                        self.__state.precip_cascades[
+                            ~np.isfinite(self.__state.precip_cascades)
+                        ] = np.nanmin(
+                            self.__state.precip_models_cascades[j, t]["cascade_levels"]
+                        )
+                        continue
+                    precip_models_cascade_timestep = self.__params.decomposition_method(
+                        self.__precip_models[j, t, :, :],
+                        bp_filter=self.__params.bandpass_filter,
+                        fft_method=self.__params.fft,
+                        output_domain=self.__config.domain,
+                        normalize=True,
+                        compute_stats=True,
+                        compact_output=True,
+                    )["cascade_levels"]
+                    self.__state.precip_cascades[
+                        ~np.isfinite(self.__state.precip_cascades)
+                    ] = np.nanmin(precip_models_cascade_timestep)
+                    done = True
+                    break
 
         # 2.3.5 If zero_precip_radar is True, only use the velocity field of the NWP
         # forecast. I.e., velocity (radar) equals velocity_model at the first time
         # step.
-        if zero_precip_radar:
-            # Use the velocity from velocity_models at time step 0
-            velocity = velocity_models[:, 0, :, :, :].astype(np.float64, copy=False)
-            # Take the average over the first axis, which corresponds to n_models
-            # (hence, the model average)
-            velocity = np.mean(velocity, axis=0)
+        # Use the velocity from velocity_models at time step 0
+        self.__velocity = self.__velocity_models[:, 0, :, :, :].astype(
+            np.float64, copy=False
+        )
+        # Take the average over the first axis, which corresponds to n_models
+        # (hence, the model average)
+        self.__velocity = np.mean(self.__velocity, axis=0)
 
         # 3. Initialize the noise method.
         # If zero_precip_radar is True, initialize noise based on the NWP field time
         # step where the fraction of rainy cells is highest (because other lead times
         # might be zero as well). Else, initialize the noise with the radar
         # rainfall data
-        if zero_precip_radar:
-            precip_noise_input = _determine_max_nr_rainy_cells_nwp(
-                precip_models, precip_thr, precip_models.shape[0], timesteps
-            )
-            # Make sure precip_noise_input is three dimensional
-            if len(precip_noise_input.shape) != 3:
-                precip_noise_input = precip_noise_input[np.newaxis, :, :]
-        else:
-            precip_noise_input = precip.copy()
+        # Initialize noise based on the NWP field time step where the fraction of rainy cells is highest
+        if self.__config.precip_threshold is None:
+            self.__config.precip_threshold = np.nanmin(self.__precip_models)
+
+        max_rain_pixels = -1
+        max_rain_pixels_j = -1
+        max_rain_pixels_t = -1
+        for j in range(self.__precip_models.shape[0]):
+            for t in self.__timesteps:
+                rain_pixels = self.__precip_models[j][t][
+                    self.__precip_models[j][t] > self.__config.precip_threshold
+                ].size
+                if rain_pixels > max_rain_pixels:
+                    max_rain_pixels = rain_pixels
+                    max_rain_pixels_j = j
+                    max_rain_pixels_t = t
+        self.__state.precip_noise_input = self.__precip_models[max_rain_pixels_j][
+            max_rain_pixels_t
+        ]
+        self.__state.precip_noise_input = self.__state.precip_noise_input.astype(
+            np.float64, copy=False
+        )
+
+        # Make sure precip_noise_input is three-dimensional
+        if len(self.__state.precip_noise_input.shape) != 3:
+            self.__state.precip_noise_input = self.__state.precip_noise_input[
+                np.newaxis, :, :
+            ]
-
-    pp, generate_noise, noise_std_coeffs = _init_noise(
-        precip_noise_input,
-        precip_thr,
-        n_cascade_levels,
-        bp_filter,
-        decompositor,
-        fft,
-        noise_method,
-        noise_kwargs,
-        noise_stddev_adj,
-        measure_time,
-        num_workers,
-        seed,
-    )
-    precip_noise_input = None
+    def __initialize_noise(self):
+        """Initialize the noise method."""
+        if self.__config.noise_method is not None:
+            # get methods for perturbations
+            init_noise, self.__params.noise_generator = noise.get_method(
+                self.__config.noise_method
+            )
+
+            # initialize the perturbation generator for the precipitation field
+            self.__params.perturbation_generator = init_noise(
+                self.__state.precip_noise_input,
+                fft_method=self.__params.fft,
+                **self.__params.noise_kwargs,
+            )
-
-    # 4. Estimate AR parameters for the radar rainfall field
-    PHI = _estimate_ar_parameters_radar(
-        precip_cascade,
-        ar_order,
-        n_cascade_levels,
-        MASK_thr,
-        zero_precip_radar,
-    )
+            if self.__config.noise_stddev_adj == "auto":
+                print("Computing noise adjustment coefficients... ", end="", flush=True)
+                if self.__config.measure_time:
+                    starttime = time.time()
+
+                precip_forecast_min = np.min(self.__state.precip_noise_input)
+                self.__params.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs(
+                    self.__state.precip_noise_input[-1, :, :],
+                    self.__config.precip_threshold,
+                    precip_forecast_min,
+                    self.__params.bandpass_filter,
+                    self.__params.decomposition_method,
+                    self.__params.perturbation_generator,
+                    self.__params.noise_generator,
+                    20,
+                    conditional=True,
+                    num_workers=self.__config.num_workers,
+                    seed=self.__config.seed,
+                )
-
-    # 5. Repeat precip_cascade for n ensemble members
-    # First, discard all except the p-1 last cascades because they are not needed
-    # for the AR(p) model
-
-    precip_cascade = np.stack(
-        [[precip_cascade[i][-ar_order:].copy() for i in range(n_cascade_levels)]]
-        * n_ens_members
-    )
+                if self.__config.measure_time:
+                    _ = self.__measure_time("Initialize noise", starttime)
+                else:
+                    print("done.")
+            elif self.__config.noise_stddev_adj == "fixed":
+                f = lambda k: 1.0 / (0.75 + 0.09 * k)
+                self.__params.noise_std_coeffs = [
+                    f(k) for k in range(1, self.__config.n_cascade_levels + 1)
+                ]
+            else:
+                self.__params.noise_std_coeffs = np.ones(self.__config.n_cascade_levels)
-
-    # 6. Initialize all the random generators and prepare for the forecast loop
-    randgen_prec, vps, generate_vel_noise, randgen_probmatching = (
-        _init_random_generators(
-            velocity,
-            noise_method,
-            probmatching_method,
-            vel_pert_method,
-            vp_par,
-            vp_perp,
-            seed,
-            n_ens_members,
-            kmperpixel,
-            timestep,
-        )
-    )
-    D, D_Yn, D_pb, R_f, R_m, mask_rim, struct, fft_objs = _prepare_forecast_loop(
-        precip_cascade,
-        noise_method,
-        fft_method,
-        n_cascade_levels,
-        n_ens_members,
-        mask_method,
-        mask_kwargs,
-        timestep,
-        kmperpixel,
-    )
+            if self.__config.noise_stddev_adj is not None:
+                print(f"noise std. dev. coeffs: {self.__params.noise_std_coeffs}")
-
-    # Also initialize the cascade of temporally correlated noise, which has the
-    # same shape as precip_cascade, but starts random noise.
-    noise_cascade, mu_noise, sigma_noise = _init_noise_cascade(
-        shape=precip_cascade.shape,
-        n_ens_members=n_ens_members,
-        n_cascade_levels=n_cascade_levels,
-        generate_noise=generate_noise,
-        decompositor=decompositor,
-        pp=pp,
-        randgen_prec=randgen_prec,
-        fft_objs=fft_objs,
-        bp_filter=bp_filter,
-        domain=domain,
-        noise_method=noise_method,
-        noise_std_coeffs=noise_std_coeffs,
-        ar_order=ar_order,
-    )
+        else:
+            self.__params.perturbation_generator = None
+            self.__params.noise_generator = None
+            self.__params.noise_std_coeffs = None
+            GAMMA = np.array(
+                [
+                    [0.99805, 0.9925, 0.9776, 0.9297, 0.796, 0.482, 0.079, 0.0006],
+                    [0.9933, 0.9752, 0.923, 0.750, 0.367, 0.069, 0.0018, 0.0014],
+                ]
+            )

+            # Check that the number of cascade levels matches the tabulated
+            # values; truncate or pad GAMMA where needed
+            if GAMMA.shape[1] > self.__config.n_cascade_levels:
+                GAMMA = GAMMA[:, 0 : self.__config.n_cascade_levels]
+            elif GAMMA.shape[1] < self.__config.n_cascade_levels:
+                # Get the number of cascade levels that is missing
+                n_extra_lev = self.__config.n_cascade_levels - GAMMA.shape[1]
+                # Pad the array with the lowest tabulated lag-1 and lag-2
+                # correlation values (0.0006 and 0.0014)
+                GAMMA = np.append(
+                    GAMMA,
+                    [np.repeat(0.0006, n_extra_lev), np.repeat(0.0014, n_extra_lev)],
+                    axis=1,
+                )

+            # Finally, make GAMMA.shape[0] match the AR order
+            if self.__config.ar_order == 1:
+                GAMMA = GAMMA[0, :]
+            if self.__config.ar_order > 2:
+                for _ in range(self.__config.ar_order - 2):
+                    GAMMA = np.vstack((GAMMA, GAMMA[1, :]))
+
+            # Finally, transpose GAMMA to ensure that the shape is the same as np.empty((n_cascade_levels, ar_order))
+            GAMMA = GAMMA.transpose()
+            assert GAMMA.shape == (
+                self.__config.n_cascade_levels,
+                self.__config.ar_order,
+            )

+        # Print the auto-correlation coefficients
+        nowcast_utils.print_corrcoefs(GAMMA)

+        if self.__config.ar_order == 2:
+            # adjust the lag-2 correlation coefficient to ensure that the AR(p)
+            # process is stationary
+            for i in range(self.__config.n_cascade_levels):
+                GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2(
+                    GAMMA[i, 0], GAMMA[i, 1]
+                )

+        # estimate the parameters of the AR(p) model from the auto-correlation
+        # coefficients
+        self.__params.PHI = np.empty(
+            (self.__config.n_cascade_levels, self.__config.ar_order + 1)
+        )
+        for i in range(self.__config.n_cascade_levels):
+            self.__params.PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :])

+        nowcast_utils.print_ar_params(self.__params.PHI)

+    def __multiply_precip_cascade_to_match_ensemble_members(self):
+        # 5. 
Repeat precip_cascade for n ensemble members + # First, discard all except the p-1 last cascades because they are not needed + # for the AR(p) model - if precip_models_cascade is not None: - decomp_precip_models = list(precip_models_cascade[:, t]) - else: - if precip_models.shape[0] == 1: - decomp_precip_models = [ - decompositor( - precip_models[0, t, :, :], - bp_filter=bp_filter, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ) - ] - else: - with ThreadPool(num_workers) as pool: - decomp_precip_models = pool.map( - partial( - decompositor, - bp_filter=bp_filter, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ), - list(precip_models[:, t, :, :]), - ) + self.__state.precip_cascades = np.stack( + [ + [ + self.__state.precip_cascades[i][-self.__config.ar_order :].copy() + for i in range(self.__config.n_cascade_levels) + ] + ] + * self.__config.n_ens_members + ) - precip_models_cascade_temp = np.array( - [decomp["cascade_levels"] for decomp in decomp_precip_models] - ) - mu_models_temp = np.array( - [decomp["means"] for decomp in decomp_precip_models] - ) - sigma_models_temp = np.array( - [decomp["stds"] for decomp in decomp_precip_models] - ) + def __initialize_random_generators(self): + # 6. Initialize all the random generators and prepare for the forecast loop + """Initialize all the random generators.""" + seed = self.__config.seed + if self.__config.noise_method is not None: + self.__state.randgen_precip = [] + for j in range(self.__config.n_ens_members): + rs = np.random.RandomState(seed) + self.__state.randgen_precip.append(rs) + seed = rs.randint(0, high=1e9) + + if self.__config.probmatching_method is not None: + self.__state.randgen_probmatching = [] + for j in range(self.__config.n_ens_members): + rs = np.random.RandomState(seed) + self.__state.randgen_probmatching.append(rs) + seed = rs.randint(0, high=1e9) + + if self.__config.velocity_perturbation_method is not None: + self.__state.randgen_motion = [] + for j in range(self.__config.n_ens_members): + rs = np.random.RandomState(seed) + self.__state.randgen_motion.append(rs) + seed = rs.randint(0, high=1e9) - # 2.3.4 Check if the NWP fields contain nans or infinite numbers. If so, - # fill these with the minimum value present in precip (corresponding to - # zero rainfall in the radar observations) - ( - precip_models_cascade_temp, - precip_models_temp, - mu_models_temp, - sigma_models_temp, - ) = _fill_nans_infs_nwp_cascade( - precip_models_cascade_temp, - precip_models[:, t, :, :].astype(np.float64, copy=False), - precip_cascade, - precip, - mu_models_temp, - sigma_models_temp, - ) - - # 8.1.1 Before calling the worker for the forecast loop, determine which (NWP) - # models will be combined with which nowcast ensemble members. With the - # way it is implemented at this moment: n_ens_members of the output equals - # the maximum number of (ensemble) members in the input (either the nowcasts or NWP). 
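+        # Note on reproducibility (descriptive comment, no behavioural change):
+        # the generators above are chained, i.e. each member's RandomState is
+        # seeded by a draw from the previous one, so a single `seed` argument
+        # reproduces the whole ensemble. A standalone sketch of the pattern:
+        #   rngs, s = [], 42
+        #   for _ in range(n_ens_members):
+        #       rs = np.random.RandomState(s)
+        #       rngs.append(rs)
+        #       s = rs.randint(0, high=1e9)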
( - precip_models_cascade_temp, - precip_models_temp, - velocity_models_temp, - mu_models_temp, - sigma_models_temp, - n_model_indices, - ) = _find_nwp_combination( - precip_models_cascade_temp, - precip_models_temp, - velocity_models[:, t, :, :, :].astype(np.float64, copy=False), - mu_models_temp, - sigma_models_temp, - n_ens_members, - ar_order, - n_cascade_levels, - blend_nwp_members, - ) - - # If zero_precip_radar is True, set the velocity field equal to the NWP - # velocity field for the current time step (velocity_models_temp). - if zero_precip_radar: - # Use the velocity from velocity_models and take the average over - # n_models (axis=0) - velocity = np.mean(velocity_models_temp, axis=0) - - if t == 0: - # 8.1.2 Calculate the initial skill of the (NWP) model forecasts at t=0 - rho_nwp_models = _compute_initial_nwp_skill( - precip_cascade, - precip_models_cascade_temp, - domain_mask, - issuetime, - outdir_path_skill, - clim_kwargs, - ) - - if t > 0: - # 8.1.3 Determine the skill of the components for lead time (t0 + t) - # First for the extrapolation component. Only calculate it when t > 0. - ( - rho_extr, - rho_extr_prev, - ) = blending.skill_scores.lt_dependent_cor_extrapolation( - PHI=PHI, correlations=rho_extr, correlations_prev=rho_extr_prev + init_velocity_noise, + self.__params.generate_velocity_noise, + ) = noise.get_method(self.__config.velocity_perturbation_method) + + # initialize the perturbation generators for the motion field + self.__params.velocity_perturbations = [] + for j in range(self.__config.n_ens_members): + kwargs = { + "randstate": self.__state.randgen_motion[j], + "p_par": self.__params.velocity_perturbations_parallel, + "p_perp": self.__params.velocity_perturbations_perpendicular, + } + vp_ = init_velocity_noise( + self.__velocity, + 1.0 / self.__config.kmperpixel, + self.__config.timestep, + **kwargs, ) + self.__params.velocity_perturbations.append(vp_) + else: + ( + self.__params.velocity_perturbations, + self.__params.generate_velocity_noise, + ) = (None, None) + + def __prepare_forecast_loop(self): + """Prepare for the forecast loop.""" + # Empty arrays for the previous displacements and the forecast cascade + self.__state.previous_displacement = np.stack( + [None for j in range(self.__config.n_ens_members)] + ) + self.__state.previous_displacement_noise_cascade = np.stack( + [None for j in range(self.__config.n_ens_members)] + ) + self.__state.previous_displacement_prob_matching = np.stack( + [None for j in range(self.__config.n_ens_members)] + ) + self.__state.final_blended_forecast = [ + [] for j in range(self.__config.n_ens_members) + ] + + if self.__config.mask_method == "incremental": + # get mask parameters + self.__params.mask_rim = self.__params.mask_kwargs.get("mask_rim", 10) + mask_f = self.__params.mask_kwargs.get("mask_f", 1.0) + # initialize the structuring element + struct = generate_binary_structure(2, 1) + # iterate it to expand it nxn + n = mask_f * self.__config.timestep / self.__config.kmperpixel + self.__params.struct = iterate_structure(struct, int((n - 1) / 2.0)) + else: + self.__params.mask_rim, self.__params.struct = None, None - # the nowcast iteration for each ensemble member - R_f_ = [None for _ in range(n_ens_members)] + if self.__config.noise_method is None: + self.__state.final_blended_forecast_non_perturbed = [ + self.__state.precip_cascades[0][i].copy() + for i in range(self.__config.n_cascade_levels) + ] + else: + self.__state.final_blended_forecast_non_perturbed = None + + self.__params.fft_objs = [] + for i in 
range(self.__config.n_ens_members):
+            self.__params.fft_objs.append(
+                utils.get_method(
+                    self.__config.fft_method,
+                    shape=self.__state.precip_cascades.shape[-2:],
+                )
+            )

+        # initialize the current and previous extrapolation forecast scales for the nowcasting component
+        # phi1 / (1 - phi2), see BPS2004
+        self.__state.rho_extrap_cascade_prev = np.repeat(
+            1.0, self.__params.PHI.shape[0]
+        )
+        self.__state.rho_extrap_cascade = self.__params.PHI[:, 0] / (
+            1.0 - self.__params.PHI[:, 1]
+        )
+
+    def __initialize_noise_cascades(self):
+        """Initialize the noise cascade with identical noise for all AR(n) steps.
+        We also need to return the mean and standard deviations of the noise
+        for the recombination of the noise before advecting it.
+        """
+        self.__state.precip_noise_cascades = np.zeros(
+            self.__state.precip_cascades.shape
+        )
+        self.__state.precip_mean_noise = np.zeros(
+            (self.__config.n_ens_members, self.__config.n_cascade_levels)
+        )
+        self.__state.precip_std_noise = np.zeros(
+            (self.__config.n_ens_members, self.__config.n_cascade_levels)
+        )
+        if self.__config.noise_method:
+            for j in range(self.__config.n_ens_members):
+                epsilon = self.__params.noise_generator(
+                    self.__params.perturbation_generator,
+                    randstate=self.__state.randgen_precip[j],
+                    fft_method=self.__params.fft_objs[j],
+                    domain=self.__config.domain,
+                )
+                epsilon_decomposed = self.__params.decomposition_method(
+                    epsilon,
+                    self.__params.bandpass_filter,
+                    fft_method=self.__params.fft_objs[j],
+                    input_domain=self.__config.domain,
+                    output_domain=self.__config.domain,
+                    compute_stats=True,
+                    normalize=True,
+                    compact_output=True,
+                )
+                self.__state.precip_mean_noise[j] = epsilon_decomposed["means"]
+                self.__state.precip_std_noise[j] = epsilon_decomposed["stds"]
+                for i in range(self.__config.n_cascade_levels):
+                    epsilon_temp = epsilon_decomposed["cascade_levels"][i]
+                    epsilon_temp *= self.__params.noise_std_coeffs[i]
+                    for n in range(self.__config.ar_order):
+                        self.__state.precip_noise_cascades[j][i][n] = epsilon_temp
+                epsilon_decomposed = None
+                epsilon_temp = None
+
+    def __determine_subtimesteps_and_nowcast_time_step(self, t, subtimestep_idx):
+        if self.__params.time_steps_is_list:
+            self.__state.subtimesteps = [
+                self.__params.original_timesteps[t_] for t_ in subtimestep_idx
+            ]
+        else:
+            self.__state.subtimesteps = [t]
+
+        if (self.__params.time_steps_is_list and self.__state.subtimesteps) or (
+            not self.__params.time_steps_is_list and t > 0
+        ):
+            self.__state.is_nowcast_time_step = True
+        else:
+            self.__state.is_nowcast_time_step = False
+
+        if self.__state.is_nowcast_time_step:
+            print(
+                f"Computing nowcast for time step {t}... ",
", + end="", + flush=True, + ) + + def __decompose_nwp_if_needed_and_fill_nans_in_nwp(self, t): + if self.__state.precip_models_cascades is not None: + decomp_precip_models = list(self.__state.precip_models_cascades[:, t]) + + else: + if self.__precip_models.shape[0] == 1: + decomp_precip_models = [ + self.__params.decomposition_method( + self.__precip_models[0, t, :, :], + bp_filter=self.__params.bandpass_filter, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + ) + ] + else: + with ThreadPool(self.__config.num_workers) as pool: + decomp_precip_models = pool.map( + partial( + self.__params.decomposition_method, + bp_filter=self.__params.bandpass_filter, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + ), + list(self.__precip_models[:, t, :, :]), + ) + + self.__state.precip_models_cascades_timestep = np.array( + [decomp["cascade_levels"] for decomp in decomp_precip_models] + ) + self.__state.mean_models_timestep = np.array( + [decomp["means"] for decomp in decomp_precip_models] + ) + self.__state.std_models_timestep = np.array( + [decomp["stds"] for decomp in decomp_precip_models] + ) + + # 2.3.4 Check if the NWP fields contain nans or infinite numbers. If so, + # fill these with the minimum value present in precip (corresponding to + # zero rainfall in the radar observations) + + # Ensure that the NWP cascade and fields do no contain any nans or infinite number + # Fill nans and infinite numbers with the minimum value present in precip + self.__state.precip_models_timestep = self.__precip_models[:, t, :, :].astype( + np.float64, copy=False + ) # (corresponding to zero rainfall in the radar observations) + min_cascade = np.nanmin(self.__state.precip_cascades) + min_precip = np.nanmin(self.__precip) + self.__state.precip_models_cascades_timestep[ + ~np.isfinite(self.__state.precip_models_cascades_timestep) + ] = min_cascade + self.__state.precip_models_timestep[ + ~np.isfinite(self.__state.precip_models_timestep) + ] = min_precip + # Also set any nans or infs in the mean and sigma of the cascade to + # respectively 0.0 and 1.0 + self.__state.mean_models_timestep[ + ~np.isfinite(self.__state.mean_models_timestep) + ] = 0.0 + self.__state.std_models_timestep[ + ~np.isfinite(self.__state.std_models_timestep) + ] = 0.0 + + def __find_nowcast_NWP_combination(self, t): + # 8.1.1 Before calling the worker for the forecast loop, determine which (NWP) + # models will be combined with which nowcast ensemble members. With the + # way it is implemented at this moment: n_ens_members of the output equals + # the maximum number of (ensemble) members in the input (either the nowcasts or NWP). + + """Determine which (NWP) models will be combined with which nowcast ensemble members. + With the way it is implemented at this moment: n_ens_members of the output equals + the maximum number of (ensemble) members in the input (either the nowcasts or NWP). + """ + self.__state.velocity_models_timestep = self.__velocity_models[ + :, t, :, :, : + ].astype(np.float64, copy=False) + # Make sure the number of model members is not larger than or equal to n_ens_members + n_model_members = self.__state.precip_models_cascades_timestep.shape[0] + if n_model_members > self.__config.n_ens_members: + raise ValueError( + "The number of NWP model members is larger than the given number of ensemble members. n_model_members <= n_ens_members." 
+            )
+
+        # Check if NWP models/members should be used individually, or if all of
+        # them are blended together per nowcast ensemble member.
+        if self.__config.blend_nwp_members:
+            self.__state.mapping_list_NWP_member_to_ensemble_member = None
+
+        else:
+            # Start with determining the maximum and minimum number of members/models
+            # in both input products
+            n_ens_members_max = max(self.__config.n_ens_members, n_model_members)
+            n_ens_members_min = min(self.__config.n_ens_members, n_model_members)
+            # Also make a list of the model index numbers. These indices are needed
+            # for indexing the right climatological skill file when pysteps calculates
+            # the blended forecast in parallel.
+            if n_model_members > 1:
+                self.__state.mapping_list_NWP_member_to_ensemble_member = np.arange(
+                    n_model_members
+                )
+            else:
+                self.__state.mapping_list_NWP_member_to_ensemble_member = [0]
+
+            # Now, repeat the nowcast ensemble members or the NWP models/members until
+            # they have the same number of members as n_ens_members_max. For instance, if
+            # you have 10 nowcast ensemble members and 3 NWP members, the output will
+            # be an ensemble of 10 members. Hence, the three NWP members are blended
+            # with the first three members of the nowcast (member one with member one,
+            # two with two, etc.); subsequently, the same NWP members are blended with
+            # the next three members (NWP member one with member 4, NWP member 2 with
+            # member 5, etc.), until 10 is reached.
+            if n_ens_members_min != n_ens_members_max:
+                if n_model_members == 1:
+                    self.__state.precip_models_cascades_timestep = np.repeat(
+                        self.__state.precip_models_cascades_timestep,
+                        n_ens_members_max,
+                        axis=0,
+                    )
+                    self.__state.mean_models_timestep = np.repeat(
+                        self.__state.mean_models_timestep, n_ens_members_max, axis=0
+                    )
+                    self.__state.std_models_timestep = np.repeat(
+                        self.__state.std_models_timestep, n_ens_members_max, axis=0
+                    )
+                    self.__state.velocity_models_timestep = np.repeat(
+                        self.__state.velocity_models_timestep, n_ens_members_max, axis=0
+                    )
+                    # For the prob. matching
+                    self.__state.precip_models_timestep = np.repeat(
+                        self.__state.precip_models_timestep, n_ens_members_max, axis=0
+                    )
+                    # Finally, for the model indices
+                    self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat(
+                        self.__state.mapping_list_NWP_member_to_ensemble_member,
+                        n_ens_members_max,
+                        axis=0,
+                    )
+
+                elif n_model_members == n_ens_members_min:
+                    repeats = [
+                        (n_ens_members_max + i) // n_ens_members_min
+                        for i in range(n_ens_members_min)
+                    ]
+                    self.__state.precip_models_cascades_timestep = np.repeat(
+                        self.__state.precip_models_cascades_timestep,
+                        repeats,
+                        axis=0,
+                    )
+                    self.__state.mean_models_timestep = np.repeat(
+                        self.__state.mean_models_timestep, repeats, axis=0
+                    )
+                    self.__state.std_models_timestep = np.repeat(
+                        self.__state.std_models_timestep, repeats, axis=0
+                    )
+                    self.__state.velocity_models_timestep = np.repeat(
+                        self.__state.velocity_models_timestep, repeats, axis=0
+                    )
+                    # For the prob. 
matching + self.__state.precip_models_timestep = np.repeat( + self.__state.precip_models_timestep, repeats, axis=0 + ) + # Finally, for the model indices + self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat( + self.__state.mapping_list_NWP_member_to_ensemble_member, + repeats, + axis=0, + ) + + def __determine_skill_for_current_timestep(self, t): + if t == 0: + # Calculate the initial skill of the (NWP) model forecasts at t=0. + self.__params.rho_nwp_models = [] + for model_index in range( + self.__state.precip_models_cascades_timestep.shape[0] + ): + rho_value = blending.skill_scores.spatial_correlation( + obs=self.__state.precip_cascades[0, :, -1, :, :].copy(), + mod=self.__state.precip_models_cascades_timestep[ + model_index, :, :, : + ].copy(), + domain_mask=self.__params.domain_mask, + ) + self.__params.rho_nwp_models.append(rho_value) + self.__params.rho_nwp_models = np.stack(self.__params.rho_nwp_models) - # 8.2 Determine the weights per component - - # Weights following the bps method. These are needed for the velocity - # weights prior to the advection step. If weights method spn is - # selected, weights will be overwritten with those weights prior to - # blending step. - # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] - weights = calculate_weights_bps(rho_fc) - - # The model only weights - if weights_method == "bps": - # Determine the weights of the components without the extrapolation - # cascade, in case this is no data or outside the mask. - weights_model_only = calculate_weights_bps(rho_fc[1:, :]) - elif weights_method == "spn": - # Only the weights of the components without the extrapolation - # cascade will be determined here. The full set of weights are - # determined after the extrapolation step in this method. - if blend_nwp_members and precip_models_cascade_temp.shape[0] > 1: - weights_model_only = np.zeros( - (precip_models_cascade_temp.shape[0] + 1, n_cascade_levels) + # Ensure that the model skill decreases with increasing scale level. + for model_index in range( + self.__state.precip_models_cascades_timestep.shape[0] + ): + for i in range( + 1, self.__state.precip_models_cascades_timestep.shape[1] + ): + if ( + self.__params.rho_nwp_models[model_index, i] + > self.__params.rho_nwp_models[model_index, i - 1] + ): + # Set it equal to the previous scale level + self.__params.rho_nwp_models[model_index, i] = ( + self.__params.rho_nwp_models[model_index, i - 1] ) - for i in range(n_cascade_levels): - # Determine the normalized covariance matrix (containing) - # the cross-correlations between the models - cov = np.corrcoef( - np.stack( - [ - precip_models_cascade_temp[ - n_model, i, :, : - ].flatten() - for n_model in range( - precip_models_cascade_temp.shape[0] - ) + + # Save this in the climatological skill file + blending.clim.save_skill( + current_skill=self.__params.rho_nwp_models, + validtime=self.__issuetime, + outdir_path=self.__config.outdir_path_skill, + **self.__params.climatology_kwargs, + ) + if t > 0: + # 8.1.3 Determine the skill of the components for lead time (t0 + t) + # First for the extrapolation component. Only calculate it when t > 0. 
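+            # Descriptive note: conceptually, the extrapolation skill is evolved
+            # with the fitted AR parameters, roughly (sketch; see
+            # blending.skill_scores for the authoritative implementation):
+            #   rho_new  = PHI[:, 0] * rho + PHI[:, 1] * rho_prev
+            #   rho_prev = rho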
+ ( + self.__state.rho_extrap_cascade, + self.__state.rho_extrap_cascade_prev, + ) = blending.skill_scores.lt_dependent_cor_extrapolation( + PHI=self.__params.PHI, + correlations=self.__state.rho_extrap_cascade, + correlations_prev=self.__state.rho_extrap_cascade_prev, + ) + + def __determine_NWP_skill_for_next_timestep(self, t, j, worker_state): + # 8.1.2 Determine the skill of the nwp components for lead time (t0 + t) + # Then for the model components + if self.__config.blend_nwp_members: + rho_nwp_forecast = [] + for model_index in range(self.__params.rho_nwp_models.shape[0]): + rho_value = blending.skill_scores.lt_dependent_cor_nwp( + lt=(t * int(self.__config.timestep)), + correlations=self.__params.rho_nwp_models[model_index], + outdir_path=self.__config.outdir_path_skill, + n_model=model_index, + skill_kwargs=self.__params.climatology_kwargs, + ) + rho_nwp_forecast.append(rho_value) + rho_nwp_forecast = np.stack(rho_nwp_forecast) + # Concatenate rho_extrap_cascade and rho_nwp + worker_state.rho_final_blended_forecast = np.concatenate( + (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 + ) + else: + # TODO: check if j is the best accessor for this variable + rho_nwp_forecast = blending.skill_scores.lt_dependent_cor_nwp( + lt=(t * int(self.__config.timestep)), + correlations=self.__params.rho_nwp_models[j], + outdir_path=self.__config.outdir_path_skill, + n_model=worker_state.mapping_list_NWP_member_to_ensemble_member[j], + skill_kwargs=self.__params.climatology_kwargs, + ) + # Concatenate rho_extrap_cascade and rho_nwp + worker_state.rho_final_blended_forecast = np.concatenate( + (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast[None, :]), + axis=0, + ) + + def __determine_weights_per_component(self, worker_state): + # 8.2 Determine the weights per component + + # Weights following the bps method. These are needed for the velocity + # weights prior to the advection step. If weights method spn is + # selected, weights will be overwritten with those weights prior to + # blending step. + # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] + worker_state.weights = calculate_weights_bps( + worker_state.rho_final_blended_forecast + ) + + # The model only weights + if self.__config.weights_method == "bps": + # Determine the weights of the components without the extrapolation + # cascade, in case this is no data or outside the mask. + worker_state.weights_model_only = calculate_weights_bps( + worker_state.rho_final_blended_forecast[1:, :] + ) + elif self.__config.weights_method == "spn": + # Only the weights of the components without the extrapolation + # cascade will be determined here. The full set of weights are + # determined after the extrapolation step in this method. 
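+            # Descriptive note on the two weighting routes (sketch; see
+            # calculate_weights_bps and calculate_weights_spn for the details):
+            #   bps: each component is weighted using its own correlation only,
+            #        with the noise weight covering the residual variance;
+            #   spn: the weights additionally account for the cross-correlations
+            #        between the components (the `covariance` argument below).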
+ if ( + self.__config.blend_nwp_members + and worker_state.precip_models_cascades_timestep.shape[0] > 1 + ): + worker_state.weights_model_only = np.zeros( + ( + worker_state.precip_models_cascades_timestep.shape[0] + 1, + self.__config.n_cascade_levels, + ) + ) + for i in range(self.__config.n_cascade_levels): + # Determine the normalized covariance matrix (containing) + # the cross-correlations between the models + covariance_nwp_models = np.corrcoef( + np.stack( + [ + worker_state.precip_models_cascades_timestep[ + n_model, i, :, : + ].flatten() + for n_model in range( + worker_state.precip_models_cascades_timestep.shape[ + 0 ] ) - ) - # Determine the weights for this cascade level - weights_model_only[:, i] = calculate_weights_spn( - correlations=rho_fc[1:, i], cov=cov - ) - else: - # Same as correlation and noise is 1 - correlation - weights_model_only = calculate_weights_bps(rho_fc[1:, :]) - else: - raise ValueError( - "Unknown weights method %s: must be 'bps' or 'spn'" - % weights_method + ] + ) + ) + # Determine the weights for this cascade level + worker_state.weights_model_only[:, i] = calculate_weights_spn( + correlations=worker_state.rho_final_blended_forecast[1:, i], + covariance=covariance_nwp_models, ) + else: + # Same as correlation and noise is 1 - correlation + worker_state.weights_model_only = calculate_weights_bps( + worker_state.rho_final_blended_forecast[1:, :] + ) + else: + raise ValueError( + "Unknown weights method %s: must be 'bps' or 'spn'" + % self.__config.weights_method + ) + + def __regress_extrapolation_and_noise_cascades(self, j, worker_state): + # 8.3 Determine the noise cascade and regress this to the subsequent + # time step + regress the extrapolation component to the subsequent + # time step + + # 8.3.1 Determine the epsilon, a cascade of temporally independent + # but spatially correlated noise + if self.__config.noise_method is not None: + # generate noise field + epsilon = self.__params.noise_generator( + self.__params.perturbation_generator, + randstate=worker_state.randgen_precip[j], + fft_method=self.__params.fft_objs[j], + domain=self.__config.domain, + ) - # 8.3 Determine the noise cascade and regress this to the subsequent - # time step + regress the extrapolation component to the subsequent - # time step - - # 8.3.1 Determine the epsilon, a cascade of temporally independent - # but spatially correlated noise - if noise_method is not None: - # generate noise field - EPS = generate_noise( - pp, - randstate=randgen_prec[j], - fft_method=fft_objs[j], - domain=domain, + # decompose the noise field into a cascade + epsilon_decomposed = self.__params.decomposition_method( + epsilon, + self.__params.bandpass_filter, + fft_method=self.__params.fft_objs[j], + input_domain=self.__config.domain, + output_domain=self.__config.domain, + compute_stats=True, + normalize=True, + compact_output=True, + ) + else: + epsilon_decomposed = None + + # 8.3.2 regress the extrapolation component to the subsequent time + # step + # iterate the AR(p) model for each cascade level + for i in range(self.__config.n_cascade_levels): + # apply AR(p) process to extrapolation cascade level + if ( + epsilon_decomposed is not None + or self.__config.velocity_perturbation_method is not None + ): + worker_state.precip_cascades[j][i] = autoregression.iterate_ar_model( + worker_state.precip_cascades[j][i], self.__params.PHI[i, :] + ) + # Renormalize the cascade + worker_state.precip_cascades[j][i][1] /= np.std( + worker_state.precip_cascades[j][i][1] + ) + else: + # use the 
deterministic AR(p) model computed above if + # perturbations are disabled + worker_state.precip_cascades[j][i] = ( + worker_state.final_blended_forecast_non_perturbed[i] + ) + + # 8.3.3 regress the noise component to the subsequent time step + # iterate the AR(p) model for each cascade level + for i in range(self.__config.n_cascade_levels): + # normalize the noise cascade + if epsilon_decomposed is not None: + epsilon_temp = epsilon_decomposed["cascade_levels"][i] + epsilon_temp *= self.__params.noise_std_coeffs[i] + else: + epsilon_temp = None + # apply AR(p) process to noise cascade level + # (Returns zero noise if epsilon_decomposed is None) + worker_state.precip_noise_cascades[j][i] = autoregression.iterate_ar_model( + worker_state.precip_noise_cascades[j][i], + self.__params.PHI[i, :], + eps=epsilon_temp, + ) + + epsilon_decomposed = None + epsilon_temp = None + + def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep( + self, t, j, worker_state + ): + # 8.4 Perturb and blend the advection fields + advect the + # extrapolation and noise cascade to the current time step + # (or subtimesteps if non-integer time steps are given) + + # Settings and initialize the output + extrap_kwargs_ = worker_state.extrapolation_kwargs.copy() + extrap_kwargs_noise = worker_state.extrapolation_kwargs.copy() + extrap_kwargs_pb = worker_state.extrapolation_kwargs.copy() + velocity_perturbations_extrapolation = self.__velocity + # The following should be accessible after this function + worker_state.precip_extrapolated_decomp = [] + worker_state.noise_extrapolated_decomp = [] + worker_state.precip_extrapolated_probability_matching = [] + + # Extrapolate per sub time step + for t_sub in worker_state.subtimesteps: + if t_sub > 0: + t_diff_prev_subtimestep_int = t_sub - int(t_sub) + if t_diff_prev_subtimestep_int > 0.0: + precip_forecast_cascade_subtimestep = [ + (1.0 - t_diff_prev_subtimestep_int) + * worker_state.precip_cascades_prev_subtimestep[j][i][-1, :] + + t_diff_prev_subtimestep_int + * worker_state.precip_cascades[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + noise_cascade_subtimestep = [ + (1.0 - t_diff_prev_subtimestep_int) + * worker_state.cascade_noise_prev_subtimestep[j][i][-1, :] + + t_diff_prev_subtimestep_int + * worker_state.precip_noise_cascades[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + + else: + precip_forecast_cascade_subtimestep = [ + worker_state.precip_cascades_prev_subtimestep[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + noise_cascade_subtimestep = [ + worker_state.cascade_noise_prev_subtimestep[j][i][-1, :] + for i in range(self.__config.n_cascade_levels) + ] + + precip_forecast_cascade_subtimestep = np.stack( + precip_forecast_cascade_subtimestep + ) + noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) + + t_diff_prev_subtimestep = t_sub - worker_state.time_prev_timestep[j] + worker_state.leadtime_since_start_forecast[j] += t_diff_prev_subtimestep + + # compute the perturbed motion field - include the NWP + # velocities and the weights. 
Note that we only perturb + # the extrapolation velocity field, as the NWP velocity + # field is present per time step + if self.__config.velocity_perturbation_method is not None: + velocity_perturbations_extrapolation = ( + self.__velocity + + self.__params.generate_velocity_noise( + self.__params.velocity_perturbations[j], + worker_state.leadtime_since_start_forecast[j] + * self.__config.timestep, + ) ) - # decompose the noise field into a cascade - EPS = decompositor( - EPS, - bp_filter, - fft_method=fft_objs[j], - input_domain=domain, - output_domain=domain, - compute_stats=True, - normalize=True, - compact_output=True, + # Stack the perturbed extrapolation and the NWP velocities + if self.__config.blend_nwp_members: + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + worker_state.velocity_models_timestep, + ), + axis=0, ) else: - EPS = None - - # 8.3.2 regress the extrapolation component to the subsequent time - # step - # iterate the AR(p) model for each cascade level - for i in range(n_cascade_levels): - # apply AR(p) process to extrapolation cascade level - if EPS is not None or vel_pert_method is not None: - precip_cascade[j][i] = autoregression.iterate_ar_model( - precip_cascade[j][i], PHI[i, :] - ) - # Renormalize the cascade - precip_cascade[j][i][1] /= np.std(precip_cascade[j][i][1]) - else: - # use the deterministic AR(p) model computed above if - # perturbations are disabled - precip_cascade[j][i] = R_m[i] - - # 8.3.3 regress the noise component to the subsequent time step - # iterate the AR(p) model for each cascade level - for i in range(n_cascade_levels): - # normalize the noise cascade - if EPS is not None: - EPS_ = EPS["cascade_levels"][i] - EPS_ *= noise_std_coeffs[i] - else: - EPS_ = None - # apply AR(p) process to noise cascade level - # (Returns zero noise if EPS is None) - noise_cascade[j][i] = autoregression.iterate_ar_model( - noise_cascade[j][i], PHI[i, :], eps=EPS_ + velocity_models = worker_state.velocity_models_timestep[j] + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models[None, :, :, :], + ), + axis=0, ) + velocity_models = None + + # Obtain a blended optical flow, using the weights of the + # second cascade following eq. 24 in BPS2006 + velocity_blended = blending.utils.blend_optical_flows( + flows=velocity_stack_all, + weights=worker_state.weights[ + :-1, 1 + ], # [(extr_field, n_model_fields), cascade_level=2] + ) - EPS = None - EPS_ = None + # Extrapolate both cascades to the next time step + # First recompose the cascade, advect it and decompose it again + # This is needed to remove the interpolation artifacts. + # In addition, the number of extrapolations is greatly reduced + # A. 
Radar Rain
+                precip_forecast_recomp_subtimestep = blending.utils.recompose_cascade(
+                    combined_cascade=precip_forecast_cascade_subtimestep,
+                    combined_mean=worker_state.mean_extrapolation,
+                    combined_sigma=worker_state.std_extrapolation,
+                )
+                # Make sure we have values outside the mask
+                if self.__params.zero_precip_radar:
+                    precip_forecast_recomp_subtimestep = np.nan_to_num(
+                        precip_forecast_recomp_subtimestep,
+                        copy=True,
+                        nan=self.__params.precip_zerovalue,
+                        posinf=self.__params.precip_zerovalue,
+                        neginf=self.__params.precip_zerovalue,
+                    )
+                # Put back the mask
+                precip_forecast_recomp_subtimestep[self.__params.domain_mask] = np.nan
+                worker_state.extrapolation_kwargs["displacement_prev"] = (
+                    worker_state.previous_displacement[j]
+                )
+                (
+                    precip_forecast_extrapolated_recomp_subtimestep_temp,
+                    worker_state.previous_displacement[j],
+                ) = self.__params.extrapolation_method(
+                    precip_forecast_recomp_subtimestep,
+                    velocity_blended,
+                    [t_diff_prev_subtimestep],
+                    allow_nonfinite_values=True,
+                    **worker_state.extrapolation_kwargs,
+                )
+                precip_extrapolated_recomp_subtimestep = (
+                    precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy()
+                )
+                temp_mask = ~np.isfinite(precip_extrapolated_recomp_subtimestep)
+                # TODO: where can I find this -15.0?
+                precip_extrapolated_recomp_subtimestep[temp_mask] = (
+                    self.__params.precip_zerovalue
+                )
+                precip_extrapolated_decomp = self.__params.decomposition_method(
+                    precip_extrapolated_recomp_subtimestep,
+                    self.__params.bandpass_filter,
+                    mask=self.__params.mask_threshold,
+                    fft_method=self.__params.fft,
+                    output_domain=self.__config.domain,
+                    normalize=True,
+                    compute_stats=True,
+                    compact_output=True,
+                )["cascade_levels"]
+                # Make sure we have values outside the mask
+                if self.__params.zero_precip_radar:
+                    precip_extrapolated_decomp = np.nan_to_num(
+                        precip_extrapolated_decomp,
+                        copy=True,
+                        nan=np.nanmin(precip_forecast_cascade_subtimestep),
+                        posinf=np.nanmin(precip_forecast_cascade_subtimestep),
+                        neginf=np.nanmin(precip_forecast_cascade_subtimestep),
+                    )
+                for i in range(self.__config.n_cascade_levels):
+                    precip_extrapolated_decomp[i][temp_mask] = np.nan
+                # B. 
Noise + noise_cascade_subtimestep_recomp = blending.utils.recompose_cascade( + combined_cascade=noise_cascade_subtimestep, + combined_mean=worker_state.precip_mean_noise[j], + combined_sigma=worker_state.precip_std_noise[j], + ) + extrap_kwargs_noise["displacement_prev"] = ( + worker_state.previous_displacement_noise_cascade[j] + ) + extrap_kwargs_noise["map_coordinates_mode"] = "wrap" + ( + noise_extrapolated_recomp_temp, + worker_state.previous_displacement_noise_cascade[j], + ) = self.__params.extrapolation_method( + noise_cascade_subtimestep_recomp, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_noise, + ) + noise_extrapolated_recomp = noise_extrapolated_recomp_temp[0].copy() + noise_extrapolated_decomp = self.__params.decomposition_method( + noise_extrapolated_recomp, + self.__params.bandpass_filter, + mask=self.__params.mask_threshold, + fft_method=self.__params.fft, + output_domain=self.__config.domain, + normalize=True, + compute_stats=True, + compact_output=True, + )["cascade_levels"] + for i in range(self.__config.n_cascade_levels): + noise_extrapolated_decomp[i] *= self.__params.noise_std_coeffs[i] + + # Append the results to the output lists + worker_state.precip_extrapolated_decomp.append( + precip_extrapolated_decomp.copy() + ) + worker_state.noise_extrapolated_decomp.append( + noise_extrapolated_decomp.copy() + ) + precip_forecast_cascade_subtimestep = None + precip_forecast_recomp_subtimestep = None + precip_forecast_extrapolated_recomp_subtimestep_temp = None + precip_extrapolated_recomp_subtimestep = None + precip_extrapolated_decomp = None + noise_cascade_subtimestep = None + noise_cascade_subtimestep_recomp = None + noise_extrapolated_recomp_temp = None + noise_extrapolated_recomp = None + noise_extrapolated_decomp = None + + # Finally, also extrapolate the initial radar rainfall + # field. This will be blended with the rainfall field(s) + # of the (NWP) model(s) for Lagrangian blended prob. 
matching + # min_R = np.min(precip) + extrap_kwargs_pb["displacement_prev"] = ( + worker_state.previous_displacement_prob_matching[j] + ) + # Apply the domain mask to the extrapolation component + precip_forecast_temp_for_probability_matching = self.__precip.copy() + precip_forecast_temp_for_probability_matching[ + self.__params.domain_mask + ] = np.nan - # 8.4 Perturb and blend the advection fields + advect the - # extrapolation and noise cascade to the current time step - # (or subtimesteps if non-integer time steps are given) + ( + precip_forecast_extrapolated_probability_matching_temp, + worker_state.previous_displacement_prob_matching[j], + ) = self.__params.extrapolation_method( + precip_forecast_temp_for_probability_matching, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_pb, + ) - # Settings and initialize the output - extrap_kwargs_ = extrap_kwargs.copy() - extrap_kwargs_noise = extrap_kwargs.copy() - extrap_kwargs_pb = extrap_kwargs.copy() - velocity_pert = velocity - R_f_ep_out = [] - Yn_ep_out = [] - R_pm_ep = [] + worker_state.precip_extrapolated_probability_matching.append( + precip_forecast_extrapolated_probability_matching_temp[0] + ) - # Extrapolate per sub time step - for t_sub in subtimesteps: - if t_sub > 0: - t_diff_prev_int = t_sub - int(t_sub) - if t_diff_prev_int > 0.0: - R_f_ip = [ - (1.0 - t_diff_prev_int) * forecast_prev[j][i][-1, :] - + t_diff_prev_int * precip_cascade[j][i][-1, :] - for i in range(n_cascade_levels) - ] - Yn_ip = [ - (1.0 - t_diff_prev_int) * noise_prev[j][i][-1, :] - + t_diff_prev_int * noise_cascade[j][i][-1, :] - for i in range(n_cascade_levels) - ] + worker_state.time_prev_timestep[j] = t_sub - else: - R_f_ip = [ - forecast_prev[j][i][-1, :] - for i in range(n_cascade_levels) - ] - Yn_ip = [ - noise_prev[j][i][-1, :] for i in range(n_cascade_levels) - ] + if len(worker_state.precip_extrapolated_decomp) > 0: + worker_state.precip_extrapolated_decomp = np.stack( + worker_state.precip_extrapolated_decomp + ) + worker_state.noise_extrapolated_decomp = np.stack( + worker_state.noise_extrapolated_decomp + ) + worker_state.precip_extrapolated_probability_matching = np.stack( + worker_state.precip_extrapolated_probability_matching + ) - R_f_ip = np.stack(R_f_ip) - Yn_ip = np.stack(Yn_ip) + # advect the forecast field by one time step if no subtimesteps in the + # current interval were found + if not worker_state.subtimesteps: + t_diff_prev_subtimestep = t + 1 - worker_state.time_prev_timestep[j] + worker_state.leadtime_since_start_forecast[j] += t_diff_prev_subtimestep + + # compute the perturbed motion field - include the NWP + # velocities and the weights + if self.__config.velocity_perturbation_method is not None: + velocity_perturbations_extrapolation = ( + self.__velocity + + self.__params.generate_velocity_noise( + self.__params.velocity_perturbations[j], + worker_state.leadtime_since_start_forecast[j] + * self.__config.timestep, + ) + ) - t_diff_prev = t_sub - t_prev[j] - t_total[j] += t_diff_prev + # Stack the perturbed extrapolation and the NWP velocities + if self.__config.blend_nwp_members: + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + worker_state.velocity_models_timestep, + ), + axis=0, + ) + else: + velocity_models = worker_state.velocity_models_timestep[j] + velocity_stack_all = np.concatenate( + ( + velocity_perturbations_extrapolation[None, :, :, :], + velocity_models[None, :, :, :], + ), + axis=0, + ) + velocity_models = None + 
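+            # Descriptive note: `blend_optical_flows` below combines the stacked
+            # motion fields into a single advection field as a weighted average
+            # (eq. 24 in BPS2006), conceptually (sketch):
+            #   velocity_blended = sum_k(w_k * velocity_k) / sum_k(w_k)
+            # where w_k are the level-2 cascade weights without the noise weight.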
+ # Obtain a blended optical flow, using the weights of the + # second cascade following eq. 24 in BPS2006 + velocity_blended = blending.utils.blend_optical_flows( + flows=velocity_stack_all, + weights=worker_state.weights[ + :-1, 1 + ], # [(extr_field, n_model_fields), cascade_level=2] + ) - # compute the perturbed motion field - include the NWP - # velocities and the weights. Note that we only perturb - # the extrapolation velocity field, as the NWP velocity - # field is present per time step - if vel_pert_method is not None: - velocity_pert = velocity + generate_vel_noise( - vps[j], t_total[j] * timestep - ) + # Extrapolate the extrapolation and noise cascade - # Stack the perturbed extrapolation and the NWP velocities - if blend_nwp_members: - V_stack = np.concatenate( - ( - velocity_pert[None, :, :, :], - velocity_models_temp, - ), - axis=0, - ) - else: - V_model_ = velocity_models_temp[j] - V_stack = np.concatenate( - (velocity_pert[None, :, :, :], V_model_[None, :, :, :]), - axis=0, - ) - V_model_ = None - - # Obtain a blended optical flow, using the weights of the - # second cascade following eq. 24 in BPS2006 - velocity_blended = blending.utils.blend_optical_flows( - flows=V_stack, - weights=weights[ - :-1, 1 - ], # [(extr_field, n_model_fields), cascade_level=2] - ) + extrap_kwargs_["displacement_prev"] = worker_state.previous_displacement[j] + extrap_kwargs_noise["displacement_prev"] = ( + worker_state.previous_displacement_noise_cascade[j] + ) + extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - # Extrapolate both cascades to the next time step - # First recompose the cascade, advect it and decompose it again - # This is needed to remove the interpolation artifacts. - # In addition, the number of extrapolations is greatly reduced - # A. Radar Rain - R_f_ip_recomp = blending.utils.recompose_cascade( - combined_cascade=R_f_ip, - combined_mean=mu_extrapolation, - combined_sigma=sigma_extrapolation, - ) - # Make sure we have values outside the mask - if zero_precip_radar: - R_f_ip_recomp = np.nan_to_num( - R_f_ip_recomp, - copy=True, - nan=zerovalue, - posinf=zerovalue, - neginf=zerovalue, - ) - # Put back the mask - R_f_ip_recomp[domain_mask] = np.nan - extrap_kwargs["displacement_prev"] = D[j] - R_f_ep_recomp_, D[j] = extrapolator( - R_f_ip_recomp, - velocity_blended, - [t_diff_prev], - allow_nonfinite_values=True, - **extrap_kwargs, - ) - R_f_ep_recomp = R_f_ep_recomp_[0].copy() - temp_mask = ~np.isfinite(R_f_ep_recomp) - # TODO WHERE DO CAN I FIND THIS -15.0 - R_f_ep_recomp[~np.isfinite(R_f_ep_recomp)] = zerovalue - R_f_ep = decompositor( - R_f_ep_recomp, - bp_filter, - mask=MASK_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - )["cascade_levels"] - # Make sure we have values outside the mask - if zero_precip_radar: - R_f_ep = np.nan_to_num( - R_f_ep, - copy=True, - nan=np.nanmin(R_f_ip), - posinf=np.nanmin(R_f_ip), - neginf=np.nanmin(R_f_ip), - ) - for i in range(n_cascade_levels): - R_f_ep[i][temp_mask] = np.nan - # B. 
Noise - Yn_ip_recomp = blending.utils.recompose_cascade( - combined_cascade=Yn_ip, - combined_mean=mu_noise[j], - combined_sigma=sigma_noise[j], - ) - extrap_kwargs_noise["displacement_prev"] = D_Yn[j] - extrap_kwargs_noise["map_coordinates_mode"] = "wrap" - Yn_ep_recomp_, D_Yn[j] = extrapolator( - Yn_ip_recomp, - velocity_blended, - [t_diff_prev], - allow_nonfinite_values=True, - **extrap_kwargs_noise, - ) - Yn_ep_recomp = Yn_ep_recomp_[0].copy() - Yn_ep = decompositor( - Yn_ep_recomp, - bp_filter, - mask=MASK_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - )["cascade_levels"] - for i in range(n_cascade_levels): - Yn_ep[i] *= noise_std_coeffs[i] - - # Append the results to the output lists - R_f_ep_out.append(R_f_ep.copy()) - Yn_ep_out.append(Yn_ep.copy()) - R_f_ip = None - R_f_ip_recomp = None - R_f_ep_recomp_ = None - R_f_ep_recomp = None - R_f_ep = None - Yn_ip = None - Yn_ip_recomp = None - Yn_ep_recomp_ = None - Yn_ep_recomp = None - Yn_ep = None - - # Finally, also extrapolate the initial radar rainfall - # field. This will be blended with the rainfall field(s) - # of the (NWP) model(s) for Lagrangian blended prob. matching - # min_R = np.min(precip) - extrap_kwargs_pb["displacement_prev"] = D_pb[j] - # Apply the domain mask to the extrapolation component - R_ = precip.copy() - R_[domain_mask] = np.nan - R_pm_ep_, D_pb[j] = extrapolator( - R_, - velocity_blended, - [t_diff_prev], - allow_nonfinite_values=True, - **extrap_kwargs_pb, - ) - R_pm_ep.append(R_pm_ep_[0]) - - t_prev[j] = t_sub - - if len(R_f_ep_out) > 0: - R_f_ep_out = np.stack(R_f_ep_out) - Yn_ep_out = np.stack(Yn_ep_out) - R_pm_ep = np.stack(R_pm_ep) - - # advect the forecast field by one time step if no subtimesteps in the - # current interval were found - if not subtimesteps: - t_diff_prev = t + 1 - t_prev[j] - t_total[j] += t_diff_prev - - # compute the perturbed motion field - include the NWP - # velocities and the weights - if vel_pert_method is not None: - velocity_pert = velocity + generate_vel_noise( - vps[j], t_total[j] * timestep - ) + ( + _, + worker_state.previous_displacement[j], + ) = self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_, + ) - # Stack the perturbed extrapolation and the NWP velocities - if blend_nwp_members: - V_stack = np.concatenate( - (velocity_pert[None, :, :, :], velocity_models_temp), - axis=0, - ) - else: - V_model_ = velocity_models_temp[j] - V_stack = np.concatenate( - (velocity_pert[None, :, :, :], V_model_[None, :, :, :]), - axis=0, - ) - V_model_ = None - - # Obtain a blended optical flow, using the weights of the - # second cascade following eq. 
24 in BPS2006 - velocity_blended = blending.utils.blend_optical_flows( - flows=V_stack, - weights=weights[ - :-1, 1 - ], # [(extr_field, n_model_fields), cascade_level=2] - ) + ( + _, + worker_state.previous_displacement_noise_cascade[j], + ) = self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_noise, + ) + + # Also extrapolate the radar observation, used for the probability + # matching and post-processing steps + extrap_kwargs_pb["displacement_prev"] = ( + worker_state.previous_displacement_prob_matching[j] + ) + ( + _, + worker_state.previous_displacement_prob_matching[j], + ) = self.__params.extrapolation_method( + None, + velocity_blended, + [t_diff_prev_subtimestep], + allow_nonfinite_values=True, + **extrap_kwargs_pb, + ) - # Extrapolate the extrapolation and noise cascade + worker_state.time_prev_timestep[j] = t + 1 - extrap_kwargs_["displacement_prev"] = D[j] - extrap_kwargs_noise["displacement_prev"] = D_Yn[j] - extrap_kwargs_noise["map_coordinates_mode"] = "wrap" + worker_state.precip_cascades_prev_subtimestep[j] = worker_state.precip_cascades[ + j + ] + worker_state.cascade_noise_prev_subtimestep[j] = ( + worker_state.precip_noise_cascades[j] + ) - _, D[j] = extrapolator( - None, - velocity_blended, - [t_diff_prev], - allow_nonfinite_values=True, - **extrap_kwargs_, - ) + def __blend_cascades(self, t_sub, j, worker_state): + worker_state.subtimestep_index = np.where( + np.array(worker_state.subtimesteps) == t_sub + )[0][0] + # First concatenate the cascades and the means and sigmas + # precip_models = [n_models,timesteps,n_cascade_levels,m,n] + if self.__config.blend_nwp_members: + cascade_stack_all_components = np.concatenate( + ( + worker_state.precip_extrapolated_decomp[ + None, worker_state.subtimestep_index + ], + worker_state.precip_models_cascades_timestep, + worker_state.noise_extrapolated_decomp[ + None, worker_state.subtimestep_index + ], + ), + axis=0, + ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] + means_stacked = np.concatenate( + ( + worker_state.mean_extrapolation[None, :], + worker_state.mean_models_timestep, + ), + axis=0, + ) + sigmas_stacked = np.concatenate( + ( + worker_state.std_extrapolation[None, :], + worker_state.std_models_timestep, + ), + axis=0, + ) + else: + cascade_stack_all_components = np.concatenate( + ( + worker_state.precip_extrapolated_decomp[ + None, worker_state.subtimestep_index + ], + worker_state.precip_models_cascades_timestep[None, j], + worker_state.noise_extrapolated_decomp[ + None, worker_state.subtimestep_index + ], + ), + axis=0, + ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] + means_stacked = np.concatenate( + ( + worker_state.mean_extrapolation[None, :], + worker_state.mean_models_timestep[None, j], + ), + axis=0, + ) + sigmas_stacked = np.concatenate( + ( + worker_state.std_extrapolation[None, :], + worker_state.std_models_timestep[None, j], + ), + axis=0, + ) + + # First determine the blending weights if method is spn. The + # weights for method bps have already been determined. 
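+        # Descriptive note: for "spn" the covariance per cascade level is
+        # computed below with np.ma.corrcoef on masked arrays, so that NaNs
+        # outside the (advected) radar domain do not contaminate the
+        # cross-correlations; the noise component is excluded from the matrix
+        # (hence the "- 1" in the stacking loop).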
+ + if self.__config.weights_method == "spn": + worker_state.weights = np.zeros( + ( + cascade_stack_all_components.shape[0], + self.__config.n_cascade_levels, + ) + ) + for i in range(self.__config.n_cascade_levels): + # Determine the normalized covariance matrix (containing) + # the cross-correlations between the models + cascade_stack_all_components_temp = np.stack( + [ + cascade_stack_all_components[n_model, i, :, :].flatten() + for n_model in range(cascade_stack_all_components.shape[0] - 1) + ] + ) # -1 to exclude the noise component + covariance_nwp_models = np.ma.corrcoef( + np.ma.masked_invalid(cascade_stack_all_components_temp) + ) + # Determine the weights for this cascade level + worker_state.weights[:, i] = calculate_weights_spn( + correlations=worker_state.rho_final_blended_forecast[:, i], + covariance=covariance_nwp_models, + ) + + # Blend the extrapolation, (NWP) model(s) and noise cascades + worker_state.final_blended_forecast_cascades = blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components, weights=worker_state.weights + ) + + # Also blend the cascade without the extrapolation component + worker_state.final_blended_forecast_cascades_mod_only = ( + blending.utils.blend_cascades( + cascades_norm=cascade_stack_all_components[1:, :], + weights=worker_state.weights_model_only, + ) + ) + + # Blend the means and standard deviations + # Input is array of shape [number_components, scale_level, ...] + ( + worker_state.final_blended_forecast_means, + worker_state.final_blended_forecast_stds, + ) = blend_means_sigmas( + means=means_stacked, sigmas=sigmas_stacked, weights=worker_state.weights + ) + # Also blend the means and sigmas for the cascade without extrapolation + + ( + worker_state.final_blended_forecast_means_mod_only, + worker_state.final_blended_forecast_stds_mod_only, + ) = blend_means_sigmas( + means=means_stacked[1:, :], + sigmas=sigmas_stacked[1:, :], + weights=worker_state.weights_model_only, + ) + + def __recompose_cascade_to_rainfall_field(self, j, worker_state): + # 8.6 Recompose the cascade to a precipitation field + # (The function first normalizes the blended cascade, precip_forecast_blended + # again) + worker_state.final_blended_forecast_recomposed = ( + blending.utils.recompose_cascade( + combined_cascade=worker_state.final_blended_forecast_cascades, + combined_mean=worker_state.final_blended_forecast_means, + combined_sigma=worker_state.final_blended_forecast_stds, + ) + ) + # The recomposed cascade without the extrapolation (for NaN filling + # outside the radar domain) + worker_state.final_blended_forecast_recomposed_mod_only = ( + blending.utils.recompose_cascade( + combined_cascade=worker_state.final_blended_forecast_cascades_mod_only, + combined_mean=worker_state.final_blended_forecast_means_mod_only, + combined_sigma=worker_state.final_blended_forecast_stds_mod_only, + ) + ) + if self.__config.domain == "spectral": + # TODO: Check this! (Only tested with domain == 'spatial') + worker_state.final_blended_forecast_recomposed = self.__params.fft_objs[ + j + ].irfft2(worker_state.final_blended_forecast_recomposed) + worker_state.final_blended_forecast_recomposed_mod_only = ( + self.__params.fft_objs[j].irfft2( + worker_state.final_blended_forecast_recomposed_mod_only + ) + ) + + def __post_process_output( + self, j, final_blended_forecast_single_member, worker_state + ): + # 8.7 Post-processing steps - use the mask and fill no data with + # the blended NWP forecast. 
Probability matching following + # Lagrangian blended probability matching which uses the + # latest extrapolated radar rainfall field blended with the + # nwp model(s) rainfall forecast fields as 'benchmark'. + + # 8.7.1 first blend the extrapolated rainfall field (the field + # that is only used for post-processing steps) with the NWP + # rainfall forecast for this time step using the weights + # at scale level 2. + weights_probability_matching = worker_state.weights[ + :-1, 1 + ] # Weights without noise, level 2 + weights_probability_matching_normalized = weights_probability_matching / np.sum( + weights_probability_matching + ) + # And the weights for outside the radar domain + weights_probability_matching_mod_only = worker_state.weights_model_only[ + :-1, 1 + ] # Weights without noise, level 2 + weights_probability_matching_normalized_mod_only = ( + weights_probability_matching_mod_only + / np.sum(weights_probability_matching_mod_only) + ) + # Stack the fields + if self.__config.blend_nwp_members: + precip_forecast_probability_matching_final = np.concatenate( + ( + worker_state.precip_extrapolated_probability_matching[ + None, worker_state.subtimestep_index + ], + worker_state.precip_models_timestep, + ), + axis=0, + ) + else: + precip_forecast_probability_matching_final = np.concatenate( + ( + worker_state.precip_extrapolated_probability_matching[ + None, worker_state.subtimestep_index + ], + worker_state.precip_models_timestep[None, j], + ), + axis=0, + ) + # Blend it + precip_forecast_probability_matching_blended = np.sum( + weights_probability_matching_normalized.reshape( + weights_probability_matching_normalized.shape[0], 1, 1 + ) + * precip_forecast_probability_matching_final, + axis=0, + ) + if self.__config.blend_nwp_members: + precip_forecast_probability_matching_blended_mod_only = np.sum( + weights_probability_matching_normalized_mod_only.reshape( + weights_probability_matching_normalized_mod_only.shape[0], + 1, + 1, + ) + * worker_state.precip_models_timestep, + axis=0, + ) + else: + precip_forecast_probability_matching_blended_mod_only = ( + worker_state.precip_models_timestep[j] + ) + + # The extrapolation components are NaN outside the advected + # radar domain. This results in NaN values in the blended + # forecast outside the radar domain. Therefore, fill these + # areas with the "..._mod_only" blended forecasts, consisting + # of the NWP and noise components. 
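The weighted blend above is a plain normalized weighted sum over the stacked rainfall fields, with the noise weight dropped. A self-contained sketch of that step, using synthetic fields and illustrative weights:

    import numpy as np

    weights_level2 = np.array([0.6, 0.3, 0.1])  # (extrapolation, model, noise)
    # Drop the noise weight and renormalize so the remaining weights sum to one
    w_pm = weights_level2[:-1] / weights_level2[:-1].sum()

    stacked = np.random.rand(2, 64, 64)  # extrapolated and model rainfall fields
    # Weighted sum over the component axis -> (64, 64)
    blended_pm = np.sum(w_pm[:, None, None] * stacked, axis=0)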
+
+        nan_indices = np.isnan(worker_state.final_blended_forecast_recomposed)
+        if self.__config.smooth_radar_mask_range != 0:
+            # Compute the smooth dilated mask
+            new_mask = blending.utils.compute_smooth_dilated_mask(
+                nan_indices,
+                max_padding_size_in_px=self.__config.smooth_radar_mask_range,
+            )
+
+            # Ensure mask values are between 0 and 1
+            mask_model = np.clip(new_mask, 0, 1)
+            mask_radar = np.clip(1 - new_mask, 0, 1)
+
+            # Handle NaNs in the recomposed blended forecast and its model-only
+            # counterpart by setting NaNs to 0 in the blending step
+            precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num(
+                worker_state.final_blended_forecast_recomposed_mod_only, nan=0
+            )
+            precip_forecast_recomposed_no_nan = np.nan_to_num(
+                worker_state.final_blended_forecast_recomposed, nan=0
+            )
+
+            # Perform the blending of radar and model inside the radar domain using a weighted combination
+            worker_state.final_blended_forecast_recomposed = np.nansum(
+                [
+                    mask_model * precip_forecast_recomposed_mod_only_no_nan,
+                    mask_radar * precip_forecast_recomposed_no_nan,
+                ],
+                axis=0,
+            )
+
+            precip_forecast_probability_matching_blended = np.nansum(
+                [
+                    precip_forecast_probability_matching_blended * mask_radar,
+                    precip_forecast_probability_matching_blended_mod_only * mask_model,
+                ],
+                axis=0,
+            )
+        else:
+            worker_state.final_blended_forecast_recomposed[nan_indices] = (
+                worker_state.final_blended_forecast_recomposed_mod_only[nan_indices]
+            )
+            nan_indices = np.isnan(precip_forecast_probability_matching_blended)
+            precip_forecast_probability_matching_blended[nan_indices] = (
+                precip_forecast_probability_matching_blended_mod_only[nan_indices]
+            )

-            _, D_Yn[j] = extrapolator(
-                None,
-                velocity_blended,
-                [t_diff_prev],
-                allow_nonfinite_values=True,
-                **extrap_kwargs_noise,

+        # Finally, fill the remaining nan values, if present, with
+        # the minimum value in the forecast
+        nan_indices = np.isnan(worker_state.final_blended_forecast_recomposed)
+        worker_state.final_blended_forecast_recomposed[nan_indices] = np.nanmin(
+            worker_state.final_blended_forecast_recomposed
+        )
+        nan_indices = np.isnan(precip_forecast_probability_matching_blended)
+        precip_forecast_probability_matching_blended[nan_indices] = np.nanmin(
+            precip_forecast_probability_matching_blended
+        )
+
+        # 8.7.2. Apply the masking and prob. matching
+        precip_field_mask_temp = None
+        if self.__config.mask_method is not None:
+            # apply the precipitation mask to prevent generation of new
+            # precipitation into areas where it was not originally
+            # observed
+            precip_forecast_min_value = (
+                worker_state.final_blended_forecast_recomposed.min()
+            )
+            if self.__config.mask_method == "incremental":
+                # The incremental mask is slightly different from
+                # the implementation in the non-blended steps.py, as
+                # it is not based on the last forecast, but instead
+                # on the blended probability matching field. Therefore,
+                # the buffer does not increase over time.
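An aside before the incremental-mask code below: the smooth radar-mask branch above reduces to weighting the radar blend and the model-only blend with complementary masks in [0, 1], after zeroing NaNs. A minimal sketch, assuming a hypothetical precomputed smooth mask rather than the actual compute_smooth_dilated_mask output:

    import numpy as np

    def smooth_edge_blend_sketch(radar_blend, model_only_blend, smooth_mask):
        # smooth_mask: 1 outside the radar domain, 0 well inside it,
        # intermediate values near the radar edge.
        mask_model = np.clip(smooth_mask, 0, 1)
        mask_radar = np.clip(1 - smooth_mask, 0, 1)
        # Zero the NaNs so they do not propagate through the weighted sum
        radar_no_nan = np.nan_to_num(radar_blend, nan=0)
        model_no_nan = np.nan_to_num(model_only_blend, nan=0)
        return mask_model * model_no_nan + mask_radar * radar_no_nan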
+ # Get the mask for this forecast + precip_field_mask = ( + precip_forecast_probability_matching_blended + >= self.__config.precip_threshold + ) + # Buffer the mask + # Convert the precipitation field mask into an 8-bit unsigned integer mask + obs_mask_uint8 = precip_field_mask.astype("uint8") + + # Perform an initial binary dilation using the provided structuring element + dilated_mask = binary_dilation(obs_mask_uint8, self.__params.struct) + + # Create a binary structure element for incremental dilations + struct_element = generate_binary_structure(2, 1) + + # Initialize a floating-point mask to accumulate dilations for a smooth transition + accumulated_mask = dilated_mask.astype(float) + + # Iteratively dilate the mask and accumulate the results to create a grayscale rim + for _ in range(self.__params.mask_rim): + dilated_mask = binary_dilation(dilated_mask, struct_element) + accumulated_mask += dilated_mask + + # Normalize the accumulated mask values between 0 and 1 + precip_field_mask = accumulated_mask / np.max(accumulated_mask) + # Get the final mask + worker_state.final_blended_forecast_recomposed = ( + precip_forecast_min_value + + ( + worker_state.final_blended_forecast_recomposed + - precip_forecast_min_value ) + * precip_field_mask + ) + precip_field_mask_temp = ( + worker_state.final_blended_forecast_recomposed + > precip_forecast_min_value + ) + elif self.__config.mask_method == "obs": + # The mask equals the most recent benchmark + # rainfall field + precip_field_mask_temp = ( + precip_forecast_probability_matching_blended + >= self.__config.precip_threshold + ) + + # Set to min value outside of mask + worker_state.final_blended_forecast_recomposed[~precip_field_mask_temp] = ( + precip_forecast_min_value + ) - # Also extrapolate the radar observation, used for the probability - # matching and post-processing steps - extrap_kwargs_pb["displacement_prev"] = D_pb[j] - _, D_pb[j] = extrapolator( - None, - velocity_blended, - [t_diff_prev], - allow_nonfinite_values=True, - **extrap_kwargs_pb, + # If probmatching_method is not None, resample the distribution from + # both the extrapolation cascade and the model (NWP) cascade and use + # that for the probability matching. + if ( + self.__config.probmatching_method is not None + and self.__config.resample_distribution + ): + arr1 = worker_state.precip_extrapolated_probability_matching[ + worker_state.subtimestep_index + ] + arr2 = worker_state.precip_models_timestep[j] + # resample weights based on cascade level 2. + # Areas where one of the fields is nan are not included. + precip_forecast_probability_matching_resampled = ( + probmatching.resample_distributions( + first_array=arr1, + second_array=arr2, + probability_first_array=weights_probability_matching_normalized[0], + randgen=worker_state.randgen_probmatching[j], + ) + ) + else: + precip_forecast_probability_matching_resampled = ( + precip_forecast_probability_matching_blended.copy() + ) + + if self.__config.probmatching_method == "cdf": + # nan indices in the extrapolation nowcast + nan_indices = np.isnan( + worker_state.precip_extrapolated_probability_matching[ + worker_state.subtimestep_index + ] + ) + # Adjust the CDF of the forecast to match the resampled distribution combined from + # extrapolation and model fields. + # Rainfall outside the pure extrapolation domain is not taken into account. 
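An aside on the dilation loop above (the CDF matching resumes below): accumulating successive binary dilations and normalizing yields a grayscale rim that decays from 1 in rainy areas to 0 outside. A standalone sketch using the same SciPy primitives, simplified to a single structuring element:

    import numpy as np
    from scipy.ndimage import binary_dilation, generate_binary_structure

    def grayscale_rim_sketch(rain_mask, n_iterations):
        struct = generate_binary_structure(2, 1)
        dilated = rain_mask.astype(bool)
        accumulated = dilated.astype(float)
        for _ in range(n_iterations):
            dilated = binary_dilation(dilated, struct)
            accumulated += dilated  # cells reached earlier accumulate higher values
        return accumulated / accumulated.max()  # assumes a non-empty rain mask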
+            if np.any(np.isfinite(worker_state.final_blended_forecast_recomposed)):
+                worker_state.final_blended_forecast_recomposed = (
+                    probmatching.nonparam_match_empirical_cdf(
+                        worker_state.final_blended_forecast_recomposed,
+                        precip_forecast_probability_matching_resampled,
+                        nan_indices,
                    )
+                )
+                precip_forecast_probability_matching_resampled = None
+        elif self.__config.probmatching_method == "mean":
+            # Use the resampled field as the benchmark field and adjust
+            # the conditional mean of the forecast accordingly
+            mean_probability_matching_forecast = np.mean(
+                precip_forecast_probability_matching_resampled[
+                    precip_forecast_probability_matching_resampled
+                    >= self.__config.precip_threshold
+                ]
+            )
+            rain_mask = (
+                worker_state.final_blended_forecast_recomposed
+                >= self.__config.precip_threshold
+            )
+            mean_precip_forecast = np.mean(
+                worker_state.final_blended_forecast_recomposed[rain_mask]
+            )
+            worker_state.final_blended_forecast_recomposed[rain_mask] = (
+                worker_state.final_blended_forecast_recomposed[rain_mask]
+                - mean_precip_forecast
+                + mean_probability_matching_forecast
+            )
+            precip_forecast_probability_matching_resampled = None
+
+        final_blended_forecast_single_member.append(
+            worker_state.final_blended_forecast_recomposed
+        )
+        return final_blended_forecast_single_member
+
+    def __measure_time(self, label, start_time):
+        """
+        Measure and print the time taken for a specific part of the process.
+
+        Parameters:
+        - label: A description of the part of the process being measured.
+        - start_time: The timestamp when the process started (from time.time()).
+        """
+        if self.__config.measure_time:
+            elapsed_time = time.time() - start_time
+            print(f"{label} took {elapsed_time:.2f} seconds.")
+            return elapsed_time
+        return None
+
+
+def forecast(
+    precip,
+    precip_models,
+    velocity,
+    velocity_models,
+    timesteps,
+    timestep,
+    issuetime,
+    n_ens_members,
+    n_cascade_levels=6,
+    blend_nwp_members=False,
+    precip_thr=None,
+    norain_thr=0.0,
+    kmperpixel=None,
+    extrap_method="semilagrangian",
+    decomp_method="fft",
+    bandpass_filter_method="gaussian",
+    noise_method="nonparametric",
+    noise_stddev_adj=None,
+    ar_order=2,
+    vel_pert_method="bps",
+    weights_method="bps",
+    conditional=False,
+    probmatching_method="cdf",
+    mask_method="incremental",
+    resample_distribution=True,
+    smooth_radar_mask_range=0,
+    callback=None,
+    return_output=True,
+    seed=None,
+    num_workers=1,
+    fft_method="numpy",
+    domain="spatial",
+    outdir_path_skill="./tmp/",
+    extrap_kwargs=None,
+    filter_kwargs=None,
+    noise_kwargs=None,
+    vel_pert_kwargs=None,
+    clim_kwargs=None,
+    mask_kwargs=None,
+    measure_time=False,
+):
+    """
+    Generate a blended nowcast ensemble by using the Short-Term Ensemble
+    Prediction System (STEPS) method.
+
+    Parameters
+    ----------
+    precip: array-like
+        Array of shape (ar_order+1,m,n) containing the input precipitation fields
+        ordered by timestamp from oldest to newest. The time steps between the
+        inputs are assumed to be regular.
+    precip_models: array-like
+        Either raw (NWP) model forecast data or decomposed (NWP) model forecast data.
+        If you supply decomposed data, it needs to be an array of shape
+        (n_models,timesteps+1) containing, per timestep (t=0 to lead time here) and
+        per (NWP) model or model ensemble member, a dictionary with a list of cascades
+        obtained by calling a method implemented in :py:mod:`pysteps.cascade.decomposition`.
+        If you supply the original (NWP) model forecast data, it needs to be an array of shape
+        (n_models,timestep+1,m,n) containing precipitation (or other) fields, which will
+        then be decomposed in this function.
+
+        Depending on your use case it can be advantageous to decompose the model
+        forecasts beforehand, as this slightly reduces calculation times.
+        This is possible with :py:func:`pysteps.blending.utils.decompose_NWP`,
+        :py:func:`pysteps.blending.utils.compute_store_nwp_motion`, and
+        :py:func:`pysteps.blending.utils.load_NWP`. However, if you have a lot of (NWP) model
+        members (e.g. 1 model member per nowcast member), this can lead to excessive memory
+        usage.
+
+        To further reduce memory usage, both this array and the ``velocity_models`` array
+        can be given as float32. They will then be converted to float64 before computations
+        to minimize loss in precision.
+
+        In case of one (deterministic) model as input, add an extra dimension to make sure
+        precip_models is four dimensional prior to calling this function.
+    velocity: array-like
+        Array of shape (2,m,n) containing the x- and y-components of the advection
+        field. The velocities are assumed to represent one time step between the
+        inputs. All values are required to be finite.
+    velocity_models: array-like
+        Array of shape (n_models,timestep,2,m,n) containing the x- and y-components
+        of the advection field for the (NWP) model field per forecast lead time.
+        All values are required to be finite.
+
+        To reduce memory usage, this array
+        can be given as float32. It will then be converted to float64 before computations
+        to minimize loss in precision.
+    timesteps: int or list of floats
+        Number of time steps to forecast or a list of time steps for which the
+        forecasts are computed (relative to the input time step). The elements of
+        the list are required to be in ascending order.
+    timestep: float
+        Time step of the motion vectors (minutes). Required if vel_pert_method is
+        not None or mask_method is 'incremental'.
+    issuetime: datetime
+        Datetime object containing the date and time for which the forecast
+        is issued.
+    n_ens_members: int
+        The number of ensemble members to generate. This number should always be
+        equal to or larger than the number of NWP ensemble members / number of
+        NWP models.
+    n_cascade_levels: int, optional
+        The number of cascade levels to use. Defaults to 6,
+        see issue #385 on GitHub.
+    blend_nwp_members: bool
+        Whether to blend all NWP models/members together per nowcast ensemble
+        member (True), or to use them individually (False). Defaults to False.
+    precip_thr: float, optional
+        Specifies the threshold value for minimum observable precipitation
+        intensity. Required if mask_method is not None or conditional is True.
+    norain_thr: float, optional
+        Specifies the threshold value for the fraction of rainy (see above) pixels
+        in the radar rainfall field below which we consider there to be no rain.
+        Depends on the amount of clutter typically present.
+        Defaults to 0.0.
+    kmperpixel: float, optional
+        Spatial resolution of the input data (kilometers/pixel). Required if
+        vel_pert_method is not None or mask_method is 'incremental'.
+    extrap_method: str, optional
+        Name of the extrapolation method to use. See the documentation of
+        :py:mod:`pysteps.extrapolation.interface`.
+    decomp_method: {'fft'}, optional
+        Name of the cascade decomposition method to use. See the documentation
+        of :py:mod:`pysteps.cascade.interface`.
+    bandpass_filter_method: {'gaussian', 'uniform'}, optional
+        Name of the bandpass filter method to use with the cascade decomposition.
+        See the documentation of :py:mod:`pysteps.cascade.interface`.
+    noise_method: {'parametric','nonparametric','ssft','nested',None}, optional
+        Name of the noise generator to use for perturbing the precipitation
+        field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None,
+        no noise is generated.
+    noise_stddev_adj: {'auto','fixed',None}, optional
+        Optional adjustment for the standard deviations of the noise fields added
+        to each cascade level. This is done to compensate for incorrect std. dev.
+        estimates of cascade levels due to the presence of no-rain areas. 'auto'=use
+        the method implemented in :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`.
+        'fixed'=use the formula given in :cite:`BPS2006` (eq. 6), None=disable
+        noise std. dev. adjustment.
+    ar_order: int, optional
+        The order of the autoregressive model to use. Must be >= 1.
+    vel_pert_method: {'bps',None}, optional
+        Name of the noise generator to use for perturbing the advection field. See
+        the documentation of :py:mod:`pysteps.noise.interface`. If set to None, the advection
+        field is not perturbed.
+    weights_method: {'bps','spn'}, optional
+        The calculation method of the blending weights. Options are the method
+        by :cite:`BPS2006` and the covariance-based method by :cite:`SPN2013`.
+        Defaults to bps.
+    conditional: bool, optional
+        If set to True, compute the statistics of the precipitation field
+        conditionally by excluding pixels where the values are below the threshold
+        precip_thr.
+    probmatching_method: {'cdf','mean',None}, optional
+        Method for matching the statistics of the forecast field with those of
+        the most recently observed one. 'cdf'=map the forecast CDF to the observed
+        one, 'mean'=adjust only the conditional mean value of the forecast field
+        in precipitation areas, None=no matching applied. Using 'mean' requires
+        that mask_method is not None.
+    mask_method: {'obs','incremental',None}, optional
+        The method to use for masking no precipitation areas in the forecast field.
+        The masked pixels are set to the minimum value of the observations.
+        'obs' = apply precip_thr to the most recently observed precipitation intensity
+        field, 'incremental' = iteratively buffer the mask with a certain rate
+        (currently it is 1 km/min), None=no masking.
+    resample_distribution: bool, optional
+        Whether to resample the distribution from the extrapolation and NWP cascade as input
+        for the probability matching. Not resampling these distributions may lead to losing
+        some extremes when the weight of both the extrapolation and NWP cascade is similar.
+        Defaults to True.
+    smooth_radar_mask_range: int, optional
+        Range (in grid cells) over which to smooth the transition between the radar-NWP-noise
+        blend and the NWP-noise blend near the edge of the radar domain (radar mask), where
+        the radar data is either no longer present or not reliable. If set to 0 (the default),
+        this generates a normal forecast without smoothing. To create a smooth mask, this
+        range should be a positive value, representing a buffer band of a number of pixels
+        by which the mask is cropped and smoothed. The smooth radar mask removes the hard
+        edges between NWP and radar in the final blended product. Typically, a value between
+        50 and 100 km can be used. 80 km generally gives good results.
+    callback: function, optional
+        Optional function that is called after computation of each time step of
+        the nowcast.
The function takes one argument: a three-dimensional array
+        of shape (n_ens_members,h,w), where h and w are the height and width
+        of the input field precip, respectively. This can be used, for instance,
+        for writing the outputs into files.
+    return_output: bool, optional
+        Set to False to disable returning the outputs as numpy arrays. This can
+        save memory if the intermediate results are written to output files using
+        the callback function.
+    seed: int, optional
+        Optional seed number for the random generators.
+    num_workers: int, optional
+        The number of workers to use for parallel computation. Applicable if dask
+        is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it
+        is advisable to disable OpenMP by setting the environment variable
+        OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous
+        threads.
+    fft_method: str, optional
+        A string defining the FFT method to use (see FFT methods in
+        :py:func:`pysteps.utils.interface.get_method`).
+        Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed,
+        the recommended method is 'pyfftw'.
+    domain: {"spatial", "spectral"}
+        If "spatial", all computations are done in the spatial domain (the
+        classical STEPS model). If "spectral", the AR(2) models and stochastic
+        perturbations are applied directly in the spectral domain to reduce
+        memory footprint and improve performance :cite:`PCH2019b`.
+    outdir_path_skill: string, optional
+        Path to the folder where the historical skill scores are stored. Defaults to
+        path_workdir from rcparams. If no path is given, './tmp' will be used.
+    extrap_kwargs: dict, optional
+        Optional dictionary containing keyword arguments for the extrapolation
+        method. See the documentation of :py:func:`pysteps.extrapolation.interface`.
+    filter_kwargs: dict, optional
+        Optional dictionary containing keyword arguments for the filter method.
+        See the documentation of :py:mod:`pysteps.cascade.bandpass_filters`.
+    noise_kwargs: dict, optional
+        Optional dictionary containing keyword arguments for the initializer of
+        the noise generator. See the documentation of :py:mod:`pysteps.noise.fftgenerators`.
+    vel_pert_kwargs: dict, optional
+        Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for
+        the initializer of the velocity perturbator. The choice of the optimal
+        parameters depends on the domain and the used optical flow method.

-            t_prev[j] = t + 1

+        Default parameters from :cite:`BPS2006`:
+        p_par = [10.88, 0.23, -7.68]
+        p_perp = [5.76, 0.31, -2.72]

-            forecast_prev[j] = precip_cascade[j]
-            noise_prev[j] = noise_cascade[j]

+        Parameters fitted to the data (optical flow/domain):

-            # 8.5 Blend the cascades
-            R_f_out = []

+        darts/fmi:
+        p_par = [13.71259667, 0.15658963, -16.24368207]
+        p_perp = [8.26550355, 0.17820458, -9.54107834]

-            for t_sub in subtimesteps:
-                # TODO: does it make sense to use sub time steps - check if it works?
-                if t_sub > 0:
-                    t_index = np.where(np.array(subtimesteps) == t_sub)[0][0]
-                    # First concatenate the cascades and the means and sigmas
-                    # precip_models = [n_models,timesteps,n_cascade_levels,m,n]
-                    if blend_nwp_members:
-                        cascades_stacked = np.concatenate(
-                            (
-                                R_f_ep_out[None, t_index],
-                                precip_models_cascade_temp,
-                                Yn_ep_out[None, t_index],
-                            ),
-                            axis=0,
-                        )  # [(extr_field, n_model_fields, noise), n_cascade_levels, ...]
- means_stacked = np.concatenate( - (mu_extrapolation[None, :], mu_models_temp), axis=0 - ) - sigmas_stacked = np.concatenate( - (sigma_extrapolation[None, :], sigma_models_temp), - axis=0, - ) - else: - cascades_stacked = np.concatenate( - ( - R_f_ep_out[None, t_index], - precip_models_cascade_temp[None, j], - Yn_ep_out[None, t_index], - ), - axis=0, - ) # [(extr_field, n_model_fields, noise), n_cascade_levels, ...] - means_stacked = np.concatenate( - (mu_extrapolation[None, :], mu_models_temp[None, j]), - axis=0, - ) - sigmas_stacked = np.concatenate( - ( - sigma_extrapolation[None, :], - sigma_models_temp[None, j], - ), - axis=0, - ) + darts/mch: + p_par = [24.27562298, 0.11297186, -27.30087471] + p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01] - # First determine the blending weights if method is spn. The - # weights for method bps have already been determined. - if weights_method == "spn": - weights = np.zeros( - (cascades_stacked.shape[0], n_cascade_levels) - ) - for i in range(n_cascade_levels): - # Determine the normalized covariance matrix (containing) - # the cross-correlations between the models - cascades_stacked_ = np.stack( - [ - cascades_stacked[n_model, i, :, :].flatten() - for n_model in range( - cascades_stacked.shape[0] - 1 - ) - ] - ) # -1 to exclude the noise component - cov = np.ma.corrcoef( - np.ma.masked_invalid(cascades_stacked_) - ) - # Determine the weights for this cascade level - weights[:, i] = calculate_weights_spn( - correlations=rho_fc[:, i], cov=cov - ) + darts/fmi+mch: + p_par = [16.55447057, 0.14160448, -19.24613059] + p_perp = [14.75343395, 0.11785398, -16.26151612] - # Blend the extrapolation, (NWP) model(s) and noise cascades - R_f_blended = blending.utils.blend_cascades( - cascades_norm=cascades_stacked, weights=weights - ) + lucaskanade/fmi: + p_par = [2.20837526, 0.33887032, -2.48995355] + p_perp = [2.21722634, 0.32359621, -2.57402761] - # Also blend the cascade without the extrapolation component - R_f_blended_mod_only = blending.utils.blend_cascades( - cascades_norm=cascades_stacked[1:, :], - weights=weights_model_only, - ) + lucaskanade/mch: + p_par = [2.56338484, 0.3330941, -2.99714349] + p_perp = [1.31204508, 0.3578426, -1.02499891] - # Blend the means and standard deviations - # Input is array of shape [number_components, scale_level, ...] - means_blended, sigmas_blended = blend_means_sigmas( - means=means_stacked, sigmas=sigmas_stacked, weights=weights - ) - # Also blend the means and sigmas for the cascade without extrapolation - ( - means_blended_mod_only, - sigmas_blended_mod_only, - ) = blend_means_sigmas( - means=means_stacked[1:, :], - sigmas=sigmas_stacked[1:, :], - weights=weights_model_only, - ) + lucaskanade/fmi+mch: + p_par = [2.31970635, 0.33734287, -2.64972861] + p_perp = [1.90769947, 0.33446594, -2.06603662] - # 8.6 Recompose the cascade to a precipitation field - # (The function first normalizes the blended cascade, R_f_blended - # again) - R_f_new = blending.utils.recompose_cascade( - combined_cascade=R_f_blended, - combined_mean=means_blended, - combined_sigma=sigmas_blended, - ) - # The recomposed cascade without the extrapolation (for NaN filling - # outside the radar domain) - R_f_new_mod_only = blending.utils.recompose_cascade( - combined_cascade=R_f_blended_mod_only, - combined_mean=means_blended_mod_only, - combined_sigma=sigmas_blended_mod_only, - ) - if domain == "spectral": - # TODO: Check this! 
(Only tested with domain == 'spatial') - R_f_new = fft_objs[j].irfft2(R_f_new) - R_f_new_mod_only = fft_objs[j].irfft2(R_f_new_mod_only) - - # 8.7 Post-processing steps - use the mask and fill no data with - # the blended NWP forecast. Probability matching following - # Lagrangian blended probability matching which uses the - # latest extrapolated radar rainfall field blended with the - # nwp model(s) rainfall forecast fields as 'benchmark'. - - # 8.7.1 first blend the extrapolated rainfall field (the field - # that is only used for post-processing steps) with the NWP - # rainfall forecast for this time step using the weights - # at scale level 2. - weights_pm = weights[:-1, 1] # Weights without noise, level 2 - weights_pm_normalized = weights_pm / np.sum(weights_pm) - # And the weights for outside the radar domain - weights_pm_mod_only = weights_model_only[ - :-1, 1 - ] # Weights without noise, level 2 - weights_pm_normalized_mod_only = weights_pm_mod_only / np.sum( - weights_pm_mod_only - ) - # Stack the fields - if blend_nwp_members: - R_pm_stacked = np.concatenate( - ( - R_pm_ep[None, t_index], - precip_models_temp, - ), - axis=0, - ) - else: - R_pm_stacked = np.concatenate( - ( - R_pm_ep[None, t_index], - precip_models_temp[None, j], - ), - axis=0, - ) - # Blend it - R_pm_blended = np.sum( - weights_pm_normalized.reshape( - weights_pm_normalized.shape[0], 1, 1 - ) - * R_pm_stacked, - axis=0, - ) - if blend_nwp_members: - R_pm_blended_mod_only = np.sum( - weights_pm_normalized_mod_only.reshape( - weights_pm_normalized_mod_only.shape[0], 1, 1 - ) - * precip_models_temp, - axis=0, - ) - else: - R_pm_blended_mod_only = precip_models_temp[j] - - # The extrapolation components are NaN outside the advected - # radar domain. This results in NaN values in the blended - # forecast outside the radar domain. Therefore, fill these - # areas with the "..._mod_only" blended forecasts, consisting - # of the NWP and noise components. 
- - nan_indices = np.isnan(R_f_new) - if smooth_radar_mask_range != 0: - # Compute the smooth dilated mask - new_mask = blending.utils.compute_smooth_dilated_mask( - nan_indices, - max_padding_size_in_px=smooth_radar_mask_range, - ) + vet/fmi: + p_par = [0.25337388, 0.67542291, 11.04895538] + p_perp = [0.02432118, 0.99613295, 7.40146505] - # Ensure mask values are between 0 and 1 - mask_model = np.clip(new_mask, 0, 1) - mask_radar = np.clip(1 - new_mask, 0, 1) + vet/mch: + p_par = [0.5075159, 0.53895212, 7.90331791] + p_perp = [0.68025501, 0.41761289, 4.73793581] - # Handle NaNs in R_f_new and R_f_new_mod_only by setting NaNs to 0 in the blending step - R_f_new_mod_only_no_nan = np.nan_to_num( - R_f_new_mod_only, nan=0 - ) - R_f_new_no_nan = np.nan_to_num(R_f_new, nan=0) - - # Perform the blending of radar and model inside the radar domain using a weighted combination - R_f_new = np.nansum( - [ - mask_model * R_f_new_mod_only_no_nan, - mask_radar * R_f_new_no_nan, - ], - axis=0, - ) + vet/fmi+mch: + p_par = [0.29495222, 0.62429207, 8.6804131 ] + p_perp = [0.23127377, 0.59010281, 5.98180004] - nan_indices = np.isnan(R_pm_blended) - R_pm_blended = np.nansum( - [ - R_pm_blended * mask_radar, - R_pm_blended_mod_only * mask_model, - ], - axis=0, - ) - else: - R_f_new[nan_indices] = R_f_new_mod_only[nan_indices] - nan_indices = np.isnan(R_pm_blended) - R_pm_blended[nan_indices] = R_pm_blended_mod_only[ - nan_indices - ] + fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set - # Finally, fill the remaining nan values, if present, with - # the minimum value in the forecast - nan_indices = np.isnan(R_f_new) - R_f_new[nan_indices] = np.nanmin(R_f_new) - nan_indices = np.isnan(R_pm_blended) - R_pm_blended[nan_indices] = np.nanmin(R_pm_blended) - - # 8.7.2. Apply the masking and prob. matching - if mask_method is not None: - # apply the precipitation mask to prevent generation of new - # precipitation into areas where it was not originally - # observed - R_cmin = R_f_new.min() - if mask_method == "incremental": - # The incremental mask is slightly different from - # the implementation in the non-blended steps.py, as - # it is not based on the last forecast, but instead - # on R_pm_blended. Therefore, the buffer does not - # increase over time. - # Get the mask for this forecast - MASK_prec = R_pm_blended >= precip_thr - # Buffer the mask - MASK_prec = _compute_incremental_mask( - MASK_prec, struct, mask_rim - ) - # Get the final mask - R_f_new = R_cmin + (R_f_new - R_cmin) * MASK_prec - MASK_prec_ = R_f_new > R_cmin - elif mask_method == "obs": - # The mask equals the most recent benchmark - # rainfall field - MASK_prec_ = R_pm_blended >= precip_thr - - # Set to min value outside of mask - R_f_new[~MASK_prec_] = R_cmin - - # If probmatching_method is not None, resample the distribution from - # both the extrapolation cascade and the model (NWP) cascade and use - # that for the probability matching. - if probmatching_method is not None and resample_distribution: - arr1 = R_pm_ep[t_index] - arr2 = precip_models_temp[j] - # resample weights based on cascade level 2. - # Areas where one of the fields is nan are not included. 
-                    R_pm_resampled = probmatching.resample_distributions(
-                        first_array=arr1,
-                        second_array=arr2,
-                        probability_first_array=weights_pm_normalized[0],
-                        randgen=randgen_probmatching[j],
-                    )
-                else:
-                    R_pm_resampled = R_pm_blended.copy()
-
-                if probmatching_method == "cdf":
-                    # nan indices in the extrapolation nowcast
-                    nan_indices = np.isnan(R_pm_ep[t_index])
-                    # Adjust the CDF of the forecast to match the resampled distribution combined from
-                    # extrapolation and model fields.
-                    # Rainfall outside the pure extrapolation domain is not taken into account.
-                    if np.any(np.isfinite(R_f_new)):
-                        R_f_new = probmatching.nonparam_match_empirical_cdf(
-                            R_f_new, R_pm_resampled, nan_indices
-                        )
-                        R_pm_resampled = None
-                elif probmatching_method == "mean":
-                    # Use R_pm_blended as benchmark field and
-                    mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr])
-                    MASK = R_f_new >= precip_thr
-                    mu_fct = np.mean(R_f_new[MASK])
-                    R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0
-                    R_pm_resampled = None

+        The above parameters have been fitted by using run_vel_pert_analysis.py
+        and fit_vel_pert_params.py located in the scripts directory.

-                R_f_out.append(R_f_new)

+        See :py:mod:`pysteps.noise.motion` for additional documentation.
+    clim_kwargs: dict, optional
+        Optional dictionary containing keyword arguments for the climatological
+        skill file. Arguments can consist of: 'outdir_path', 'n_models'
+        (the number of NWP models) and 'window_length' (the minimum number of
+        days the clim file should have, otherwise the default is used).
+    mask_kwargs: dict
+        Optional dictionary containing mask keyword arguments 'mask_f' and
+        'mask_rim', the factor defining the mask increment and the rim size,
+        respectively.
+        The mask increment is defined as mask_f*timestep/kmperpixel.
+    measure_time: bool
+        If set to True, measure, print and return the computation time.

-            R_f_[j] = R_f_out

+    Returns
+    -------
+    out: ndarray
+        If return_output is True, a four-dimensional array of shape
+        (n_ens_members,num_timesteps,m,n) containing a time series of forecast
+        precipitation fields for each ensemble member. Otherwise, a None value
+        is returned. The time series starts from t0+timestep, where timestep is
+        taken from the input precipitation fields precip. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast array, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).

-        res = []

+    See also
+    --------
+    :py:mod:`pysteps.extrapolation.interface`, :py:mod:`pysteps.cascade.interface`,
+    :py:mod:`pysteps.noise.interface`, :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`

-        if DASK_IMPORTED and n_ens_members > 1:
-            for j in range(n_ens_members):
-                res.append(dask.delayed(worker)(j))
-            dask.compute(*res, num_workers=num_ensemble_workers)
-        else:
-            for j in range(n_ens_members):
-                worker(j)

+    References
+    ----------
+    :cite:`Seed2003`, :cite:`BPS2004`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b`

-        res = None

+    Notes
+    -----
+    1. The blending currently does not blend the beta-parameters in the parametric
+    noise method. It is recommended to use the non-parametric noise method.

-        if is_nowcast_time_step:
-            if measure_time:
-                print(f"{time.time() - starttime:.2f} seconds.")
-            else:
-                print("done.")

+    2. If blend_nwp_members is True, the BPS2006 method for the weights is
+    suboptimal. It is recommended to use the SPN2013 method instead.
-            if callback is not None:
-                R_f_stacked = np.stack(R_f_)
-                if R_f_stacked.shape[1] > 0:
-                    callback(R_f_stacked.squeeze())

+    3. Not yet implemented (and neither in the steps nowcasting module): The regression
+    of the lag-1 and lag-2 parameters to their climatological values. See also eq.
+    12 - 19 in :cite:`BPS2004`. By doing so, the Phi parameters change over time,
+    which enhances the AR process. This can become a future development if this
+    turns out to be a warranted functionality.
+    """

-            if return_output:
-                for j in range(n_ens_members):
-                    R_f[j].extend(R_f_[j])

+    blending_config = StepsBlendingConfig(
+        n_ens_members=n_ens_members,
+        n_cascade_levels=n_cascade_levels,
+        blend_nwp_members=blend_nwp_members,
+        precip_threshold=precip_thr,
+        norain_threshold=norain_thr,
+        kmperpixel=kmperpixel,
+        timestep=timestep,
+        extrapolation_method=extrap_method,
+        decomposition_method=decomp_method,
+        bandpass_filter_method=bandpass_filter_method,
+        noise_method=noise_method,
+        noise_stddev_adj=noise_stddev_adj,
+        ar_order=ar_order,
+        velocity_perturbation_method=vel_pert_method,
+        weights_method=weights_method,
+        conditional=conditional,
+        probmatching_method=probmatching_method,
+        mask_method=mask_method,
+        resample_distribution=resample_distribution,
+        smooth_radar_mask_range=smooth_radar_mask_range,
+        seed=seed,
+        num_workers=num_workers,
+        fft_method=fft_method,
+        domain=domain,
+        outdir_path_skill=outdir_path_skill,
+        extrapolation_kwargs=extrap_kwargs,
+        filter_kwargs=filter_kwargs,
+        noise_kwargs=noise_kwargs,
+        velocity_perturbation_kwargs=vel_pert_kwargs,
+        climatology_kwargs=clim_kwargs,
+        mask_kwargs=mask_kwargs,
+        measure_time=measure_time,
+        callback=callback,
+        return_output=return_output,
+    )

-            R_f_ = None

+    # TODO: add comment about how this class-based method is supposed to be used: for each forecast run, a new forecaster needs to be made. The config file can stay the same.

-    if measure_time:
-        mainloop_time = time.time() - starttime_mainloop

+    # Create an instance of the new class with all the provided arguments
+    blended_nowcaster = StepsBlendingNowcaster(
+        precip,
+        precip_models,
+        velocity,
+        velocity_models,
+        timesteps,
+        issuetime,
+        blending_config,
+    )

-    if return_output:
-        outarr = np.stack([np.stack(R_f[j]) for j in range(n_ens_members)])
-        if measure_time:
-            return outarr, init_time, mainloop_time
-        else:
-            return outarr
-    else:
-        return None

+    forecast_steps_nowcast = blended_nowcaster.compute_forecast()
+    return forecast_steps_nowcast
+
+
+# TODO: Where does this piece of code best fit: in utils or inside the class?
 def calculate_ratios(correlations):
     """Calculate explained variance ratios from correlation.
@@ -1718,6 +2853,7 @@ def calculate_ratios(correlations):
     return out
 
 
+# TODO: Where does this piece of code best fit: in utils or inside the class?
 def calculate_weights_bps(correlations):
     """Calculate BPS blending weights for STEPS blending from correlation.
@@ -1777,7 +2913,8 @@ def calculate_weights_bps(correlations):
     return weights
 
 
-def calculate_weights_spn(correlations, cov):
+# TODO: Where does this piece of code best fit: in utils or inside the class?
+def calculate_weights_spn(correlations, covariance):
     """Calculate SPN blending weights for STEPS blending from correlation.
 
     Parameters
@@ -1785,7 +2922,7 @@ def calculate_weights_spn(correlations, cov):
     correlations : array-like
         Array of shape [n_components] containing correlation (skills) for each
        component (NWP models and nowcast).
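Before the parameter listing of calculate_weights_spn resumes below, a usage sketch of the refactored entry point above: the wrapper keeps the legacy signature and builds one immutable config per call. Shapes follow the docstring; all values are synthetic and purely illustrative, so treat this as a shape and wiring sketch rather than a meteorologically meaningful run:

    import numpy as np
    from datetime import datetime, timezone
    from pysteps.blending.steps import forecast

    m = n = 200
    precip = np.random.rand(3, m, n)             # ar_order + 1 radar fields
    precip_models = np.random.rand(1, 19, m, n)  # 1 NWP model, timesteps + 1 fields
    velocity = np.zeros((2, m, n))
    velocity_models = np.zeros((1, 18, 2, m, n))

    out = forecast(
        precip, precip_models, velocity, velocity_models,
        timesteps=18, timestep=5.0,
        issuetime=datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc),
        n_ens_members=4, precip_thr=0.1, kmperpixel=1.0,
    )
    # out: (n_ens_members, timesteps, m, n) when return_output is True

Whether this runs end-to-end depends on the installed pysteps version and on sensible, properly transformed input data; the point is the call pattern, with the config assembled internally.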
- cov : array-like + covariance : array-like Array of shape [n_components, n_components] containing the covariance matrix of the models that will be blended. If cov is set to None and correlations only contains one model, the weight equals the correlation @@ -1805,21 +2942,21 @@ def calculate_weights_spn(correlations, cov): # Check if the correlations are positive, otherwise rho = 10e-5 correlations = np.where(correlations < 10e-5, 10e-5, correlations) - if correlations.shape[0] > 1 and len(cov) > 1: - if isinstance(cov, type(None)): + if correlations.shape[0] > 1 and len(covariance) > 1: + if isinstance(covariance, type(None)): raise ValueError("cov must contain a covariance matrix") else: # Make a numpy array out of cov and get the inverse - cov = np.where(cov == 0.0, 10e-5, cov) + covariance = np.where(covariance == 0.0, 10e-5, covariance) # Make sure the determinant of the matrix is not zero, otherwise # subtract 10e-5 from the cross-correlations between the models - if np.linalg.det(cov) == 0.0: - cov = cov - 10e-5 + if np.linalg.det(covariance) == 0.0: + covariance = covariance - 10e-5 # Ensure the correlation of the model with itself is always 1.0 - for i, _ in enumerate(cov): - cov[i][i] = 1.0 + for i, _ in enumerate(covariance): + covariance[i][i] = 1.0 # Use a numpy array instead of a matrix - cov_matrix = np.array(cov) + cov_matrix = np.array(covariance) # Get the inverse of the matrix using scipy's inv function cov_matrix_inv = inv(cov_matrix) # The component weights are the dot product between cov_matrix_inv and cor_vec @@ -1858,6 +2995,7 @@ def calculate_weights_spn(correlations, cov): return weights +# TODO: Where does this piece of code best fit: in utils or inside the class? def blend_means_sigmas(means, sigmas, weights): """Calculate the blended means and sigmas, the normalization parameters needed to recompose the cascade. This procedure uses the weights of the @@ -1919,591 +3057,5 @@ def blend_means_sigmas(means, sigmas, weights): for i in range(weights.shape[0]): combined_means += (weights[i] / total_weight) * means[i] combined_sigmas += (weights[i] / total_weight) * sigmas[i] - # TODO: substract covariances to weigthed sigmas - still necessary? 
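Before blend_means_sigmas returns below, a sketch of the SPN core described in the calculate_weights_spn docstring above: the component weights are the dot product between the inverse covariance matrix and the correlation vector, with the covariance estimated from the masked, flattened cascade fields (as in the refactored __blend_cascades). The guards for small correlations, singular matrices and the extra noise weight in calculate_weights_spn itself are omitted here:

    import numpy as np
    from scipy.linalg import inv

    def spn_core_weights_sketch(correlations, cascade_fields):
        # cascade_fields: (n_components, m, n) at one cascade level;
        # NaNs (e.g. outside the radar domain) are masked out.
        flat = np.stack([field.flatten() for field in cascade_fields])
        cov = np.ma.corrcoef(np.ma.masked_invalid(flat))
        # Component weights: inv(cov) . rho
        return inv(np.asarray(cov)) @ np.asarray(correlations)

    fields = np.random.randn(2, 64, 64)
    weights = spn_core_weights_sketch([0.9, 0.8], fields)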
return combined_means, combined_sigmas - - -def _check_inputs( - precip, precip_models, velocity, velocity_models, timesteps, ar_order -): - if precip.ndim != 3: - raise ValueError("precip must be a three-dimensional array") - if precip.shape[0] < ar_order + 1: - raise ValueError("precip.shape[0] < ar_order+1") - if precip_models.ndim != 2 and precip_models.ndim != 4: - raise ValueError( - "precip_models must be either a two-dimensional array containing dictionaries with decomposed model fields or a four-dimensional array containing the original (NWP) model forecasts" - ) - if velocity.ndim != 3: - raise ValueError("velocity must be a three-dimensional array") - if velocity_models.ndim != 5: - raise ValueError("velocity_models must be a five-dimensional array") - if velocity.shape[0] != 2 or velocity_models.shape[2] != 2: - raise ValueError( - "velocity and velocity_models must have an x- and y-component, check the shape" - ) - if precip.shape[1:3] != velocity.shape[1:3]: - raise ValueError( - "dimension mismatch between precip and velocity: shape(precip)=%s, shape(velocity)=%s" - % (str(precip.shape), str(velocity.shape)) - ) - if precip_models.shape[0] != velocity_models.shape[0]: - raise ValueError( - "precip_models and velocity_models must consist of the same number of models" - ) - if isinstance(timesteps, list) and not sorted(timesteps) == timesteps: - raise ValueError("timesteps is not in ascending order") - if isinstance(timesteps, list): - if precip_models.shape[1] != math.ceil(timesteps[-1]) + 1: - raise ValueError( - "precip_models does not contain sufficient lead times for this forecast" - ) - else: - if precip_models.shape[1] != timesteps + 1: - raise ValueError( - "precip_models does not contain sufficient lead times for this forecast" - ) - - -def _compute_incremental_mask(Rbin, kr, r): - # buffer the observation mask Rbin using the kernel kr - # add a grayscale rim r (for smooth rain/no-rain transition) - - # buffer observation mask - Rbin = np.ndarray.astype(Rbin.copy(), "uint8") - Rd = binary_dilation(Rbin, kr) - - # add grayscale rim - kr1 = generate_binary_structure(2, 1) - mask = Rd.astype(float) - for n in range(r): - Rd = binary_dilation(Rd, kr1) - mask += Rd - # normalize between 0 and 1 - return mask / mask.max() - - -def _transform_to_lagrangian( - precip, velocity, ar_order, xy_coords, extrapolator, extrap_kwargs, num_workers -): - """Advect the previous precipitation fields to the same position with the - most recent one (i.e. transform them into the Lagrangian coordinates). 
- """ - extrap_kwargs = extrap_kwargs.copy() - extrap_kwargs["xy_coords"] = xy_coords - res = list() - - def f(precip, i): - return extrapolator( - precip[i, :, :], - velocity, - ar_order - i, - "min", - allow_nonfinite_values=True, - **extrap_kwargs, - )[-1] - - for i in range(ar_order): - if not DASK_IMPORTED: - precip[i, :, :] = f(precip, i) - else: - res.append(dask.delayed(f)(precip, i)) - - if DASK_IMPORTED: - num_workers_ = len(res) if num_workers > len(res) else num_workers - precip = np.stack( - list(dask.compute(*res, num_workers=num_workers_)) + [precip[-1, :, :]] - ) - - # replace non-finite values with the minimum value - precip = precip.copy() - for i in range(precip.shape[0]): - precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :]) - return precip - - -def _init_noise( - precip, - precip_thr, - n_cascade_levels, - bp_filter, - decompositor, - fft, - noise_method, - noise_kwargs, - noise_stddev_adj, - measure_time, - num_workers, - seed, -): - """Initialize the noise method.""" - if noise_method is None: - return None, None, None - - # get methods for perturbations - init_noise, generate_noise = noise.get_method(noise_method) - - # initialize the perturbation generator for the precipitation field - pp = init_noise(precip, fft_method=fft, **noise_kwargs) - - if noise_stddev_adj == "auto": - print("Computing noise adjustment coefficients... ", end="", flush=True) - if measure_time: - starttime = time.time() - - R_min = np.min(precip) - noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( - precip[-1, :, :], - precip_thr, - R_min, - bp_filter, - decompositor, - pp, - generate_noise, - 20, - conditional=True, - num_workers=num_workers, - seed=seed, - ) - - if measure_time: - print(f"{time.time() - starttime:.2f} seconds.") - else: - print("done.") - elif noise_stddev_adj == "fixed": - f = lambda k: 1.0 / (0.75 + 0.09 * k) - noise_std_coeffs = [f(k) for k in range(1, n_cascade_levels + 1)] - else: - noise_std_coeffs = np.ones(n_cascade_levels) - - if noise_stddev_adj is not None: - print(f"noise std. dev. coeffs: {noise_std_coeffs}") - - return pp, generate_noise, noise_std_coeffs - - -def _compute_cascade_decomposition_radar( - precip, - ar_order, - n_cascade_levels, - n_ens_members, - MASK_thr, - domain, - bp_filter, - decompositor, - fft, -): - """Compute the cascade decompositions of the input precipitation fields.""" - R_d = [] - for i in range(ar_order + 1): - R_ = decompositor( - precip[i, :, :], - bp_filter, - mask=MASK_thr, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ) - R_d.append(R_) - - # Rearrange the cascaded into a four-dimensional array of shape - # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model - R_c = nowcast_utils.stack_cascades(R_d, n_cascade_levels) - - R_d = R_d[-1] - mu_extrapolation = np.array(R_d["means"]) - sigma_extrapolation = np.array(R_d["stds"]) - R_d = [R_d.copy() for j in range(n_ens_members)] - return R_c, mu_extrapolation, sigma_extrapolation - - -def _compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): - """If necessary, recompose (NWP) model forecasts.""" - precip_models = None - - # Recompose the (NWP) model cascades to have rainfall fields per - # model and time step, which will be used in the probability matching steps. 
- # Recomposed cascade will have shape: [n_models, n_timesteps, m, n] - precip_models = [] - for i in range(precip_models_cascade.shape[0]): - precip_model = [] - for time_step in range(precip_models_cascade.shape[1]): - precip_model.append(recompositor(precip_models_cascade[i, time_step])) - precip_models.append(precip_model) - - precip_models = np.stack(precip_models) - precip_model = None - - return precip_models - - -def _estimate_ar_parameters_radar( - R_c, ar_order, n_cascade_levels, MASK_thr, zero_precip_radar -): - """Estimate AR parameters for the radar rainfall field.""" - # If there are values in the radar fields, compute the autocorrelations - GAMMA = np.empty((n_cascade_levels, ar_order)) - if not zero_precip_radar: - # compute lag-l temporal autocorrelation coefficients for each cascade level - for i in range(n_cascade_levels): - GAMMA[i, :] = correlation.temporal_autocorrelation(R_c[i], mask=MASK_thr) - - # Else, use standard values for the autocorrelations - else: - # Get the climatological lag-1 and lag-2 autocorrelation values from Table 2 - # in `BPS2004`. - # Hard coded, change to own (climatological) values when present. - GAMMA = np.array( - [ - [0.99805, 0.9925, 0.9776, 0.9297, 0.796, 0.482, 0.079, 0.0006], - [0.9933, 0.9752, 0.923, 0.750, 0.367, 0.069, 0.0018, 0.0014], - ] - ) - - # Check whether the number of cascade_levels is correct - if GAMMA.shape[1] > n_cascade_levels: - GAMMA = GAMMA[:, 0:n_cascade_levels] - elif GAMMA.shape[1] < n_cascade_levels: - # Get the number of cascade levels that is missing - n_extra_lev = n_cascade_levels - GAMMA.shape[1] - # Append the array with correlation values of 10e-4 - GAMMA = np.append( - GAMMA, - [np.repeat(0.0006, n_extra_lev), np.repeat(0.0014, n_extra_lev)], - axis=1, - ) - - # Finally base GAMMA.shape[0] on the AR-level - if ar_order == 1: - GAMMA = GAMMA[0, :] - if ar_order > 2: - for repeat_index in range(ar_order - 2): - GAMMA = np.vstack((GAMMA, GAMMA[1, :])) - - # Finally, transpose GAMMA to ensure that the shape is the same as np.empty((n_cascade_levels, ar_order)) - GAMMA = GAMMA.transpose() - assert GAMMA.shape == (n_cascade_levels, ar_order) - - # Print the GAMMA value - nowcast_utils.print_corrcoefs(GAMMA) - - if ar_order == 2: - # adjust the lag-2 correlation coefficient to ensure that the AR(p) - # process is stationary - for i in range(n_cascade_levels): - GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2(GAMMA[i, 0], GAMMA[i, 1]) - - # estimate the parameters of the AR(p) model from the autocorrelation - # coefficients - PHI = np.empty((n_cascade_levels, ar_order + 1)) - for i in range(n_cascade_levels): - PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :]) - - nowcast_utils.print_ar_params(PHI) - return PHI - - -def _find_nwp_combination( - precip_models, - R_models_pm, - velocity_models, - mu_models, - sigma_models, - n_ens_members, - ar_order, - n_cascade_levels, - blend_nwp_members, -): - """Determine which (NWP) models will be combined with which nowcast ensemble members. - With the way it is implemented at this moment: n_ens_members of the output equals - the maximum number of (ensemble) members in the input (either the nowcasts or NWP). - """ - # Make sure the number of model members is not larger than than or equal to - # n_ens_members - n_model_members = precip_models.shape[0] - if n_model_members > n_ens_members: - raise ValueError( - "The number of NWP model members is larger than the given number of ensemble members. n_model_members <= n_ens_members." 
- ) - - # Check if NWP models/members should be used individually, or if all of - # them are blended together per nowcast ensemble member. - if blend_nwp_members: - n_model_indices = None - - else: - # Start with determining the maximum and mimimum number of members/models - # in both input products - n_ens_members_max = max(n_ens_members, n_model_members) - n_ens_members_min = min(n_ens_members, n_model_members) - # Also make a list of the model index numbers. These indices are needed - # for indexing the right climatological skill file when pysteps calculates - # the blended forecast in parallel. - if n_model_members > 1: - n_model_indices = np.arange(n_model_members) - else: - n_model_indices = [0] - - # Now, repeat the nowcast ensemble members or the nwp models/members until - # it has the same amount of members as n_ens_members_max. For instance, if - # you have 10 ensemble nowcasts members and 3 NWP members, the output will - # be an ensemble of 10 members. Hence, the three NWP members are blended - # with the first three members of the nowcast (member one with member one, - # two with two, etc.), subsequently, the same NWP members are blended with - # the next three members (NWP member one with member 4, NWP member 2 with - # member 5, etc.), until 10 is reached. - if n_ens_members_min != n_ens_members_max: - if n_model_members == 1: - precip_models = np.repeat(precip_models, n_ens_members_max, axis=0) - mu_models = np.repeat(mu_models, n_ens_members_max, axis=0) - sigma_models = np.repeat(sigma_models, n_ens_members_max, axis=0) - velocity_models = np.repeat(velocity_models, n_ens_members_max, axis=0) - # For the prob. matching - R_models_pm = np.repeat(R_models_pm, n_ens_members_max, axis=0) - # Finally, for the model indices - n_model_indices = np.repeat(n_model_indices, n_ens_members_max, axis=0) - - elif n_model_members == n_ens_members_min: - repeats = [ - (n_ens_members_max + i) // n_ens_members_min - for i in range(n_ens_members_min) - ] - if n_model_members == n_ens_members_min: - precip_models = np.repeat(precip_models, repeats, axis=0) - mu_models = np.repeat(mu_models, repeats, axis=0) - sigma_models = np.repeat(sigma_models, repeats, axis=0) - velocity_models = np.repeat(velocity_models, repeats, axis=0) - # For the prob. 
matching - R_models_pm = np.repeat(R_models_pm, repeats, axis=0) - # Finally, for the model indices - n_model_indices = np.repeat(n_model_indices, repeats, axis=0) - - return ( - precip_models, - R_models_pm, - velocity_models, - mu_models, - sigma_models, - n_model_indices, - ) - - -def _init_random_generators( - velocity, - noise_method, - probmatching_method, - vel_pert_method, - vp_par, - vp_perp, - seed, - n_ens_members, - kmperpixel, - timestep, -): - """Initialize all the random generators.""" - randgen_prec = None - if noise_method is not None: - randgen_prec = [] - for j in range(n_ens_members): - rs = np.random.RandomState(seed) - randgen_prec.append(rs) - seed = rs.randint(0, high=1e9) - - randgen_probmatching = None - if probmatching_method is not None: - randgen_probmatching = [] - for j in range(n_ens_members): - rs = np.random.RandomState(seed) - randgen_probmatching.append(rs) - seed = rs.randint(0, high=1e9) - - if vel_pert_method is not None: - randgen_motion = [] - for j in range(n_ens_members): - rs = np.random.RandomState(seed) - randgen_motion.append(rs) - seed = rs.randint(0, high=1e9) - init_vel_noise, generate_vel_noise = noise.get_method(vel_pert_method) - - # initialize the perturbation generators for the motion field - vps = [] - for j in range(n_ens_members): - kwargs = { - "randstate": randgen_motion[j], - "p_par": vp_par, - "p_perp": vp_perp, - } - vp_ = init_vel_noise(velocity, 1.0 / kmperpixel, timestep, **kwargs) - vps.append(vp_) - else: - vps, generate_vel_noise = None, None - - return randgen_prec, vps, generate_vel_noise, randgen_probmatching - - -def _prepare_forecast_loop( - R_c, - noise_method, - fft_method, - n_cascade_levels, - n_ens_members, - mask_method, - mask_kwargs, - timestep, - kmperpixel, -): - """Prepare for the forecast loop.""" - # Empty arrays for the previous displacements and the forecast cascade - D = np.stack([None for j in range(n_ens_members)]) - D_Yn = np.stack([None for j in range(n_ens_members)]) - D_pb = np.stack([None for j in range(n_ens_members)]) - R_f = [[] for j in range(n_ens_members)] - - if mask_method == "incremental": - # get mask parameters - mask_rim = mask_kwargs.get("mask_rim", 10) - mask_f = mask_kwargs.get("mask_f", 1.0) - # initialize the structuring element - struct = generate_binary_structure(2, 1) - # iterate it to expand it nxn - n = mask_f * timestep / kmperpixel - struct = iterate_structure(struct, int((n - 1) / 2.0)) - else: - mask_rim, struct = None, None - - if noise_method is None: - R_m = [R_c[0][i].copy() for i in range(n_cascade_levels)] - else: - R_m = None - - fft_objs = [] - for i in range(n_ens_members): - fft_objs.append(utils.get_method(fft_method, shape=R_c.shape[-2:])) - - return D, D_Yn, D_pb, R_f, R_m, mask_rim, struct, fft_objs - - -def _compute_initial_nwp_skill( - R_c, precip_models, domain_mask, issuetime, outdir_path_skill, clim_kwargs -): - """Calculate the initial skill of the (NWP) model forecasts at t=0.""" - rho_nwp_models = [ - blending.skill_scores.spatial_correlation( - obs=R_c[0, :, -1, :, :].copy(), - mod=precip_models[n_model, :, :, :].copy(), - domain_mask=domain_mask, - ) - for n_model in range(precip_models.shape[0]) - ] - rho_nwp_models = np.stack(rho_nwp_models) - - # Ensure that the model skill decreases with increasing scale level. 
- for n_model in range(precip_models.shape[0]): - for i in range(1, precip_models.shape[1]): - if rho_nwp_models[n_model, i] > rho_nwp_models[n_model, i - 1]: - # Set it equal to the previous scale level - rho_nwp_models[n_model, i] = rho_nwp_models[n_model, i - 1] - - # Save this in the climatological skill file - blending.clim.save_skill( - current_skill=rho_nwp_models, - validtime=issuetime, - outdir_path=outdir_path_skill, - **clim_kwargs, - ) - return rho_nwp_models - - -def _init_noise_cascade( - shape, - n_ens_members, - n_cascade_levels, - generate_noise, - decompositor, - pp, - randgen_prec, - fft_objs, - bp_filter, - domain, - noise_method, - noise_std_coeffs, - ar_order, -): - """Initialize the noise cascade with identical noise for all AR(n) steps - We also need to return the mean and standard deviations of the noise - for the recombination of the noise before advecting it. - """ - noise_cascade = np.zeros(shape) - mu_noise = np.zeros((n_ens_members, n_cascade_levels)) - sigma_noise = np.zeros((n_ens_members, n_cascade_levels)) - if noise_method: - for j in range(n_ens_members): - EPS = generate_noise( - pp, randstate=randgen_prec[j], fft_method=fft_objs[j], domain=domain - ) - EPS = decompositor( - EPS, - bp_filter, - fft_method=fft_objs[j], - input_domain=domain, - output_domain=domain, - compute_stats=True, - normalize=True, - compact_output=True, - ) - mu_noise[j] = EPS["means"] - sigma_noise[j] = EPS["stds"] - for i in range(n_cascade_levels): - EPS_ = EPS["cascade_levels"][i] - EPS_ *= noise_std_coeffs[i] - for n in range(ar_order): - noise_cascade[j][i][n] = EPS_ - EPS = None - EPS_ = None - return noise_cascade, mu_noise, sigma_noise - - -def _fill_nans_infs_nwp_cascade( - precip_models_cascade, - precip_models, - precip_cascade, - precip, - mu_models, - sigma_models, -): - """Ensure that the NWP cascade and fields do no contain any nans or infinite number""" - # Fill nans and infinite numbers with the minimum value present in precip - # (corresponding to zero rainfall in the radar observations) - min_cascade = np.nanmin(precip_cascade) - min_precip = np.nanmin(precip) - precip_models_cascade[~np.isfinite(precip_models_cascade)] = min_cascade - precip_models[~np.isfinite(precip_models)] = min_precip - # Also set any nans or infs in the mean and sigma of the cascade to - # respectively 0.0 and 1.0 - mu_models[~np.isfinite(mu_models)] = 0.0 - sigma_models[~np.isfinite(sigma_models)] = 0.0 - - return precip_models_cascade, precip_models, mu_models, sigma_models - - -def _determine_max_nr_rainy_cells_nwp(precip_models, precip_thr, n_models, timesteps): - """Initialize noise based on the NWP field time step where the fraction of rainy cells is highest""" - if precip_thr is None: - precip_thr = np.nanmin(precip_models) - - max_rain_pixels = -1 - max_rain_pixels_j = -1 - max_rain_pixels_t = -1 - for j in range(n_models): - for t in timesteps: - rain_pixels = precip_models[j][t][precip_models[j][t] > precip_thr].size - if rain_pixels > max_rain_pixels: - max_rain_pixels = rain_pixels - max_rain_pixels_j = j - max_rain_pixels_t = t - precip_noise_input = precip_models[max_rain_pixels_j][max_rain_pixels_t] - - return precip_noise_input.astype(np.float64, copy=False) diff --git a/pysteps/decorators.py b/pysteps/decorators.py index 44fbaebdb..69c9945bc 100644 --- a/pysteps/decorators.py +++ b/pysteps/decorators.py @@ -22,6 +22,8 @@ import numpy as np +from pysteps.xarray_helpers import convert_input_to_xarray_dataset + def _add_extra_kwrds_to_docstrings(target_func, 
extra_kwargs_doc_text):
     """
@@ -66,7 +68,7 @@ def postprocess_import(fillna=np.nan, dtype="double"):
     def _postprocess_import(importer):
         @wraps(importer)
         def _import_with_postprocessing(*args, **kwargs):
-            precip, *other_args = importer(*args, **kwargs)
+            precip, quality, metadata = importer(*args, **kwargs)

             _dtype = kwargs.get("dtype", dtype)

@@ -88,7 +90,9 @@ def _import_with_postprocessing(*args, **kwargs):
                 mask = ~np.isfinite(precip)
                 precip[mask] = _fillna

-            return (precip.astype(_dtype),) + tuple(other_args)
+            return convert_input_to_xarray_dataset(
+                precip.astype(_dtype), quality, metadata
+            )

     extra_kwargs_doc = """
             Other Parameters
@@ -124,7 +128,9 @@ def new_function(*args, **kwargs):
             target motion_method_func function.
         """
-        input_images = args[0]
+        dataset = args[0]
+        precip_var = dataset.attrs["precip_var"]
+        input_images = dataset[precip_var].values
         if input_images.ndim != 3:
             raise ValueError(
                 "input_images dimension mismatch.\n"
diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index 9cfaa17f9..ed328d066 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -65,6 +65,104 @@
 | zr_b             | the Z-R exponent b in Z = a*R**b                         |
 +------------------+----------------------------------------------------------+
+
+The data and metadata are then postprocessed into an xarray dataset. This dataset will
+always contain an x and y dimension, but can be extended with a time dimension and/or
+an ensemble member dimension over the course of the process.
+
+The dataset can contain the following coordinate variables:
+
+
+.. tabularcolumns:: |p{2cm}|L|
+
++--------------------+-------------------------------------------------------------------------------------------+
+| Coordinate         | Description                                                                               |
++====================+===========================================================================================+
+| y                  | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
++--------------------+-------------------------------------------------------------------------------------------+
+| x                  | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
++--------------------+-------------------------------------------------------------------------------------------+
+| lat                | latitude coordinate in degrees                                                            |
++--------------------+-------------------------------------------------------------------------------------------+
+| lon                | longitude coordinate in degrees                                                           |
++--------------------+-------------------------------------------------------------------------------------------+
+| time               | forecast time in seconds since forecast start time                                        |
++--------------------+-------------------------------------------------------------------------------------------+
+| ens_number         | ensemble member number (integer)                                                          |
++--------------------+-------------------------------------------------------------------------------------------+
+| direction          | used by proesmans to return the forward and backward advection and consistency fields    |
++--------------------+-------------------------------------------------------------------------------------------+
+
+The time, x and y dimensions all MUST be regularly spaced, with the stepsize included
+in a ``stepsize`` attribute. The stepsize is given in the unit of the dimension (this
+is always seconds for the time dimension).
+
+The dataset can contain the following data variables:
+
+.. tabularcolumns:: |p{2cm}|L|
+
++-------------------+-----------------------------------------------------------------------------------------------------------+
+| Variable          | Description                                                                                               |
++===================+===========================================================================================================+
+| precip_intensity, | precipitation data; depending on its unit, the data is stored in one of these three variables:           |
+| precip_accum      | precip_intensity if the unit is ``mm/h``, precip_accum if the unit is ``mm`` and reflectivity if the     |
+| or reflectivity   | unit is ``dBZ``; the attributes of this variable contain metadata relevant to this variable (see below)  |
++-------------------+-----------------------------------------------------------------------------------------------------------+
+| velocity_x        | x-component of the advection field in cartesian_unit per timestep                                        |
++-------------------+-----------------------------------------------------------------------------------------------------------+
+| velocity_y        | y-component of the advection field in cartesian_unit per timestep                                        |
++-------------------+-----------------------------------------------------------------------------------------------------------+
+| quality           | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything    |
++-------------------+-----------------------------------------------------------------------------------------------------------+
+| velocity_quality  | value between 0 and 1 denoting the quality of the velocity data, currently only returned by proesmans    |
++-------------------+-----------------------------------------------------------------------------------------------------------+
+
+Some of the metadata in the metadata dictionary is not explicitly stored in the dataset,
+but is still implicitly present. For example ``x1`` can easily be found by taking the first
+value from the x coordinate variable. Metadata that is not implicitly present is explicitly
+stored either in the dataset's global attributes or as attributes of the precipitation variable.
+Data that relates to the entire dataset is stored in the global attributes. The following data
+is stored in the global attributes:
+
+.. tabularcolumns:: |p{2cm}|L|
+
++------------------+----------------------------------------------------------+
+| Key              | Value                                                    |
++==================+==========================================================+
+| projection       | PROJ.4-compatible projection definition                  |
++------------------+----------------------------------------------------------+
+| institution      | name of the institution that provides the data           |
++------------------+----------------------------------------------------------+
+| precip_var       | the name of the precipitation variable in this dataset   |
++------------------+----------------------------------------------------------+
+
+The following data is stored as attributes of the precipitation variable:
+
+.. tabularcolumns:: |p{2cm}|L|
+
++------------------+----------------------------------------------------------+
+| Key              | Value                                                    |
++==================+==========================================================+
+| units            | the physical unit of the data: 'mm/h', 'mm' or 'dBZ'     |
++------------------+----------------------------------------------------------+
+| transform        | the transformation of the data: None, 'dB', 'Box-Cox' or |
+|                  | others                                                   |
++------------------+----------------------------------------------------------+
+| accutime         | the accumulation time in minutes of the data, float      |
++------------------+----------------------------------------------------------+
+| threshold        | the rain/no rain threshold with the same unit,           |
+|                  | transformation and accutime of the data.                 |
++------------------+----------------------------------------------------------+
+| zerovalue        | the value assigned to the no rain pixels with the same   |
+|                  | unit, transformation and accutime of the data.           |
++------------------+----------------------------------------------------------+
+| zr_a             | the Z-R constant a in Z = a*R**b                         |
++------------------+----------------------------------------------------------+
+| zr_b             | the Z-R exponent b in Z = a*R**b                         |
++------------------+----------------------------------------------------------+
+
+Furthermore, the dataset can contain some additional metadata to make the dataset
+CF-compliant.
+
 Available Importers
 -------------------
@@ -89,12 +187,10 @@
 from functools import partial

 import numpy as np
-
 from matplotlib.pyplot import imread

 from pysteps.decorators import postprocess_import
-from pysteps.exceptions import DataModelError
-from pysteps.exceptions import MissingOptionalDependency
+from pysteps.exceptions import DataModelError, MissingOptionalDependency
 from pysteps.utils import aggregate_fields

 try:
diff --git a/pysteps/io/readers.py b/pysteps/io/readers.py
index fcc6bda2e..30c4d4fc0 100644
--- a/pysteps/io/readers.py
+++ b/pysteps/io/readers.py
@@ -12,13 +12,14 @@
 """

 import numpy as np
+import xarray as xr


-def read_timeseries(inputfns, importer, **kwargs):
+def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset | None:
     """
     Read a time series of input files using the methods implemented in the
-    :py:mod:`pysteps.io.importers` module and stack them into a 3d array of
-    shape (num_timesteps, height, width).
+    :py:mod:`pysteps.io.importers` module and stack them into a 3d xarray
+    dataset of shape (num_timesteps, height, width).

     Parameters
     ----------
@@ -27,55 +28,69 @@ def read_timeseries(inputfns, importer, **kwargs):
         :py:mod:`pysteps.io.archive` module.
     importer: function
         A function implemented in the :py:mod:`pysteps.io.importers` module.
+    timestep: int, optional
+        The timestep in seconds; this value is optional if more than one
+        input filename is given.
     kwargs: dict
         Optional keyword arguments for the importer.

     Returns
     -------
-    out: tuple
-        A three-element tuple containing the read data and quality rasters and
+    out: Dataset
+        A dataset containing the read data and quality rasters and
         associated metadata. If an input file name is None, the corresponding
         precipitation and quality fields are filled with nan values. If all
         input file names are None or if the length of the file name list is
-        zero, a three-element tuple containing None values is returned.
+        zero, None is returned.
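+
+        Examples
+        --------
+        A minimal sketch; the archive path, filename pattern and importer name
+        are illustrative only::
+
+            from datetime import datetime
+            from pysteps import io
+
+            fns = io.archive.find_by_date(
+                datetime(2017, 1, 1, 12, 0),
+                "/path/to/archive",
+                "%Y/%m/%d",
+                "AQC%y%j%H%M?_00005.801.gif",
+                "gif",
+                timestep=5,
+                num_prev_files=2,
+            )
+            importer = io.get_method("mch_gif", "importer")
+            dataset = io.read_timeseries(fns, importer, timestep=300)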
""" # check for missing data - precip_ref = None + dataset_ref = None if all(ifn is None for ifn in inputfns): - return None, None, None + return None else: if len(inputfns[0]) == 0: - return None, None, None + return None for ifn in inputfns[0]: if ifn is not None: - precip_ref, quality_ref, metadata = importer(ifn, **kwargs) + dataset_ref = importer(ifn, **kwargs) break - if precip_ref is None: - return None, None, None + if dataset_ref is None: + return None - precip = [] - quality = [] - timestamps = [] + startdate = min(inputfns[1]) + sorted_dates = sorted(inputfns[1]) + timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds()) + + if timestep is None: + timestep = timestep_dates + if timestep != timestep_dates: + raise ValueError("given timestep does not match inputfns") + for i in range(len(sorted_dates) - 1): + if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep: + raise ValueError("supplied dates are not evenly spaced") + + datasets = [] for i, ifn in enumerate(inputfns[0]): if ifn is not None: - precip_, quality_, _ = importer(ifn, **kwargs) - precip.append(precip_) - quality.append(quality_) - timestamps.append(inputfns[1][i]) + dataset_ = importer(ifn, **kwargs) else: - precip.append(precip_ref * np.nan) - if quality_ref is not None: - quality.append(quality_ref * np.nan) - else: - quality.append(None) - timestamps.append(inputfns[1][i]) - - # Replace this with stack? - precip = np.concatenate([precip_[None, :, :] for precip_ in precip]) - # TODO: Q should be organized as R, but this is not trivial as Q_ can be also None or a scalar - metadata["timestamps"] = np.array(timestamps) + dataset_ = dataset_ref * np.nan + dataset_ = dataset_.expand_dims(dim="time", axis=0) + dataset_ = dataset_.assign_coords( + time=( + "time", + [inputfns[1][i]], + { + "long_name": "forecast time", + "units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}", + "stepsize": timestep, + }, + ) + ) + datasets.append(dataset_) - return precip, quality, metadata + dataset = xr.concat(datasets, dim="time") + return dataset diff --git a/pysteps/motion/constant.py b/pysteps/motion/constant.py index a5c153616..a26831ac0 100644 --- a/pysteps/motion/constant.py +++ b/pysteps/motion/constant.py @@ -14,27 +14,32 @@ import numpy as np import scipy.optimize as op +import xarray as xr from scipy.ndimage import map_coordinates -def constant(R, **kwargs): +def constant(dataset: xr.Dataset, **kwargs): """ Compute a constant advection field by finding a translation vector that maximizes the correlation between two successive images. Parameters ---------- - R: array_like - Array of shape (T,m,n) containing a sequence of T two-dimensional input - images of shape (m,n). If T > 2, two last elements along axis 0 are used. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. If the size of this dimension + is larger than 2, the last 2 entries of this dimension are used. Returns ------- - out: array_like - The constant advection field having shape (2, m, n), where out[0, :, :] - contains the x-components of the motion vectors and out[1, :, :] - contains the y-components. + out: xarray.Dataset + The input dataset with the constant advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. 
""" + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + R = dataset[precip_var].values m, n = R.shape[1:] X, Y = np.meshgrid(np.arange(n), np.arange(m)) @@ -51,4 +56,7 @@ def f(v): options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))} result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options) - return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) + output = np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) + dataset["velocity_x"] = (["y", "x"], output[0]) + dataset["velocity_y"] = (["y", "x"], output[1]) + return dataset diff --git a/pysteps/motion/darts.py b/pysteps/motion/darts.py index 4e5050d48..4aac80cd3 100644 --- a/pysteps/motion/darts.py +++ b/pysteps/motion/darts.py @@ -11,8 +11,10 @@ DARTS """ -import numpy as np import time + +import numpy as np +import xarray as xr from numpy.linalg import lstsq, svd from pysteps import utils @@ -20,16 +22,17 @@ @check_input_frames(just_ndim=True) -def DARTS(input_images, **kwargs): +def DARTS(dataset: xr.Dataset, **kwargs): """ Compute the advection field from a sequence of input images by using the DARTS method. :cite:`RCW2011` Parameters ---------- - input_images: array-like - Array of shape (T,m,n) containing a sequence of T two-dimensional input - images of shape (m,n). + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. Other Parameters ---------------- @@ -67,13 +70,15 @@ def DARTS(input_images, **kwargs): Returns ------- - out: ndarray - Three-dimensional array (2,m,n) containing the dense x- and y-components - of the motion field in units of pixels / timestep as given by the input - array R. + out: xarray.Dataset + The input dataset with the advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values N_x = kwargs.get("N_x", 50) N_y = kwargs.get("N_y", 50) N_t = kwargs.get("N_t", 4) @@ -214,10 +219,14 @@ def DARTS(input_images, **kwargs): fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y)) ) + output = np.stack([U, V]) + dataset["velocity_x"] = (["y", "x"], output[0]) + dataset["velocity_y"] = (["y", "x"], output[1]) + if verbose: print("--- %s seconds ---" % (time.time() - t0)) - return np.stack([U, V]) + return dataset def _leastsq(A, B, y): diff --git a/pysteps/motion/lucaskanade.py b/pysteps/motion/lucaskanade.py index 133f860b7..b7a51a26b 100644 --- a/pysteps/motion/lucaskanade.py +++ b/pysteps/motion/lucaskanade.py @@ -22,22 +22,22 @@ dense_lucaskanade """ +import time + import numpy as np +import xarray as xr from numpy.ma.core import MaskedArray +from pysteps import feature, utils from pysteps.decorators import check_input_frames - -from pysteps import utils, feature from pysteps.tracking.lucaskanade import track_features from pysteps.utils.cleansing import decluster, detect_outliers from pysteps.utils.images import morph_opening -import time - @check_input_frames(2) def dense_lucaskanade( - input_images, + dataset: xr.Dataset, lk_kwargs=None, fd_method="shitomasi", fd_kwargs=None, @@ -73,18 +73,14 @@ def dense_lucaskanade( Parameters ---------- - input_images: ndarray_ or MaskedArray_ - Array of shape (T, m, n) containing a sequence of *T* two-dimensional - input images of shape (m, n). 
The indexing order in **input_images** is - assumed to be (time, latitude, longitude). - - *T* = 2 is the minimum required number of images. - With *T* > 2, all the resulting sparse vectors are pooled together for - the final interpolation on a regular grid. - - In case of ndarray_, invalid values (Nans or infs) are masked, - otherwise the mask of the MaskedArray_ is used. Such mask defines a - region where features are not detected for the tracking algorithm. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of the time dimension needs to + be at least 2. If it is larger than 2, all the resulting sparse vectors are pooled + together for the final interpolation on a regular grid. Invalid values (Nans or infs) + are masked. This mask defines a region where features are not detected for the tracking + algorithm. lk_kwargs: dict, optional Optional dictionary containing keyword arguments for the `Lucas-Kanade`_ @@ -151,14 +147,10 @@ def dense_lucaskanade( Returns ------- - out: ndarray_ or tuple - If **dense=True** (the default), return the advection field having shape - (2, m, n), where out[0, :, :] contains the x-components of the motion - vectors and out[1, :, :] contains the y-components. - The velocities are in units of pixels / timestep, where timestep is the - time difference between the two input images. - Return a zero motion field of shape (2, m, n) when no motion is - detected. + out: xarray.Dataset or tuple + If **dense=True** (the default), return the input dataset with the advection + field added in the ``velocity_x`` and ``velocity_y`` data variables. + Return a zero motion field when no motion is detected. If **dense=False**, it returns a tuple containing the 2-dimensional arrays **xy** and **uv**, where x, y define the vector locations, @@ -179,7 +171,9 @@ def dense_lucaskanade( Understanding Workshop, pp. 121–130, 1981. 
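+
+    Examples
+    --------
+    A minimal sketch, assuming ``dataset`` holds at least two time steps as
+    produced by :py:func:`pysteps.io.readers.read_timeseries`::
+
+        from pysteps import motion
+
+        dense_lucaskanade = motion.get_method("LK")
+        # dense output: the dataset extended with velocity_x and velocity_y
+        dataset = dense_lucaskanade(dataset)
+        # sparse output: vector locations and displacements as plain arrays
+        xy, uv = dense_lucaskanade(dataset, dense=False)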
""" - input_images = input_images.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if verbose: print("Computing the motion field with the Lucas-Kanade method.") @@ -244,7 +238,10 @@ def dense_lucaskanade( # return zero motion field is no sparse vectors are found if xy.shape[0] == 0: if dense: - return np.zeros((2, domain_size[0], domain_size[1])) + uvgrid = np.zeros((2, domain_size[0], domain_size[1])) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + return dataset else: return xy, uv @@ -266,14 +263,20 @@ def dense_lucaskanade( # return zero motion field if no sparse vectors are left for interpolation if xy.shape[0] == 0: - return np.zeros((2, domain_size[0], domain_size[1])) + uvgrid = np.zeros((2, domain_size[0], domain_size[1])) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + return dataset # interpolation xgrid = np.arange(domain_size[1]) ygrid = np.arange(domain_size[0]) uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + if verbose: print("--- total time: %.2f seconds ---" % (time.time() - t0)) - return uvgrid + return dataset diff --git a/pysteps/motion/proesmans.py b/pysteps/motion/proesmans.py index 8760092ba..4b122a620 100644 --- a/pysteps/motion/proesmans.py +++ b/pysteps/motion/proesmans.py @@ -12,6 +12,7 @@ """ import numpy as np +import xarray as xr from scipy.ndimage import gaussian_filter from pysteps.decorators import check_input_frames @@ -20,7 +21,7 @@ @check_input_frames(2, 2) def proesmans( - input_images, + dataset: xr.Dataset, lam=50.0, num_iter=100, num_levels=6, @@ -34,8 +35,11 @@ def proesmans( Parameters ---------- - input_images: array_like - Array of shape (2, m, n) containing the first and second input image. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of this dimension + has to be 2. lam: float Multiplier of the smoothness term. Smaller values give a smoother motion field. @@ -49,22 +53,20 @@ def proesmans( verbose: bool, optional Verbosity enabled if True (default). full_output: bool, optional - If True, the output is a two-element tuple containing the - forward-backward advection and consistency fields. The first element - is shape (2, 2, m, n), where the index along the first dimension refers - to the forward and backward advection fields. The second element is an - array of shape (2, m, n), where the index along the first dimension - refers to the forward and backward consistency fields. - Default: False. + If True, both the forward and backwards advection fields are returned + and the consistency fields are returned as well in the ``velocity_quality`` + data variable. Returns ------- out: ndarray - If full_output=False, the advection field having shape (2, m, n), where - out[0, :, :] contains the x-components of the motion vectors and - out[1, :, :] contains the y-components. The velocities are in units of - pixels / timestep, where timestep is the time difference between the - two input images. + The input dataset with the advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. 
+
+        If full_output=True, a ``direction`` dimension
+        is added to the dataset, so that the velocity data can be returned containing
+        the forward and backward advection fields. The ``velocity_quality`` data
+        variable is also present, containing the forward and backward consistency fields.

     References
     ----------
@@ -73,6 +75,9 @@
     """
     del verbose  # Not used

+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    input_images = dataset[precip_var].values
     im1 = input_images[-2, :, :].copy()
     im2 = input_images[-1, :, :].copy()

@@ -89,6 +94,11 @@
     advfield, quality = _compute_advection_field(im, lam, num_iter, num_levels)

     if not full_output:
-        return advfield[0]
+        dataset["velocity_x"] = (["y", "x"], advfield[0, 0])
+        dataset["velocity_y"] = (["y", "x"], advfield[0, 1])
     else:
-        return advfield, quality
+        dataset["velocity_x"] = (["direction", "y", "x"], advfield[:, 0])
+        dataset["velocity_y"] = (["direction", "y", "x"], advfield[:, 1])
+        dataset["velocity_quality"] = (["direction", "y", "x"], quality)
+
+    return dataset
diff --git a/pysteps/motion/vet.py b/pysteps/motion/vet.py
index 391ebe189..f30703bee 100644
--- a/pysteps/motion/vet.py
+++ b/pysteps/motion/vet.py
@@ -35,12 +35,13 @@
 """

 import numpy
+import xarray as xr
 from numpy.ma.core import MaskedArray
 from scipy.ndimage import zoom
 from scipy.optimize import minimize

 from pysteps.decorators import check_input_frames
-from pysteps.motion._vet import _warp, _cost_function
+from pysteps.motion._vet import _cost_function, _warp


 def round_int(scalar):
@@ -301,7 +302,7 @@ def vet_cost_function(

 @check_input_frames(2, 3)
 def vet(
-    input_images,
+    dataset: xr.Dataset,
     sectors=((32, 16, 4, 2), (32, 16, 4, 2)),
     smooth_gain=1e6,
     first_guess=None,
@@ -366,15 +367,13 @@ def vet(

     Parameters
     ----------
-    input_images: ndarray_ or MaskedArray
-        Input images, sequence of 2D arrays, or 3D arrays.
-        The first dimension represents the images time dimension.
-
-        The template_image (first element in first dimensions) denotes the
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension. The size of this dimension
+        has to be 2. The first element in the time dimension denotes the
         reference image used to obtain the displacement (2D array).
         The second is the target image.
-
-        The expected dimensions are (2,ni,nj).
     sectors: list or array, optional
         Number of sectors on each dimension used in the scaling procedure.
         If dimension is 1, the same sectors will be used both image dimensions
@@ -411,13 +410,11 @@

     Returns
     -------
-    displacement_field: ndarray_
-        Displacement Field (2D array representing the transformation) that
-        warps the template image into the input image.
-        The dimensions are (2,ni,nj), where the first
-        dimension indicates the displacement along x (0) or y (1) in units of
-        pixels / timestep as given by the input_images array.
-    intermediate_steps: list of ndarray_
+    out: xarray.Dataset
+        The input dataset with the displacement field that
+        warps the template image into the input image added in the ``velocity_x``
+        and ``velocity_y`` data variables.
+    intermediate_steps: list of ndarray_, optional
        List with the first guesses obtained during the scaling procedure.

     References
     ----------
@@ -437,6 +434,9 @@
     Nocedal, J, and S J Wright. 2006. Numerical Optimization. Springer New York.
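+
+    Examples
+    --------
+    A minimal sketch, assuming ``dataset`` contains exactly two time steps of
+    precipitation fields::
+
+        from pysteps import motion
+
+        vet = motion.get_method("VET")
+        dataset = vet(dataset)
+        # with intermediate_steps=True the scaling guesses are returned too
+        dataset, scaling_guesses = vet(dataset, intermediate_steps=True)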
""" + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if verbose: def debug_print(*args, **kwargs): @@ -642,7 +642,10 @@ def debug_print(*args, **kwargs): if padding > 0: first_guess = first_guess[:, padding:-padding, padding:-padding] + dataset["velocity_x"] = (["y", "x"], first_guess[0]) + dataset["velocity_y"] = (["y", "x"], first_guess[1]) + if intermediate_steps: - return first_guess, scaling_guesses + return dataset, scaling_guesses - return first_guess + return dataset diff --git a/pysteps/nowcasts/anvil.py b/pysteps/nowcasts/anvil.py index 9da0fb47e..88ed6b0af 100644 --- a/pysteps/nowcasts/anvil.py +++ b/pysteps/nowcasts/anvil.py @@ -19,12 +19,15 @@ """ import time + import numpy as np +import xarray as xr from scipy.ndimage import gaussian_filter -from pysteps import cascade, extrapolation + +from pysteps import cascade, extrapolation, utils from pysteps.nowcasts.utils import nowcast_main_loop from pysteps.timeseries import autoregression -from pysteps import utils +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -35,10 +38,8 @@ def forecast( - vil, - velocity, + dataset: xr.Dataset, timesteps, - rainrate=None, n_cascade_levels=6, extrap_method="semilagrangian", ar_order=2, @@ -69,22 +70,21 @@ def forecast( Parameters ---------- - vil: array_like - Array of shape (ar_order+2,m,n) containing the input fields ordered by - timestamp from oldest to newest. The inputs are expected to contain VIL - or rain rate. The time steps between the inputs are assumed to be regular. - velocity: array_like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as either VIL values in the + ``precip_accum`` data variable or rainrate in the ``precip_intensity`` + data variable. The time dimension of the dataset has to be size + ``ar_order + 2`` and the precipitation variable has to have this dimension. + When VIL values are supplied, optionally ``precip_accum`` can be supplied + as well without a time dimension, containing the most recently observed rain + rate field. If not supplied, no R(VIL) conversion is done and the outputs + are in the same units as the inputs. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. - rainrate: array_like - Array of shape (m,n) containing the most recently observed rain rate - field. If set to None, no R(VIL) conversion is done and the outputs - are in the same units as the inputs. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. @@ -127,18 +127,28 @@ def forecast( Returns ------- - out: ndarray - A three-dimensional array of shape (num_timesteps,m,n) containing a time - series of forecast precipitation fields. The time series starts from - t0+timestep, where timestep is taken from the input VIL/rain rate - fields. 
If measure_time is True, the return value is a three-element
-        tuple containing the nowcast array, the initialization time of the
-        nowcast generator and the time used in the main loop (seconds).
+    out: xarray.Dataset
+        A dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` containing a time series of forecast
+        precipitation fields. The time series starts from t0+timestep, where
+        timestep is taken from the metadata of the time coordinate. If
+        measure_time is True, the return value is a three-element tuple
+        containing the nowcast dataset, the initialization time of the
+        nowcast generator and the time used in the main loop (seconds).

     References
     ----------
     :cite:`PCLH2020`
     """
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    vil = dataset[precip_var].values
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
+    rainrate = None
+    if precip_var == "precip_intensity" and "precip_accum" in dataset:
+        rainrate = dataset["precip_accum"].values
+
     _check_inputs(vil, rainrate, velocity, timesteps, ar_order)

     if extrap_kwargs is None:
@@ -291,8 +301,6 @@ def worker(vil, i):

     print("Starting nowcast computation.")

-    rainrate_f = []
-
     extrap_kwargs["return_displacement"] = True

     state = {"vil_dec": vil_dec}
@@ -322,10 +330,11 @@ def worker(vil, i):
     if measure_time:
         rainrate_f, mainloop_time = rainrate_f

+    output_dataset = convert_output_to_xarray_dataset(dataset, timesteps, rainrate_f)
     if measure_time:
-        return np.stack(rainrate_f), init_time, mainloop_time
+        return output_dataset, init_time, mainloop_time
     else:
-        return np.stack(rainrate_f)
+        return output_dataset


 def _check_inputs(vil, rainrate, velocity, timesteps, ar_order):
diff --git a/pysteps/nowcasts/extrapolation.py b/pysteps/nowcasts/extrapolation.py
index 143a39d7c..a70b6985c 100644
--- a/pysteps/nowcasts/extrapolation.py
+++ b/pysteps/nowcasts/extrapolation.py
@@ -11,14 +11,16 @@
 """

 import time
+
 import numpy as np
+import xarray as xr

 from pysteps import extrapolation
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset


 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     extrap_method="semilagrangian",
     extrap_kwargs=None,
@@ -32,13 +34,11 @@

     Parameters
     ----------
-    precip: array-like
-        Two-dimensional array of shape (m,n) containing the input precipitation
-        field.
-    velocity: array-like
-        Array of shape (2,m,n) containing the x- and y-components of the
-        advection field. The velocities are assumed to represent one time step
-        between the inputs.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any precipitation data variable.
+        It should contain a time dimension of size 1.
     timesteps: int or list of floats
         Number of time steps to forecast or a list of time steps for which the
         forecasts are computed (relative to the input time step). The elements
@@ -54,18 +54,25 @@

     Returns
     -------
-    out: ndarray_
-        Three-dimensional array of shape (num_timesteps, m, n) containing a time
-        series of nowcast precipitation fields. The time series starts from
-        t0 + timestep, where timestep is taken from the advection field velocity.
-        If *measure_time* is True, the return value is a two-element tuple
-        containing this array and the computation time (seconds).
+    out: xarray.Dataset
+        A dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` containing a time series of forecast
+        precipitation fields. The time series starts from t0+timestep, where
+        timestep is taken from the metadata of the time coordinate. If
+        measure_time is True, the return value is a two-element tuple
+        containing the nowcast dataset and the computation time (seconds).

     See also
     --------
     pysteps.extrapolation.interface
     """
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    precip = dataset[precip_var].values[0]
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
     _check_inputs(precip, velocity, timesteps)

     if extrap_kwargs is None:
@@ -95,10 +102,13 @@ def forecast(
         computation_time = time.time() - start_time
         print(f"{computation_time:.2f} seconds.")

+    output_dataset = convert_output_to_xarray_dataset(
+        dataset, timesteps, precip_forecast
+    )
     if measure_time:
-        return precip_forecast, computation_time
+        return output_dataset, computation_time
     else:
-        return precip_forecast
+        return output_dataset


 def _check_inputs(precip, velocity, timesteps):
diff --git a/pysteps/nowcasts/lagrangian_probability.py b/pysteps/nowcasts/lagrangian_probability.py
index 727e94806..7bae440cc 100644
--- a/pysteps/nowcasts/lagrangian_probability.py
+++ b/pysteps/nowcasts/lagrangian_probability.py
@@ -12,20 +12,20 @@
 """

 import numpy as np
+import xarray as xr
 from scipy.signal import convolve

 from pysteps.nowcasts import extrapolation


 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     threshold,
     extrap_method="semilagrangian",
     extrap_kwargs=None,
     slope=5,
-):
+) -> xr.Dataset:
     """
     Generate a probability nowcast by a local lagrangian approach. The ouput is
     the probability of exceeding a given intensity threshold, i.e.

     Parameters
     ----------
-    precip: array_like
-        Two-dimensional array of shape (m,n) containing the input precipitation
-        field.
-    velocity: array_like
-        Array of shape (2,m,n) containing the x- and y-components of the
-        advection field. The velocities are assumed to represent one time step
-        between the inputs.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any precipitation data variable.
+        It should contain a time dimension of size 1.
     timesteps: int or list of floats
         Number of time steps to forecast or a sorted list of time steps for
         which the forecasts are computed (relative to the input time step).
@@ -54,10 +52,15 @@

     Returns
     -------
-    out: ndarray
-        Three-dimensional array of shape (num_timesteps, m, n) containing a time
-        series of nowcast exceedence probabilities. The time series starts from
-        t0 + timestep, where timestep is taken from the advection field velocity.
+    out: xarray.Dataset
+        A dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` containing a time series of nowcast
+        exceedance probabilities. The time series starts from t0+timestep,
+        where timestep is taken from the metadata of the time coordinate.

     References
     ----------
@@ -68,16 +71,14 @@
     """
     # Compute deterministic extrapolation forecast
     if isinstance(timesteps, int) and timesteps > 0:
-        timesteps = np.arange(1, timesteps + 1)
+        timesteps = list(range(1, timesteps + 1))
     elif not isinstance(timesteps, list):
         raise ValueError(f"invalid value for argument 'timesteps': {timesteps}")

-    precip_forecast = extrapolation.forecast(
-        precip,
-        velocity,
-        timesteps,
-        extrap_method,
-        extrap_kwargs,
+    dataset_forecast = extrapolation.forecast(
+        dataset, timesteps, extrap_method, extrap_kwargs
     )
+    precip_var = dataset_forecast.attrs["precip_var"]
+    precip_forecast = dataset_forecast[precip_var].values

     # Ignore missing values
     nanmask = np.isnan(precip_forecast)
@@ -104,7 +105,8 @@
         precip_forecast[i, ...] /= kernel_sum
     precip_forecast = np.clip(precip_forecast, 0, 1)
     precip_forecast[nanmask] = np.nan
-    return precip_forecast
+    dataset_forecast[precip_var].data[:] = precip_forecast
+    return dataset_forecast


 def _get_kernel(size):
diff --git a/pysteps/nowcasts/linda.py b/pysteps/nowcasts/linda.py
index 7d737eaa9..2fc5dfd70 100644
--- a/pysteps/nowcasts/linda.py
+++ b/pysteps/nowcasts/linda.py
@@ -40,6 +40,8 @@
 import time
 import warnings

+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
+
 try:
     import dask

@@ -47,28 +49,19 @@
 except ImportError:
     DASK_IMPORTED = False
 import numpy as np
+import xarray as xr
+from scipy import optimize as opt
+from scipy import stats
 from scipy.integrate import nquad
 from scipy.interpolate import interp1d
-from scipy import optimize as opt
 from scipy.signal import convolve
-from scipy import stats

 from pysteps import extrapolation, feature, noise
-from pysteps.decorators import deprecate_args
 from pysteps.nowcasts.utils import nowcast_main_loop


-@deprecate_args(
-    {
-        "precip_fields": "precip",
-        "advection_field": "velocity",
-        "num_ens_members": "n_ens_members",
-    },
-    "1.8.0",
-)
 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     feature_method="blob",
     max_num_features=25,
@@ -100,15 +93,13 @@

     Parameters
     ----------
-    precip: array_like
-        Array of shape (ari_order + 2, m, n) containing the input rain rate
-        or reflectivity fields (in linear scale) ordered by timestamp from
-        oldest to newest. The time steps between the inputs are assumed to be
-        regular.
-    velocity: array_like
-        Array of shape (2, m, n) containing the x- and y-components of the
-        advection field. The velocities are assumed to represent one time step
-        between the inputs.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as either reflectivity values in the
+        ``reflectivity`` data variable (in linear scale) or rain rates in the
+        ``precip_intensity`` data variable. The time dimension of the dataset has
+        to be size ``ari_order + 2`` and the precipitation variable has to have
+        this dimension.
     timesteps: int
         Number of time steps to forecast.
     feature_method: {'blob', 'domain' 'shitomasi'}
@@ -202,16 +193,15 @@

     Returns
     -------
-    out: numpy.ndarray
-        A four-dimensional array of shape (n_ens_members, timesteps, m, n)
-        containing a time series of forecast precipitation fields for each
-        ensemble member. 
If add_perturbations is False, the first dimension is - dropped. The time series starts from t0 + timestep, where timestep is - taken from the input fields. If measure_time is True, the return value - is a three-element tuple containing the nowcast array, the initialization - time of the nowcast generator and the time used in the main loop - (seconds). If return_output is set to False, a single None value is - returned instead. + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields for each ensemble member. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). Notes ----- @@ -224,6 +214,10 @@ def forecast( variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ari_order) if feature_kwargs is None: @@ -363,14 +357,21 @@ def forecast( callback, ) - if return_output: - if measure_time: - return precip_forecast[0], init_time, precip_forecast[1] - else: - return precip_forecast - else: + if not return_output: return None + if measure_time: + precip_forecast, mainloop_time = precip_forecast + + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) + + if measure_time: + return output_dataset, init_time, mainloop_time + else: + return output_dataset + def _check_inputs(precip, velocity, timesteps, ari_order): if ari_order not in [1, 2]: diff --git a/pysteps/nowcasts/sprog.py b/pysteps/nowcasts/sprog.py index 86c840dcb..2ebcfde41 100644 --- a/pysteps/nowcasts/sprog.py +++ b/pysteps/nowcasts/sprog.py @@ -10,17 +10,17 @@ forecast """ -import numpy as np import time -from pysteps import cascade -from pysteps import extrapolation -from pysteps import utils -from pysteps.decorators import deprecate_args +import numpy as np +import xarray as xr + +from pysteps import cascade, extrapolation, utils from pysteps.nowcasts import utils as nowcast_utils +from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation -from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -30,10 +30,8 @@ DASK_IMPORTED = False -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, precip_thr=None, n_cascade_levels=6, @@ -55,15 +53,13 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between - the inputs are assumed to be regular. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. - The velocities are assumed to represent one time step between the - inputs. 
All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -120,13 +116,15 @@ def forecast( Returns ------- - out: ndarray - A three-dimensional array of shape (num_timesteps,m,n) containing a time - series of forecast precipitation fields. The time series starts from - t0+timestep, where timestep is taken from the input precipitation fields - precip. If measure_time is True, the return value is a three-element - tuple containing the nowcast array, the initialization time of the - nowcast generator and the time used in the main loop (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). See also -------- @@ -137,6 +135,10 @@ def forecast( :cite:`Seed2003`, :cite:`PCH2019a` """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -327,8 +329,6 @@ def f(precip, i): print("Starting nowcast computation.") - precip_forecast = [] - state = {"precip_cascades": precip_cascades, "precip_decomp": precip_decomp} params = { "domain": domain, @@ -358,12 +358,14 @@ def f(precip, i): if measure_time: precip_forecast, mainloop_time = precip_forecast - precip_forecast = np.stack(precip_forecast) + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) if measure_time: - return precip_forecast, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return precip_forecast + return output_dataset def _check_inputs(precip, velocity, timesteps, ar_order): diff --git a/pysteps/nowcasts/sseps.py b/pysteps/nowcasts/sseps.py index a8848d3e3..1d083c04b 100644 --- a/pysteps/nowcasts/sseps.py +++ b/pysteps/nowcasts/sseps.py @@ -18,18 +18,17 @@ forecast """ -import numpy as np import time -from scipy.ndimage import generate_binary_structure, iterate_structure +import numpy as np +import xarray as xr +from scipy.ndimage import generate_binary_structure, iterate_structure -from pysteps import cascade -from pysteps import extrapolation -from pysteps import noise -from pysteps.decorators import deprecate_args +from pysteps import cascade, extrapolation, noise from pysteps.nowcasts import utils as nowcast_utils from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import 
dask @@ -39,11 +38,8 @@ dask_imported = False -@deprecate_args({"R": "precip", "V": "velocity"}, "1.8.0") def forecast( - precip, - metadata, - velocity, + dataset: xr.Dataset, timesteps, n_ens_members=24, n_cascade_levels=6, @@ -78,18 +74,14 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between the inputs - are assumed to be regular, and the inputs are required to have finite values. - metadata: dict - Metadata dictionary containing the accutime, xpixelsize, threshold and - zerovalue attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. xpixelsize is assumed to be in meters. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the advection - field. The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The units and stepsize of ``y`` and ``x`` have to be the same and the only supported + units are meters and kilometers. The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. win_size: int or two-element sequence of ints Size-length of the localization window. overlap: float [0,1[ @@ -181,12 +173,15 @@ def forecast( Returns ------- - out: ndarray - If return_output is True, a four-dimensional array of shape - (n_ens_members,num_timesteps,m,n) containing a time series of forecast + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is - taken from the input precipitation fields. + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). 
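+
+    Notes
+    -----
+    The spatial and temporal scales are derived from the dataset itself rather
+    than from a separate metadata dictionary. A sketch of the derivation (it
+    mirrors the implementation; the ``stepsize`` and ``units`` attributes are
+    described in :py:mod:`pysteps.io.importers`)::
+
+        timestep = dataset["time"].attrs["stepsize"] / 60  # minutes
+        kmperpixel = dataset["x"].attrs["stepsize"]
+        if dataset["x"].attrs["units"] == "m":
+            kmperpixel = kmperpixel / 1000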
See also -------- @@ -201,7 +196,20 @@ def forecast( ---------- :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`NBSG2017` """ - + timesteps_in = timesteps + x_units = dataset["x"].attrs["units"] + y_units = dataset["y"].attrs["units"] + x_stepsize = dataset["x"].attrs["stepsize"] + y_stepsize = dataset["y"].attrs["stepsize"] + if x_units != y_units or x_stepsize != y_stepsize: + raise ValueError("units and stepsize needs to be the same for x and y") + if x_units not in ["m", "km"]: + raise ValueError("only m and km supported as x and y units") + + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -237,8 +245,10 @@ def forecast( else: win_size = tuple([int(win_size[i]) for i in range(2)]) - timestep = metadata["accutime"] - kmperpixel = metadata["xpixelsize"] / 1000 + timestep = dataset["time"].attrs["stepsize"] / 60 + kmperpixel = x_stepsize + if x_units == "m": + kmperpixel = kmperpixel / 1000 print("Computing SSEPS nowcast") print("-----------------------") @@ -292,8 +302,8 @@ def forecast( f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" ) - precip_thr = metadata["threshold"] - precip_min = metadata["zerovalue"] + precip_thr = dataset[precip_var].attrs["threshold"] + precip_min = dataset[precip_var].attrs["zerovalue"] num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers @@ -911,10 +921,12 @@ def worker(j): if return_output: outarr = np.stack([np.stack(precip_forecast[j]) for j in range(n_ens_members)]) + output_dataset = convert_output_to_xarray_dataset(dataset, timesteps_in, outarr) + if measure_time: - return outarr, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return outarr + return output_dataset else: return None diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 806c4082b..b4d050cc8 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -11,22 +11,21 @@ forecast """ +import time +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Any, Callable + import numpy as np +import xarray as xr from scipy.ndimage import generate_binary_structure, iterate_structure -import time -from pysteps import cascade -from pysteps import extrapolation -from pysteps import noise -from pysteps import utils -from pysteps.decorators import deprecate_args +from pysteps import cascade, extrapolation, noise, utils from pysteps.nowcasts import utils as nowcast_utils +from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation -from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop - -from dataclasses import dataclass, field -from typing import Any, Callable +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -36,7 +35,7 @@ DASK_IMPORTED = False -@dataclass +@dataclass(frozen=True) class StepsNowcasterConfig: """ Parameters @@ -248,6 +247,10 @@ class StepsNowcasterParams: xy_coordinates: np.ndarray | None = None velocity_perturbation_parallel: list[float] | None = None velocity_perturbation_perpendicular: list[float] | None = None + filter_kwargs: dict | None = None + noise_kwargs: dict | None = None + 
velocity_perturbation_kwargs: dict | None = None + mask_kwargs: dict | None = None @dataclass @@ -269,6 +272,7 @@ class StepsNowcasterState: ) velocity_perturbations: list[Callable] | None = field(default_factory=list) fft_objects: list[Any] | None = field(default_factory=list) + extrapolation_kwargs: dict[str, Any] | None = field(default_factory=dict) class StepsNowcaster: @@ -341,9 +345,9 @@ def compute_forecast(self): if self.__config.measure_time: self.__start_time_init = time.time() - self.__initialize_nowcast_components() # Slice the precipitation field to only use the last ar_order + 1 fields self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() + self.__initialize_nowcast_components() self.__perform_extrapolation() self.__apply_noise_and_ar_model() @@ -352,15 +356,19 @@ def compute_forecast(self): self.__initialize_fft_objects() # Measure and print initialization time if self.__config.measure_time: - self.__measure_time("Initialization", self.__start_time_init) + self.__init_time = self.__measure_time( + "Initialization", self.__start_time_init + ) # Run the main nowcast loop self.__nowcast_main() + # Unstack nowcast output if return_output is True if self.__config.measure_time: - self.__state.precip_forecast, self.__mainloop_time = ( - self.__state.precip_forecast - ) + ( + self.__state.precip_forecast, + self.__mainloop_time, + ) = self.__state.precip_forecast # Stack and return the forecast output if self.__config.return_output: @@ -386,14 +394,14 @@ def __nowcast_main(self): Main nowcast loop that iterates through the ensemble members and time steps to generate forecasts. """ - # Isolate the last time slice of precipitation + # Isolate the last time slice of observed precipitation precip = self.__precip[ -1, :, : ] # Extract the last available precipitation field # Prepare state and params dictionaries, these need to be formatted a specific way for the nowcast_main_loop - state = self.__initialize_state() - params = self.__initialize_params(precip) + state = self.__return_state_dict() + params = self.__return_params_dict(precip) print("Starting nowcast computation.") @@ -405,7 +413,7 @@ def __nowcast_main(self): self.__time_steps, self.__config.extrapolation_method, self.__update_state, # Reference to the update function - extrap_kwargs=self.__config.extrapolation_kwargs, + extrap_kwargs=self.__state.extrapolation_kwargs, velocity_pert_gen=self.__state.velocity_perturbations, params=params, ensemble=True, @@ -480,15 +488,33 @@ def __check_inputs(self): # Handle None values for various kwargs if self.__config.extrapolation_kwargs is None: - self.__config.extrapolation_kwargs = {} + self.__state.extrapolation_kwargs = dict() + else: + self.__state.extrapolation_kwargs = deepcopy( + self.__config.extrapolation_kwargs + ) + if self.__config.filter_kwargs is None: - self.__config.filter_kwargs = {} + self.__params.filter_kwargs = dict() + else: + self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs) + if self.__config.noise_kwargs is None: - self.__config.noise_kwargs = {} + self.__params.noise_kwargs = dict() + else: + self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs) + if self.__config.velocity_perturbation_kwargs is None: - self.__config.velocity_perturbation_kwargs = {} + self.__params.velocity_perturbation_kwargs = dict() + else: + self.__params.velocity_perturbation_kwargs = deepcopy( + self.__config.velocity_perturbation_kwargs + ) + if self.__config.mask_kwargs is None: - self.__config.mask_kwargs = {} + 
self.__params.mask_kwargs = dict() + else: + self.__params.mask_kwargs = deepcopy(self.__config.mask_kwargs) print("Inputs validated and initialized successfully.") @@ -545,12 +571,12 @@ def __print_forecast_info(self): if self.__config.velocity_perturbation_method == "bps": self.__params.velocity_perturbation_parallel = ( - self.__config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) self.__params.velocity_perturbation_perpendicular = ( - self.__config.velocity_perturbation_kwargs.get( + self.__params.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) @@ -585,13 +611,14 @@ def __initialize_nowcast_components(self): self.__params.bandpass_filter = filter_method( (M, N), self.__config.n_cascade_levels, - **(self.__config.filter_kwargs or {}), + **(self.__params.filter_kwargs or {}), ) # Get the decomposition method (e.g., FFT) - self.__params.decomposition_method, self.__params.recomposition_method = ( - cascade.get_method(self.__config.decomposition_method) - ) + ( + self.__params.decomposition_method, + self.__params.recomposition_method, + ) = cascade.get_method(self.__config.decomposition_method) # Get the extrapolation method (e.g., semilagrangian) self.__params.extrapolation_method = extrapolation.get_method( @@ -625,7 +652,7 @@ def __perform_extrapolation(self): else: self.__state.mask_threshold = None - extrap_kwargs = self.__config.extrapolation_kwargs.copy() + extrap_kwargs = self.__state.extrapolation_kwargs.copy() extrap_kwargs["xy_coords"] = self.__params.xy_coordinates extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(self.__precip)) else False @@ -687,7 +714,7 @@ def __apply_noise_and_ar_model(self): self.__params.perturbation_generator = init_noise( self.__precip, fft_method=self.__params.fft, - **self.__config.noise_kwargs, + **self.__params.noise_kwargs, ) # Handle noise standard deviation adjustments if necessary @@ -715,7 +742,7 @@ def __apply_noise_and_ar_model(self): # Measure and print time taken if self.__config.measure_time: - self.__measure_time( + __ = self.__measure_time( "Noise adjustment coefficient computation", starttime ) else: @@ -827,21 +854,16 @@ def __apply_noise_and_ar_model(self): if self.__config.noise_method is not None: self.__state.random_generator_precip = [] self.__state.random_generator_motion = [] - + seed = self.__config.seed for _ in range(self.__config.n_ens_members): # Create random state for precipitation noise generator - rs = np.random.RandomState(self.__config.seed) + rs = np.random.RandomState(seed) self.__state.random_generator_precip.append(rs) - self.__config.seed = rs.randint( - 0, high=int(1e9) - ) # Update seed after generating - + seed = rs.randint(0, high=int(1e9)) # Create random state for motion perturbations generator - rs = np.random.RandomState(self.__config.seed) + rs = np.random.RandomState(seed) self.__state.random_generator_motion.append(rs) - self.__config.seed = rs.randint( - 0, high=int(1e9) - ) # Update seed after generating + seed = rs.randint(0, high=int(1e9)) else: self.__state.random_generator_precip = None self.__state.random_generator_motion = None @@ -861,10 +883,10 @@ def __initialize_velocity_perturbations(self): for j in range(self.__config.n_ens_members): kwargs = { "randstate": self.__state.random_generator_motion[j], - "p_par": self.__config.velocity_perturbation_kwargs.get( + "p_par": self.__params.velocity_perturbation_kwargs.get( "p_par", 
self.__params.velocity_perturbation_parallel ), - "p_perp": self.__config.velocity_perturbation_kwargs.get( + "p_perp": self.__params.velocity_perturbation_kwargs.get( "p_perp", self.__params.velocity_perturbation_perpendicular ), } @@ -916,8 +938,8 @@ def __initialize_precipitation_mask(self): elif self.__config.mask_method == "incremental": # Get mask parameters - self.__params.mask_rim = self.__config.mask_kwargs.get("mask_rim", 10) - mask_f = self.__config.mask_kwargs.get("mask_f", 1.0) + self.__params.mask_rim = self.__params.mask_kwargs.get("mask_rim", 10) + mask_f = self.__params.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element self.__params.structuring_element = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep @@ -957,7 +979,7 @@ def __initialize_fft_objects(self): self.__state.fft_objs.append(fft_obj) print("FFT objects initialized successfully.") - def __initialize_state(self): + def __return_state_dict(self): """ Initialize the state dictionary used during the nowcast iteration. """ @@ -971,7 +993,7 @@ def __initialize_state(self): "randgen_prec": self.__state.random_generator_precip, } - def __initialize_params(self, precip): + def __return_params_dict(self, precip): """ Initialize the params dictionary used during the nowcast iteration. """ @@ -1196,6 +1218,8 @@ def __measure_time(self, label, start_time): if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") + return elapsed_time + return None def reset_states_and_params(self): """ @@ -1214,10 +1238,8 @@ def reset_states_and_params(self): # Wrapper function to preserve backward compatibility -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, n_ens_members=24, n_cascade_levels=6, @@ -1253,14 +1275,13 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between the - inputs are assumed to be regular. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the advection - field. The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -1422,13 +1443,13 @@ def forecast( Returns ------- - out: ndarray - If return_output is True, a four-dimensional array of shape - (n_ens_members,num_timesteps,m,n) containing a time series of forecast + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is - taken from the input precipitation fields. 
If measure_time is True, the - return value is a three-element tuple containing the nowcast array, the + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the initialization time of the nowcast generator and the time used in the main loop (seconds). diff --git a/pysteps/nowcasts/utils.py b/pysteps/nowcasts/utils.py index fd111e28d..fed1c2f96 100644 --- a/pysteps/nowcasts/utils.py +++ b/pysteps/nowcasts/utils.py @@ -17,6 +17,7 @@ """ import time + import numpy as np from scipy.ndimage import binary_dilation, generate_binary_structure @@ -412,10 +413,10 @@ def worker2(i): if not ensemble: precip_forecast_out = precip_forecast_out[0, :] - if measure_time: - return precip_forecast_out, time.time() - starttime_total - else: - return precip_forecast_out + if measure_time: + return precip_forecast_out, time.time() - starttime_total + else: + return precip_forecast_out def print_ar_params(phi): diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 7e5ad9132..ba493c51a 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -289,7 +289,7 @@ def resample_distributions( cascade). It must be of the same shape as `second_array`. Input must not contain NaNs. second_array: array_like One of the two arrays from which the distribution should be sampled (e.g., the NWP (model) - cascade). It must be of the same shape as `first_array`.. Input must not contain NaNs. + cascade). It must be of the same shape as `first_array`. Input must not contain NaNs. probability_first_array: float The weight that `first_array` should get (a value between 0 and 1). This determines the likelihood of selecting elements from `first_array` over `second_array`. diff --git a/pysteps/tests/helpers.py b/pysteps/tests/helpers.py index 85bd861f5..24c58f8d1 100644 --- a/pysteps/tests/helpers.py +++ b/pysteps/tests/helpers.py @@ -9,10 +9,12 @@ import numpy as np import pytest +import xarray as xr import pysteps as stp from pysteps import io, rcparams from pysteps.utils import aggregate_fields_space +from pysteps.utils.dimension import clip_domain _reference_dates = dict() _reference_dates["bom"] = datetime(2018, 6, 16, 10, 0) @@ -24,11 +26,34 @@ _reference_dates["mrms"] = datetime(2019, 6, 10, 0, 0) +def assert_dataset_equivalent(dataset1: xr.Dataset, dataset2: xr.Dataset) -> None: + xr.testing.assert_allclose(dataset1, dataset2) + precip_var = dataset1.attrs["precip_var"] + assert precip_var == dataset2.attrs["precip_var"] + assert np.isclose( + dataset1[precip_var].attrs["threshold"], + dataset2[precip_var].attrs["threshold"], + ) + assert np.isclose( + dataset1[precip_var].attrs["zerovalue"], + dataset2[precip_var].attrs["zerovalue"], + ) + assert dataset1[precip_var].attrs["units"] == dataset2[precip_var].attrs["units"] + assert ( + dataset1[precip_var].attrs["transform"] + == dataset2[precip_var].attrs["transform"] + or dataset1[precip_var].attrs["transform"] is None + and dataset2[precip_var].attrs["transform"] is None + ) + assert ( + dataset1[precip_var].attrs["accutime"] == dataset2[precip_var].attrs["accutime"] + ) + + def get_precipitation_fields( num_prev_files=0, num_next_files=0, return_raw=False, - metadata=False, upscale=None, source="mch", log_transform=True, @@ -75,9 +100,6 @@ def get_precipitation_fields( The pre-processing steps are: 1) Convert to mm/h, 2) Mask invalid values, 3) Log-transform the data [dBR]. 
- metadata: bool, optional - If True, also return file metadata. - upscale: float or None, optional Upscale fields in space during the pre-processing steps. If it is None, the precipitation field is not modified. @@ -102,8 +124,8 @@ def get_precipitation_fields( Returns ------- - reference_field : array - metadata : dict + dataset: xarray.Dataset + As described in the documentation of :py:mod:`pysteps.io.importers`. """ if source == "bom": @@ -161,47 +183,34 @@ def get_precipitation_fields( # Read the radar composites importer = io.get_method(importer_name, "importer") - reference_field, __, ref_metadata = io.read_timeseries( - fns, importer, **_importer_kwargs - ) + dataset = io.read_timeseries(fns, importer, **_importer_kwargs) if not return_raw: - if (num_prev_files == 0) and (num_next_files == 0): - # Remove time dimension - reference_field = np.squeeze(reference_field) + precip_var = dataset.attrs["precip_var"] # Convert to mm/h - reference_field, ref_metadata = stp.utils.to_rainrate( - reference_field, ref_metadata - ) + dataset = stp.utils.to_rainrate(dataset) + precip_var = dataset.attrs["precip_var"] # Clip domain - reference_field, ref_metadata = stp.utils.clip_domain( - reference_field, ref_metadata, clip - ) + dataset = clip_domain(dataset, clip) # Upscale data - reference_field, ref_metadata = aggregate_fields_space( - reference_field, ref_metadata, upscale - ) + dataset = aggregate_fields_space(dataset, upscale) # Mask invalid values - reference_field = np.ma.masked_invalid(reference_field) + valid_mask = np.isfinite(dataset[precip_var].values) if log_transform: # Log-transform the data [dBR] - reference_field, ref_metadata = stp.utils.dB_transform( - reference_field, ref_metadata, threshold=0.1, zerovalue=-15.0 - ) + dataset = stp.utils.dB_transform(dataset, threshold=0.1, zerovalue=-15.0) # Set missing values with the fill value - np.ma.set_fill_value(reference_field, ref_metadata["zerovalue"]) - reference_field.data[reference_field.mask] = ref_metadata["zerovalue"] - - if metadata: - return reference_field, ref_metadata + metadata = dataset[precip_var].attrs + zerovalue = metadata["zerovalue"] + dataset[precip_var].data[~valid_mask] = zerovalue - return reference_field + return dataset def smart_assert(actual_value, expected, tolerance=None): diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index 4752ea32b..77f975c75 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -28,6 +28,9 @@ (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False, None), (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False, None), (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False, "bps"), + # TODO: make next test work! This is currently not working on the main branch + # (2, 3, 4, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), + # (2, 3, 4, 8, "incremental", "cdf", False, "spn", True, 2, False, False, 0, False), # Test the case where the radar image contains no rain. 
(1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False, None), (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False, None), diff --git a/pysteps/tests/test_exporters.py b/pysteps/tests/test_exporters.py index 10e87d46e..dfe7e8ace 100644 --- a/pysteps/tests/test_exporters.py +++ b/pysteps/tests/test_exporters.py @@ -9,12 +9,14 @@ from numpy.testing import assert_array_almost_equal from pysteps.io import import_netcdf_pysteps -from pysteps.io.exporters import _get_geotiff_filename -from pysteps.io.exporters import close_forecast_files -from pysteps.io.exporters import export_forecast_dataset -from pysteps.io.exporters import initialize_forecast_exporter_netcdf -from pysteps.io.exporters import _convert_proj4_to_grid_mapping -from pysteps.tests.helpers import get_precipitation_fields, get_invalid_mask +from pysteps.io.exporters import ( + _convert_proj4_to_grid_mapping, + _get_geotiff_filename, + close_forecast_files, + export_forecast_dataset, + initialize_forecast_exporter_netcdf, +) +from pysteps.tests.helpers import get_invalid_mask, get_precipitation_fields # Test arguments exporter_arg_names = ( diff --git a/pysteps/tests/test_motion_lk.py b/pysteps/tests/test_motion_lk.py index 871dcd98b..a8f640533 100644 --- a/pysteps/tests/test_motion_lk.py +++ b/pysteps/tests/test_motion_lk.py @@ -3,8 +3,8 @@ """ """ -import pytest import numpy as np +import pytest from pysteps import motion, verification from pysteps.tests.helpers import get_precipitation_fields @@ -61,19 +61,19 @@ def test_lk( pytest.importorskip("pandas") # inputs - precip, metadata = get_precipitation_fields( + dataset = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip = precip.filled() + precip_var = dataset.attrs["precip_var"] # Retrieve motion field oflow_method = motion.get_method("LK") - output = oflow_method( - precip, + output_dataset = oflow_method( + dataset, lk_kwargs=lk_kwargs, fd_method=fd_method, dense=dense, @@ -86,13 +86,17 @@ def test_lk( # Check format of ouput if dense: + output = np.stack( + [output_dataset["velocity_x"].values, output_dataset["velocity_y"].values] + ) assert isinstance(output, np.ndarray) assert output.ndim == 3 assert output.shape[0] == 2 - assert output.shape[1:] == precip[0].shape + assert output.shape[1:] == dataset[precip_var].values[0].shape if nr_std_outlier == 0: assert output.sum() == 0 else: + output = output_dataset assert isinstance(output, tuple) assert len(output) == 2 assert isinstance(output[0], np.ndarray) diff --git a/pysteps/tests/test_nowcasts_anvil.py b/pysteps/tests/test_nowcasts_anvil.py index 14a130fb1..35d84e0f3 100644 --- a/pysteps/tests/test_nowcasts_anvil.py +++ b/pysteps/tests/test_nowcasts_anvil.py @@ -31,31 +31,28 @@ def test_anvil_rainrate( ): """Tests ANVIL nowcast using rain rate precipitation fields.""" # inputs - precip_input = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=4, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = 
oflow_method(dataset_input) nowcast_method = nowcasts.get_method("anvil") output = nowcast_method( - precip_input[-(ar_order + 2) :], - retrieved_motion, + dataset_w_motion.isel(time=slice(-(ar_order + 2), None, None)), timesteps=timesteps, - rainrate=None, # no R(VIL) conversion is done n_cascade_levels=n_cascade_levels, ar_order=ar_order, ar_window_radius=ar_window_radius, @@ -63,9 +60,10 @@ def test_anvil_rainrate( measure_time=measure_time, ) if measure_time: - precip_forecast, __, __ = output + dataset_forecast, __, __ = output else: - precip_forecast = output + dataset_forecast = output + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( @@ -73,7 +71,7 @@ def test_anvil_rainrate( ) result = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" + precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.2f}, required > {min_csi:.2f}" diff --git a/pysteps/tests/test_nowcasts_lagrangian_probability.py b/pysteps/tests/test_nowcasts_lagrangian_probability.py index 1ec352b0b..d75b29e87 100644 --- a/pysteps/tests/test_nowcasts_lagrangian_probability.py +++ b/pysteps/tests/test_nowcasts_lagrangian_probability.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- +from datetime import datetime, timezone + import numpy as np import pytest +import xarray as xr +from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.nowcasts.lagrangian_probability import forecast from pysteps.tests.helpers import get_precipitation_fields -from pysteps.motion.lucaskanade import dense_lucaskanade def test_numerical_example(): @@ -12,12 +15,23 @@ def test_numerical_example(): precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) + now = datetime.now(tz=timezone.utc).replace(tzinfo=None) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], [precip]), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + coords={"time": (["time"], [now], {"stepsize": 300})}, + attrs={"precip_var": "precip_intensity"}, + ) timesteps = 4 thr = 0.5 slope = 1 # pixels / timestep # compute probability forecast - fct = forecast(precip, velocity, timesteps, thr, slope=slope) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == timesteps @@ -26,7 +40,8 @@ def test_numerical_example(): assert fct.min() >= 0.0 # slope = 0 should return a binary field - fct = forecast(precip, velocity, timesteps, thr, slope=0) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=0) + fct = dataset_forecast["precip_intensity"].values ref = (np.repeat(precip[None, ...], timesteps, axis=0) >= thr).astype(float) assert np.allclose(fct, fct.astype(bool)) assert np.allclose(fct, ref) @@ -37,12 +52,23 @@ def test_numerical_example_with_float_slope_and_float_list_timesteps(): precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) + now = datetime.now(tz=timezone.utc).replace(tzinfo=None) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], [precip]), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + coords={"time": (["time"], [now], {"stepsize": 300})}, + attrs={"precip_var": "precip_intensity"}, + ) timesteps = [1.0, 2.0, 5.0, 12.0] thr = 
0.5 slope = 1.0 # pixels / timestep # compute probability forecast - fct = forecast(precip, velocity, timesteps, thr, slope=slope) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) @@ -56,16 +82,18 @@ def test_real_case(): pytest.importorskip("cv2") # inputs - precip, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs # motion - motion = dense_lucaskanade(precip) + dataset_w_motion = dense_lucaskanade(dataset_input) # parameters timesteps = [1, 2, 3] @@ -74,13 +102,18 @@ def test_real_case(): # compute probability forecast extrap_kwargs = dict(allow_nonfinite_values=True) - fct = forecast( - precip[-1], motion, timesteps, thr, slope=slope, extrap_kwargs=extrap_kwargs + dataset_forecast = forecast( + dataset_w_motion.isel(time=slice(-1, None, None)), + timesteps, + thr, + slope=slope, + extrap_kwargs=extrap_kwargs, ) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) - assert fct.shape[1:] == precip.shape[1:] + assert fct.shape[1:] == dataset_input[precip_var].values.shape[1:] assert np.nanmax(fct) <= 1.0 assert np.nanmin(fct) >= 0.0 @@ -89,11 +122,19 @@ def test_wrong_inputs(): # dummy inputs precip = np.zeros((3, 3)) velocity = np.zeros((2, *precip.shape)) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["y", "x"], precip), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + attrs={"precip_var": "precip_intensity"}, + ) # timesteps must be > 0 with pytest.raises(ValueError): - forecast(precip, velocity, 0, 1) + forecast(dataset_input, 0, 1) # timesteps must be a sorted list with pytest.raises(ValueError): - forecast(precip, velocity, [2, 1], 1) + forecast(dataset_input, [2, 1], 1) diff --git a/pysteps/tests/test_nowcasts_linda.py b/pysteps/tests/test_nowcasts_linda.py index 2d5f03b71..a5b60611b 100644 --- a/pysteps/tests/test_nowcasts_linda.py +++ b/pysteps/tests/test_nowcasts_linda.py @@ -1,13 +1,13 @@ -from datetime import timedelta import os +from datetime import timedelta + import numpy as np import pytest +import xarray as xr from pysteps import io, motion, nowcasts, verification -from pysteps.nowcasts.linda import forecast from pysteps.tests.helpers import get_precipitation_fields - linda_arg_names = ( "add_perturbations", "kernel_type", @@ -42,7 +42,7 @@ def test_linda( pytest.importorskip("skimage") # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, metadata=True, @@ -51,20 +51,23 @@ def test_linda( log_transform=False, ) - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, clip=(354000, 866000, -96000, 416000), upscale=4000, log_transform=False, - )[1:, :, :] + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) - precip_forecast = forecast( - precip_input, - retrieved_motion, + nowcast_method = nowcasts.get_method("linda") + + dataset_forecast = nowcast_method( 
+ dataset_w_motion, 3, kernel_type=kernel_type, vel_pert_method=vel_pert_method, @@ -78,68 +81,82 @@ def test_linda( seed=42, ) if measure_time: - assert len(precip_forecast) == 3 - assert isinstance(precip_forecast[1], float) - precip_forecast = precip_forecast[0] + assert len(dataset_forecast) == 3 + assert isinstance(dataset_forecast[1], float) + dataset_forecast = dataset_forecast[0] + + precip_forecast = dataset_forecast[precip_var].values if not add_perturbations: assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == 3 - assert precip_forecast.shape[1:] == precip_input.shape[1:] + assert precip_forecast.shape[1:] == dataset_input[precip_var].values.shape[1:] csi = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=1.0, scores="CSI" + precip_forecast[-1], + dataset_obs[precip_var].values[-1], + thr=1.0, + scores="CSI", )["CSI"] assert csi > min_csi, f"CSI={csi:.1f}, required > {min_csi:.1f}" else: assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 5 assert precip_forecast.shape[1] == 3 - assert precip_forecast.shape[2:] == precip_input.shape[1:] + assert precip_forecast.shape[2:] == dataset_input[precip_var].values.shape[1:] - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" def test_linda_wrong_inputs(): # dummy inputs - precip = np.zeros((3, 3, 3)) - velocity = np.zeros((2, 3, 3)) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], np.zeros((3, 3, 3))), + "velocity_x": (["y", "x"], np.zeros((3, 3))), + "velocity_y": (["y", "x"], np.zeros((3, 3))), + }, + attrs={"precip_var": "precip_intensity"}, + ) + dataset_input_4d = xr.Dataset( + data_vars={ + "precip_intensity": ( + ["ens_number", "time", "y", "x"], + np.zeros((3, 3, 3, 3)), + ), + "velocity_x": (["y", "x"], np.zeros((3, 3))), + "velocity_y": (["y", "x"], np.zeros((3, 3))), + }, + attrs={"precip_var": "precip_intensity"}, + ) + + nowcast_method = nowcasts.get_method("linda") # vel_pert_method is set but kmperpixel is None with pytest.raises(ValueError): - forecast(precip, velocity, 1, vel_pert_method="bps", kmperpixel=None) + nowcast_method(dataset_input, 1, vel_pert_method="bps", kmperpixel=None) # vel_pert_method is set but timestep is None with pytest.raises(ValueError): - forecast( - precip, velocity, 1, vel_pert_method="bps", kmperpixel=1, timestep=None + nowcast_method( + dataset_input, 1, vel_pert_method="bps", kmperpixel=1, timestep=None ) # fractional time steps not yet implemented # timesteps is not an integer with pytest.raises(ValueError): - forecast(precip, velocity, [1.0, 2.0]) + nowcast_method(dataset_input, [1.0, 2.0]) # ari_order 1 or 2 required with pytest.raises(ValueError): - forecast(precip, velocity, 1, ari_order=3) + nowcast_method(dataset_input, 1, ari_order=3) # precip_fields must be a three-dimensional array with pytest.raises(ValueError): - forecast(np.zeros((3, 3, 3, 3)), velocity, 1) - - # precip_fields.shape[0] < ari_order+2 - with pytest.raises(ValueError): - forecast(np.zeros((2, 3, 3)), velocity, 1, ari_order=1) - - # advection_field must be a three-dimensional array - with pytest.raises(ValueError): - forecast(precip, velocity[0], 1) - - # dimension mismatch between precip_fields and advection_field - with pytest.raises(ValueError): - forecast(np.zeros((3, 2, 3)), velocity, 1) + nowcast_method(dataset_input_4d, 1) 
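A minimal, standalone sketch (illustrative only, not part of the patch) of the input conventions the rewritten tests above rely on: `precip_intensity`, `velocity_x` and `velocity_y` data variables, a `precip_var` attribute naming the precipitation variable, and a `time` coordinate carrying a `stepsize` attribute. These names and the `stepsize` value come from the test code in this diff; the shapes and values here are arbitrary.

from datetime import datetime, timedelta

import numpy as np
import xarray as xr

n_times, ny, nx = 3, 8, 8  # arbitrary grid; real tests use radar composites
times = [datetime(2024, 1, 1) + timedelta(minutes=5 * i) for i in range(n_times)]

dataset_input = xr.Dataset(
    data_vars={
        # Precipitation fields stacked along time, oldest first.
        "precip_intensity": (["time", "y", "x"], np.zeros((n_times, ny, nx))),
        # One advection field shared by all time steps, split into components.
        "velocity_x": (["y", "x"], np.zeros((ny, nx))),
        "velocity_y": (["y", "x"], np.zeros((ny, nx))),
    },
    # 300 matches the 5-minute spacing used in the tests above.
    coords={"time": ("time", times, {"stepsize": 300})},
    attrs={"precip_var": "precip_intensity"},
)

A dataset shaped like this is what the refactored nowcast entry points consume in place of the former separate `precip` and `velocity` arrays.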
def test_linda_callback(tmp_path): diff --git a/pysteps/tests/test_nowcasts_sprog.py b/pysteps/tests/test_nowcasts_sprog.py index 1077c3edd..f64900cd8 100644 --- a/pysteps/tests/test_nowcasts_sprog.py +++ b/pysteps/tests/test_nowcasts_sprog.py @@ -30,29 +30,28 @@ def test_sprog( ): """Tests SPROG nowcast.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("sprog") - precip_forecast = nowcast_method( - precip_input, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, timesteps=timesteps, precip_thr=metadata["threshold"], n_cascade_levels=n_cascade_levels, @@ -60,6 +59,7 @@ def test_sprog( probmatching_method=probmatching_method, domain=domain, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( @@ -67,7 +67,7 @@ def test_sprog( ) result = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" + precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.1f}, required > {min_csi:.1f}" diff --git a/pysteps/tests/test_nowcasts_sseps.py b/pysteps/tests/test_nowcasts_sseps.py index 4d89fd33a..6d3a3c9c0 100644 --- a/pysteps/tests/test_nowcasts_sseps.py +++ b/pysteps/tests/test_nowcasts_sseps.py @@ -17,8 +17,8 @@ ) sseps_arg_values = [ - (5, 6, 2, "incremental", "cdf", 200, 3, 0.60), - (5, 6, 2, "incremental", "cdf", 200, [3], 0.60), + (5, 6, 2, "incremental", "cdf", 200, 3, 0.62), + (5, 6, 2, "incremental", "cdf", 200, [3], 0.62), ] @@ -35,32 +35,29 @@ def test_sseps( ): """Tests SSEPS nowcast.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() + precip_var = dataset_input.attrs["precip_var"] - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("sseps") - precip_forecast = nowcast_method( - precip_input, - metadata, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, + timesteps, win_size=win_size, - timesteps=timesteps, n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, ar_order=ar_order, @@ -68,6 +65,7 @@ def test_sseps( mask_method=mask_method, probmatching_method=probmatching_method, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 
n_ens_members @@ -75,7 +73,9 @@ def test_sseps( timesteps if isinstance(timesteps, int) else len(timesteps) ) - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" diff --git a/pysteps/tests/test_nowcasts_steps.py b/pysteps/tests/test_nowcasts_steps.py index 61af86ba5..16d2e4de5 100644 --- a/pysteps/tests/test_nowcasts_steps.py +++ b/pysteps/tests/test_nowcasts_steps.py @@ -7,7 +7,6 @@ from pysteps import io, motion, nowcasts, verification from pysteps.tests.helpers import get_precipitation_fields - steps_arg_names = ( "n_ens_members", "n_cascade_levels", @@ -44,29 +43,28 @@ def test_steps_skill( ): """Tests STEPS nowcast skill.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("steps") - precip_forecast = nowcast_method( - precip_input, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, timesteps=timesteps, precip_thr=metadata["threshold"], kmperpixel=2.0, @@ -79,6 +77,7 @@ def test_steps_skill( probmatching_method=probmatching_method, domain=domain, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members @@ -86,7 +85,9 @@ def test_steps_skill( timesteps if isinstance(timesteps, int) else len(timesteps) ) - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" diff --git a/pysteps/tests/test_nowcasts_utils.py b/pysteps/tests/test_nowcasts_utils.py index 075225427..1dfeb27a9 100644 --- a/pysteps/tests/test_nowcasts_utils.py +++ b/pysteps/tests/test_nowcasts_utils.py @@ -26,17 +26,18 @@ def test_nowcast_main_loop( timesteps, ensemble, num_ensemble_members, velocity_perturbations ): """Test the nowcast_main_loop function.""" - precip = get_precipitation_fields( + dataset = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) - precip = precip.filled() oflow_method = motion.get_method("LK") - velocity = oflow_method(precip) + dataset = oflow_method(dataset) + precip = dataset["precip_intensity"].values + velocity = np.stack([dataset["velocity_x"].values, dataset["velocity_y"].values]) precip = precip[-1] diff --git a/pysteps/tests/test_utils_conversion.py b/pysteps/tests/test_utils_conversion.py index 169cdb50e..bdf1fa42f 100644 --- a/pysteps/tests/test_utils_conversion.py +++ b/pysteps/tests/test_utils_conversion.py @@ -1,336 +1,927 @@ # -*- coding: utf-8 -*- - import numpy as np import pytest +import xarray as xr from numpy.testing import 
assert_array_almost_equal +from pysteps.tests.helpers import assert_dataset_equivalent from pysteps.utils import conversion # to_rainrate -test_data = [ +test_data_to_rainrate = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([12]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([12.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 12.0, + "zerovalue": 12.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1.25892541]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.25892541]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.25892541, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([15.10710494]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([15.10710494]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 15.10710494, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "dBZ", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.04210719]), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.04210719]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 0.04210719, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "log", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([2.71828183]), + xr.Dataset( + data_vars={ + 
"precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([2.71828183]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 2.71828183, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "log", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([32.61938194]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([32.61938194]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 32.61938194, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([12.0]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([12.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 12.0, + "zerovalue": 12.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, expected", test_data) -def test_to_rainrate(R, metadata, expected): +@pytest.mark.parametrize("dataset, expected", test_data_to_rainrate) +def test_to_rainrate(dataset, expected): """Test the to_rainrate.""" - assert_array_almost_equal(conversion.to_rainrate(R, metadata)[0], expected) + actual = conversion.to_rainrate(dataset) + assert_dataset_equivalent(actual, expected) # to_raindepth -test_data = [ - ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.08333333]), - ), +test_data_to_raindepth = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.08333333]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 
0.08333333, + "zerovalue": 0.08333333, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.10491045]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1.25892541]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.10491045]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.10491045, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "dBZ", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.00350893]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.25892541]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.25892541, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "log", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.22652349]), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.00350893]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.00350893, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "log", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([2.71828183]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.22652349]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.22652349, + "zerovalue": 0.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([0.08333333]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, 
+ "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([0.08333333]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 0.08333333, + "zerovalue": 0.08333333, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1.0]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, expected", test_data) -def test_to_raindepth(R, metadata, expected): +@pytest.mark.parametrize("dataset, expected", test_data_to_raindepth) +def test_to_raindepth(dataset, expected): """Test the to_raindepth.""" - assert_array_almost_equal(conversion.to_raindepth(R, metadata)[0], expected) + actual = conversion.to_raindepth(dataset) + assert_dataset_equivalent(actual, expected) # to_reflectivity -test_data = [ +test_data_to_reflectivity = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([23.01029996]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([23.01029996]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 23.01029996, + "zerovalue": 18.01029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([40.27719989]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([40.27719989]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 40.27719989, + "zerovalue": 35.27719989, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([24.61029996]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([24.61029996]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 24.61029996, + "zerovalue": 19.61029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([41.87719989]), 
+ xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([41.87719989]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 41.87719989, + "zerovalue": 36.87719989, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "dBZ", - "threshold": 0, - "zerovalue": 0, - }, - np.array([1]), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([1.0]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 1.0, + "zerovalue": -4.0, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "log", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([29.95901167]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([29.95901167]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 29.95901167, + "zerovalue": 24.95901167, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "log", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([47.2259116]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "log", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([47.2259116]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 47.2259116, + "zerovalue": 42.2259116, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, - np.array([23.01029996]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([23.01029996]), + { + "units": "dBZ", + "transform": "dB", + "accutime": 5, + "threshold": 23.01029996, + "zerovalue": 18.01029996, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ( - np.array([1.0]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm", - "threshold": 0, - "zerovalue": 0, - }, - np.array([40.27719989]), + xr.Dataset( + data_vars={ + "precip_accum": ( + ["x"], + np.array([1.0]), + { + "units": "mm", + "transform": "sqrt", + "accutime": 5, + "threshold": 1.0, + "zerovalue": 1.0, + }, + ) + }, + attrs={"precip_var": "precip_accum"}, + ), + xr.Dataset( + data_vars={ + "reflectivity": ( + ["x"], + np.array([40.27719989]), + { + "units": "dBZ", + 
"transform": "dB", + "accutime": 5, + "threshold": 40.27719989, + "zerovalue": 35.27719989, + }, + ) + }, + attrs={"precip_var": "reflectivity"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, expected", test_data) -def test_to_reflectivity(R, metadata, expected): +@pytest.mark.parametrize("dataset, expected", test_data_to_reflectivity) +def test_to_reflectivity(dataset, expected): """Test the to_reflectivity.""" - assert_array_almost_equal(conversion.to_reflectivity(R, metadata)[0], expected) + actual = conversion.to_reflectivity(dataset) + assert_dataset_equivalent(actual, expected) diff --git a/pysteps/tests/test_utils_dimension.py b/pysteps/tests/test_utils_dimension.py index ab753ed7d..038b725a0 100644 --- a/pysteps/tests/test_utils_dimension.py +++ b/pysteps/tests/test_utils_dimension.py @@ -4,63 +4,86 @@ import numpy as np import pytest -from numpy.testing import assert_array_equal +import xarray as xr +from numpy.testing import assert_array_almost_equal, assert_array_equal from pytest import raises from pysteps.utils import dimension +from pysteps.xarray_helpers import convert_input_to_xarray_dataset + +fillvalues_metadata = { + "x1": 0, + "x2": 4, + "y1": 0, + "y2": 4, + "zerovalue": 0, + "yorigin": "lower", + "unit": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1.0, + "projection": "+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378.137 +b=6356.752 +x_0=0 +y_0=0", + "zr_a": 200, + "zr_b": 1.6, + "cartesian_unit": "km", + "institution": "KNMI", +} test_data_not_trim = ( - # "data, window_size, axis, method, expected" - (np.arange(6), 2, 0, "mean", np.array([0.5, 2.5, 4.5])), + ( + np.arange(12).reshape(2, 6), + 2, + "x", + "mean", + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + ), ( np.arange(4 * 6).reshape(4, 6), (2, 3), - (0, 1), + ("y", "x"), "sum", np.array([[24, 42], [96, 114]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 2), - (0, 1), + ("y", "x"), "sum", np.array([[14, 22, 30], [62, 70, 78]]), ), ( np.arange(4 * 6).reshape(4, 6), 2, - (0, 1), + ("y", "x"), "sum", np.array([[14, 22, 30], [62, 70, 78]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 3), - (0, 1), + ("y", "x"), "mean", np.array([[4.0, 7.0], [16.0, 19.0]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 2), - (0, 1), + ("y", "x"), "mean", np.array([[3.5, 5.5, 7.5], [15.5, 17.5, 19.5]]), ), ( np.arange(4 * 6).reshape(4, 6), 2, - (0, 1), + ("y", "x"), "mean", np.array([[3.5, 5.5, 7.5], [15.5, 17.5, 19.5]]), ), ) -@pytest.mark.parametrize( - "data, window_size, axis, method, expected", test_data_not_trim -) -def test_aggregate_fields(data, window_size, axis, method, expected): +@pytest.mark.parametrize("data, window_size, dim, method, expected", test_data_not_trim) +def test_aggregate_fields(data, window_size, dim, method, expected): """ Test the aggregate_fields function. The windows size must divide exactly the data dimensions. @@ -68,23 +91,97 @@ def test_aggregate_fields(data, window_size, axis, method, expected): windows size does not divide the data dimensions. The length of each dimension should be larger than 2. """ + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) - assert_array_equal( - dimension.aggregate_fields(data, window_size, axis=axis, method=method), - expected, - ) + actual = dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) + assert_array_equal(actual["precip_intensity"].values, expected) # Test the trimming capabilities. 
- data = np.pad(data, (0, 1)) - assert_array_equal( - dimension.aggregate_fields( - data, window_size, axis=axis, method=method, trim=True - ), - expected, + if np.ndim(window_size) == 0: + data = np.pad(data, ((0, 0), (0, 1))) + else: + data = np.pad(data, (0, 1)) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) + + actual = dimension.aggregate_fields( + dataset, window_size, dim=dim, method=method, trim=True ) + assert_array_equal(actual["precip_intensity"].values, expected) with raises(ValueError): - dimension.aggregate_fields(data, window_size, axis=axis, method=method) + dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) + + +test_data_agg_w_velocity = ( + ( + np.arange(12).reshape(2, 6), + np.arange(12).reshape(2, 6), + np.arange(12).reshape(2, 6), + np.arange(0, 1.2, 0.1).reshape(2, 6), + 2, + "x", + "mean", + "mean", + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + np.array([[0, 0.2, 0.4], [0.6, 0.8, 1]]), + ), + ( + np.arange(4 * 6).reshape(4, 6), + np.arange(4 * 6).reshape(4, 6), + np.arange(4 * 6).reshape(4, 6), + np.arange(0, 1.2, 0.05).reshape(4, 6), + (2, 3), + ("y", "x"), + "mean", + "sum", + np.array([[4, 7], [16, 19]]), + np.array([[24, 42], [96, 114]]), + np.array([[0, 0.15], [0.6, 0.75]]), + ), +) + + +@pytest.mark.parametrize( + "data, data_vx, data_vy, data_qual, window_size, dim, method, velocity_method, expected, expected_v, expected_qual", + test_data_agg_w_velocity, +) +def test_aggregate_fields_w_velocity( + data, + data_vx, + data_vy, + data_qual, + window_size, + dim, + method, + velocity_method, + expected, + expected_v, + expected_qual, +): + """ + Test the aggregate_fields function for dataset with velocity information. + The windows size must divide exactly the data dimensions. + Internally, additional test are generated for situations where the + windows size does not divide the data dimensions. + The length of each dimension should be larger than 2. + """ + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = dataset.assign( + { + "velocity_x": (("y", "x"), data_vx), + "velocity_y": (("y", "x"), data_vy), + "quality": (("y", "x"), data_qual), + } + ) + + actual = dimension.aggregate_fields( + dataset, window_size, dim=dim, method=method, velocity_method=velocity_method + ) + assert_array_equal(actual["precip_intensity"].values, expected) + assert_array_equal(actual["velocity_x"].values, expected_v) + assert_array_equal(actual["velocity_y"].values, expected_v) + assert_array_almost_equal(actual["quality"].values, expected_qual) def test_aggregate_fields_errors(): @@ -93,80 +190,126 @@ def test_aggregate_fields_errors(): function. 
""" data = np.arange(4 * 6).reshape(4, 6) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) with raises(ValueError): - dimension.aggregate_fields(data, -1, axis=0) + dimension.aggregate_fields(dataset, -1, dim="y") with raises(ValueError): - dimension.aggregate_fields(data, 0, axis=0) + dimension.aggregate_fields(dataset, 0, dim="y") with raises(ValueError): - dimension.aggregate_fields(data, 1, method="invalid") + dimension.aggregate_fields(dataset, 1, method="invalid") with raises(TypeError): - dimension.aggregate_fields(data, (1, 1), axis=0) + dimension.aggregate_fields(dataset, (1, 1), dim="y") # aggregate_fields_time -timestamps = [dt.datetime.now() + dt.timedelta(minutes=t) for t in range(10)] -test_data = [ +now = dt.datetime.now() +timestamps = [now + dt.timedelta(minutes=t) for t in range(10)] +test_data_time = [ ( - np.ones((10, 1, 1)), + np.ones((2, 2)), {"unit": "mm/h", "timestamps": timestamps}, 2, False, - np.ones((5, 1, 1)), + np.ones((5, 2, 2)), ), ( - np.ones((10, 1, 1)), + np.ones((2, 2)), {"unit": "mm", "timestamps": timestamps}, 2, False, - 2 * np.ones((5, 1, 1)), + 2 * np.ones((5, 2, 2)), ), ] @pytest.mark.parametrize( - "R, metadata, time_window_min, ignore_nan, expected", test_data + "data, metadata, time_window_min, ignore_nan, expected", test_data_time ) -def test_aggregate_fields_time(R, metadata, time_window_min, ignore_nan, expected): +def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expected): """Test the aggregate_fields_time.""" + dataset_ref = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + datasets = [] + for timestamp in metadata["timestamps"]: + dataset_ = dataset_ref.copy(deep=True) + dataset_ = dataset_.expand_dims(dim="time", axis=0) + dataset_ = dataset_.assign_coords(time=("time", [timestamp])) + datasets.append(dataset_) + + dataset = xr.concat(datasets, dim="time") assert_array_equal( - dimension.aggregate_fields_time(R, metadata, time_window_min, ignore_nan)[0], + dimension.aggregate_fields_time(dataset, time_window_min, ignore_nan)[ + "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum" + ].values, expected, ) # aggregate_fields_space -test_data = [ +test_data_space = [ ( - np.ones((1, 10, 10)), - {"unit": "mm/h", "xpixelsize": 1, "ypixelsize": 1}, + np.ones((10, 10)), + { + "unit": "mm/h", + "x1": 0, + "x2": 10, + "y1": 0, + "y2": 10, + "xpixelsize": 1, + "ypixelsize": 1, + }, 2, False, - np.ones((1, 5, 5)), + np.ones((5, 5)), ), ( - np.ones((1, 10, 10)), - {"unit": "mm", "xpixelsize": 1, "ypixelsize": 1}, + np.ones((10, 10)), + { + "unit": "mm", + "x1": 0, + "x2": 10, + "y1": 0, + "y2": 10, + "xpixelsize": 1, + "ypixelsize": 1, + }, 2, False, - np.ones((1, 5, 5)), + np.ones((5, 5)), ), ( - np.ones((1, 10, 10)), - {"unit": "mm/h", "xpixelsize": 1, "ypixelsize": 2}, - (2, 4), + np.ones((10, 10)), + { + "unit": "mm/h", + "x1": 0, + "x2": 10, + "y1": 0, + "y2": 20, + "xpixelsize": 1, + "ypixelsize": 2, + }, + (4, 2), False, - np.ones((1, 5, 5)), + np.ones((5, 5)), ), ] -@pytest.mark.parametrize("R, metadata, space_window, ignore_nan, expected", test_data) -def test_aggregate_fields_space(R, metadata, space_window, ignore_nan, expected): +@pytest.mark.parametrize( + "data, metadata, space_window, ignore_nan, expected", test_data_space +) +def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expected): """Test the aggregate_fields_space.""" + dataset = convert_input_to_xarray_dataset( + data, None, 
{**fillvalues_metadata, **metadata} + ) assert_array_equal( - dimension.aggregate_fields_space(R, metadata, space_window, ignore_nan)[0], + dimension.aggregate_fields_space(dataset, space_window, ignore_nan)[ + "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum" + ].values, expected, ) @@ -174,64 +317,42 @@ def test_aggregate_fields_space(R, metadata, space_window, ignore_nan, expected) # clip_domain R = np.zeros((4, 4)) R[:2, :] = 1 -test_data = [ +test_data_clip_domain = [ ( R, - { - "x1": 0, - "x2": 4, - "y1": 0, - "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, - "zerovalue": 0, - "yorigin": "upper", - }, + {"yorigin": "lower"}, None, R, ), ( R, - { - "x1": 0, - "x2": 4, - "y1": 0, - "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, - "zerovalue": 0, - "yorigin": "lower", - }, + {"yorigin": "lower"}, (2, 4, 2, 4), np.zeros((2, 2)), ), ( R, - { - "x1": 0, - "x2": 4, - "y1": 0, - "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, - "zerovalue": 0, - "yorigin": "upper", - }, + {"yorigin": "upper"}, (2, 4, 2, 4), np.ones((2, 2)), ), ] -@pytest.mark.parametrize("R, metadata, extent, expected", test_data) +@pytest.mark.parametrize("R, metadata, extent, expected", test_data_clip_domain) def test_clip_domain(R, metadata, extent, expected): """Test the clip_domain.""" - assert_array_equal(dimension.clip_domain(R, metadata, extent)[0], expected) + dataset = convert_input_to_xarray_dataset( + R, None, {**fillvalues_metadata, **metadata} + ) + assert_array_equal( + dimension.clip_domain(dataset, extent)["precip_intensity"].values, expected + ) # square_domain R = np.zeros((4, 2)) -test_data = [ +test_data_square = [ # square by padding ( R, @@ -258,7 +379,7 @@ def test_clip_domain(R, metadata, extent, expected): "y2": 4, "xpixelsize": 1, "ypixelsize": 1, - "orig_domain": (4, 2), + "orig_domain": (np.array([0.5, 1.5, 2.5, 3.5]), np.array([0.5, 1.5])), "square_method": "pad", }, "pad", @@ -275,7 +396,7 @@ def test_clip_domain(R, metadata, extent, expected): "y2": 3, "xpixelsize": 1, "ypixelsize": 1, - "orig_domain": (4, 2), + "orig_domain": (np.array([0.5, 1.5, 2.5, 3.5]), np.array([0.5, 1.5])), "square_method": "crop", }, "crop", @@ -285,9 +406,70 @@ def test_clip_domain(R, metadata, extent, expected): ] -@pytest.mark.parametrize("R, metadata, method, inverse, expected", test_data) -def test_square_domain(R, metadata, method, inverse, expected): +@pytest.mark.parametrize("data, metadata, method, inverse, expected", test_data_square) +def test_square_domain(data, metadata, method, inverse, expected): """Test the square_domain.""" + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + if "square_method" in metadata: + dataset.attrs["square_method"] = metadata["square_method"] + if "orig_domain" in metadata: + dataset.attrs["orig_domain"] = metadata["orig_domain"] + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, + expected, + ) + + +# square_domain +R = np.ones((4, 2)) +test_data_square_w_velocity = [ + # square by padding + ( + R, + {"x1": 0, "x2": 2, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1}, + "pad", + False, + np.array([[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]), + np.array([[0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0]]), + ) +] + + +@pytest.mark.parametrize( + "data, metadata, method, inverse, expected, expected_velqual", + test_data_square_w_velocity, +) +def test_square_w_velocity(data, metadata, method, inverse, expected, expected_velqual): + """Test the 
square_domain.""" + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + dataset = dataset.assign( + { + "velocity_x": (("y", "x"), data), + "velocity_y": (("y", "x"), data), + "quality": (("y", "x"), data), + } + ) + if "square_method" in metadata: + dataset.attrs["square_method"] = metadata["square_method"] + if "orig_domain" in metadata: + dataset.attrs["orig_domain"] = metadata["orig_domain"] + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, + expected, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["velocity_x"].values, + expected_velqual, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["velocity_y"].values, + expected_velqual, + ) assert_array_equal( - dimension.square_domain(R, metadata, method, inverse)[0], expected + dimension.square_domain(dataset, method, inverse)["quality"].values, + expected_velqual, ) diff --git a/pysteps/tests/test_utils_transformation.py b/pysteps/tests/test_utils_transformation.py index 101e6b9d5..29e6e639c 100644 --- a/pysteps/tests/test_utils_transformation.py +++ b/pysteps/tests/test_utils_transformation.py @@ -1,190 +1,392 @@ # -*- coding: utf-8 -*- - import numpy as np import pytest -from numpy.testing import assert_array_almost_equal +import xarray as xr +from pysteps.tests.helpers import assert_dataset_equivalent from pysteps.utils import transformation # boxcox_transform -test_data = [ +test_data_boxcox_transform = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, None, False, - np.array([0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": 1, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "BoxCox", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": 1, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, None, True, - np.array([np.exp(1)]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([np.exp(1.0)]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), 1.0, None, None, False, - np.array([0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([np.e - 2]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": np.e - 1, + "zerovalue": np.e - 2, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - 
{ - "accutime": 5, - "transform": "BoxCox", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([np.e - 2]), + { + "units": "mm/h", + "transform": "BoxCox", + "accutime": 5, + "threshold": np.e - 1, + "zerovalue": np.e - 2, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), 1.0, None, None, True, - np.array([2.0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": np.e, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] @pytest.mark.parametrize( - "R, metadata, Lambda, threshold, zerovalue, inverse, expected", test_data + "dataset, Lambda, threshold, zerovalue, inverse, expected", + test_data_boxcox_transform, ) -def test_boxcox_transform(R, metadata, Lambda, threshold, zerovalue, inverse, expected): +def test_boxcox_transform(dataset, Lambda, threshold, zerovalue, inverse, expected): """Test the boxcox_transform.""" - assert_array_almost_equal( - transformation.boxcox_transform( - R, metadata, Lambda, threshold, zerovalue, inverse - )[0], - expected, + actual = transformation.boxcox_transform( + dataset, Lambda, threshold, zerovalue, inverse ) + assert_dataset_equivalent(actual, expected) # dB_transform -test_data = [ +test_data_dB_transform = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1, + "zerovalue": 1, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, False, - np.array([0]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 0, + "zerovalue": -5, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "dB", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([0.0]), + { + "units": "mm/h", + "transform": "dB", + "accutime": 5, + "threshold": 0, + "zerovalue": -5, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), None, None, True, - np.array([1.25892541]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 1, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] @pytest.mark.parametrize( - "R, metadata, threshold, zerovalue, inverse, expected", test_data + "dataset, threshold, zerovalue, inverse, expected", test_data_dB_transform ) -def test_dB_transform(R, metadata, threshold, zerovalue, inverse, expected): +def test_dB_transform(dataset, threshold, zerovalue, inverse, expected): """Test the dB_transform.""" - assert_array_almost_equal( - transformation.dB_transform(R, metadata, threshold, zerovalue, inverse)[0], - expected, - ) + actual = transformation.dB_transform(dataset, threshold, zerovalue, inverse) + assert_dataset_equivalent(actual, expected) # NQ_transform -test_data = [ +test_data_NQ_transform = [ ( - np.array([1, 2]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 2.0]), 
+ { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 0, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), False, - np.array([-0.4307273, 0.4307273]), - ) + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([-0.4307273, 0.4307273]), + { + "units": "mm/h", + "transform": "NQT", + "accutime": 5, + "threshold": 0.4307273, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), + ), ] -@pytest.mark.parametrize("R, metadata, inverse, expected", test_data) -def test_NQ_transform(R, metadata, inverse, expected): +@pytest.mark.parametrize("dataset, inverse, expected", test_data_NQ_transform) +def test_NQ_transform(dataset, inverse, expected): """Test the NQ_transform.""" - assert_array_almost_equal( - transformation.NQ_transform(R, metadata, inverse)[0], expected - ) + actual = transformation.NQ_transform(dataset, inverse) + assert_dataset_equivalent(actual, expected) # sqrt_transform -test_data = [ +test_data_sqrt_transform = [ ( - np.array([1]), - { - "accutime": 5, - "transform": None, - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 4.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 4, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), False, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 2.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 2, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ( - np.array([1]), - { - "accutime": 5, - "transform": "sqrt", - "unit": "mm/h", - "threshold": 0, - "zerovalue": 0, - }, + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 2.0]), + { + "units": "mm/h", + "transform": "sqrt", + "accutime": 5, + "threshold": 2, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), True, - np.array([1]), + xr.Dataset( + data_vars={ + "precip_intensity": ( + ["x"], + np.array([1.0, 4.0]), + { + "units": "mm/h", + "transform": None, + "accutime": 5, + "threshold": 4, + "zerovalue": 0, + }, + ) + }, + attrs={"precip_var": "precip_intensity"}, + ), ), ] -@pytest.mark.parametrize("R, metadata, inverse, expected", test_data) -def test_sqrt_transform(R, metadata, inverse, expected): +@pytest.mark.parametrize("dataset, inverse, expected", test_data_sqrt_transform) +def test_sqrt_transform(dataset, inverse, expected): """Test the sqrt_transform.""" - assert_array_almost_equal( - transformation.sqrt_transform(R, metadata, inverse)[0], expected - ) + actual = transformation.sqrt_transform(dataset, inverse) + assert_dataset_equivalent(actual, expected) diff --git a/pysteps/utils/conversion.py b/pysteps/utils/conversion.py index f8dfae23b..2ea6a3a12 100644 --- a/pysteps/utils/conversion.py +++ b/pysteps/utils/conversion.py @@ -14,6 +14,9 @@ """ import warnings + +import xarray as xr + from . import transformation # TODO: This should not be done. 
Instead fix the code so that it doesn't @@ -22,17 +25,53 @@ warnings.filterwarnings("ignore", category=RuntimeWarning) -def to_rainrate(R, metadata, zr_a=None, zr_b=None): +def cf_parameters_from_unit(unit: str) -> tuple[str, dict[str, str | None]]: + if unit == "mm/h": + var_name = "precip_intensity" + var_standard_name = None + var_long_name = "instantaneous precipitation rate" + var_unit = "mm/h" + elif unit == "mm": + var_name = "precip_accum" + var_standard_name = None + var_long_name = "accumulated precipitation" + var_unit = "mm" + elif unit == "dBZ": + var_name = "reflectivity" + var_long_name = "equivalent reflectivity factor" + var_standard_name = "equivalent_reflectivity_factor" + var_unit = "dBZ" + else: + raise ValueError(f"unknown unit {unit}") + + return var_name, { + "standard_name": var_standard_name, + "long_name": var_long_name, + "units": var_unit, + } + + +def _change_unit(dataset: xr.Dataset, precip_var: str, new_unit: str) -> xr.Dataset: + new_var, new_attrs = cf_parameters_from_unit(new_unit) + dataset = dataset.rename_vars({precip_var: new_var}) + dataset.attrs["precip_var"] = new_var + + dataset[new_var].attrs = { + **dataset[new_var].attrs, + **new_attrs, + } + + return dataset + + +def to_rainrate(dataset: xr.Dataset, zr_a=None, zr_b=None): """ Convert to rain rate [mm/h]. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. - metadata: dict - Metadata dictionary containing the accutime, transform, unit, threshold - and zerovalue attributes as described in the documentation of + dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of :py:mod:`pysteps.io.importers`. Additionally, in case of conversion to/from reflectivity units, the @@ -45,46 +84,49 @@ def to_rainrate(R, metadata, zr_a=None, zr_b=None): Returns ------- - R: array-like - Array of any shape containing the converted units. - metadata: dict - The metadata with updated attributes. + dataset: xarray.Dataset + Dataset containing the converted units. 
""" - R = R.copy() - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs if metadata["transform"] is not None: if metadata["transform"] == "dB": - R, metadata = transformation.dB_transform(R, metadata, inverse=True) + dataset = transformation.dB_transform(dataset, inverse=True) elif metadata["transform"] in ["BoxCox", "log"]: - R, metadata = transformation.boxcox_transform(R, metadata, inverse=True) + dataset = transformation.boxcox_transform(dataset, inverse=True) elif metadata["transform"] == "NQT": - R, metadata = transformation.NQ_transform(R, metadata, inverse=True) + dataset = transformation.NQ_transform(dataset, inverse=True) elif metadata["transform"] == "sqrt": - R, metadata = transformation.sqrt_transform(R, metadata, inverse=True) + dataset = transformation.sqrt_transform(dataset, inverse=True) else: - raise ValueError("Unknown transformation %s" % metadata["transform"]) + raise ValueError(f'Unknown transformation {metadata["transform"]}') + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values - if metadata["unit"] == "mm/h": + if metadata["units"] == "mm/h": pass - elif metadata["unit"] == "mm": + elif metadata["units"] == "mm": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too - R = R / float(metadata["accutime"]) * 60.0 + precip_data = precip_data / float(metadata["accutime"]) * 60.0 threshold = threshold / float(metadata["accutime"]) * 60.0 zerovalue = zerovalue / float(metadata["accutime"]) * 60.0 metadata["threshold"] = threshold metadata["zerovalue"] = zerovalue - elif metadata["unit"] == "dBZ": + elif metadata["units"] == "dBZ": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too @@ -93,7 +135,7 @@ def to_rainrate(R, metadata, zr_a=None, zr_b=None): zr_a = metadata.get("zr_a", 200.0) # default to Marshall–Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # default to Marshall–Palmer - R = (R / zr_a) ** (1.0 / zr_b) + precip_data = (precip_data / zr_a) ** (1.0 / zr_b) threshold = (threshold / zr_a) ** (1.0 / zr_b) zerovalue = (zerovalue / zr_a) ** (1.0 / zr_b) @@ -104,26 +146,22 @@ def to_rainrate(R, metadata, zr_a=None, zr_b=None): else: raise ValueError( - "Cannot convert unit %s and transform %s to mm/h" - % (metadata["unit"], metadata["transform"]) + f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to mm/h' ) - metadata["unit"] = "mm/h" + dataset[precip_var].data[:] = precip_data + dataset = _change_unit(dataset, precip_var, "mm/h") + return dataset - return R, metadata - -def to_raindepth(R, metadata, zr_a=None, zr_b=None): +def to_raindepth(dataset: xr.Dataset, zr_a=None, zr_b=None): """ Convert to rain depth [mm]. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. - metadata: dict - Metadata dictionary containing the accutime, transform, unit, threshold - and zerovalue attributes as described in the documentation of + dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of :py:mod:`pysteps.io.importers`. Additionally, in case of conversion to/from reflectivity units, the @@ -136,46 +174,49 @@ def to_raindepth(R, metadata, zr_a=None, zr_b=None): Returns ------- - R: array-like - Array of any shape containing the converted units. 
- metadata: dict - The metadata with updated attributes. + dataset: xarray.Dataset + Dataset containing the converted units. """ - R = R.copy() - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs if metadata["transform"] is not None: if metadata["transform"] == "dB": - R, metadata = transformation.dB_transform(R, metadata, inverse=True) + dataset = transformation.dB_transform(dataset, inverse=True) elif metadata["transform"] in ["BoxCox", "log"]: - R, metadata = transformation.boxcox_transform(R, metadata, inverse=True) + dataset = transformation.boxcox_transform(dataset, inverse=True) elif metadata["transform"] == "NQT": - R, metadata = transformation.NQ_transform(R, metadata, inverse=True) + dataset = transformation.NQ_transform(dataset, inverse=True) elif metadata["transform"] == "sqrt": - R, metadata = transformation.sqrt_transform(R, metadata, inverse=True) + dataset = transformation.sqrt_transform(dataset, inverse=True) else: - raise ValueError("Unknown transformation %s" % metadata["transform"]) + raise ValueError(f'Unknown transformation {metadata["transform"]}') + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values - if metadata["unit"] == "mm" and metadata["transform"] is None: + if metadata["units"] == "mm" and metadata["transform"] is None: pass - elif metadata["unit"] == "mm/h": + elif metadata["units"] == "mm/h": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too - R = R / 60.0 * metadata["accutime"] + precip_data = precip_data / 60.0 * metadata["accutime"] threshold = threshold / 60.0 * metadata["accutime"] zerovalue = zerovalue / 60.0 * metadata["accutime"] metadata["threshold"] = threshold metadata["zerovalue"] = zerovalue - elif metadata["unit"] == "dBZ": + elif metadata["units"] == "dBZ": threshold = metadata["threshold"] # convert the threshold, too zerovalue = metadata["zerovalue"] # convert the zerovalue, too @@ -184,7 +225,7 @@ def to_raindepth(R, metadata, zr_a=None, zr_b=None): zr_a = metadata.get("zr_a", 200.0) # Default to Marshall–Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # Default to Marshall–Palmer - R = (R / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] + precip_data = (precip_data / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] threshold = (threshold / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] zerovalue = (zerovalue / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"] @@ -195,26 +236,22 @@ def to_raindepth(R, metadata, zr_a=None, zr_b=None): else: raise ValueError( - "Cannot convert unit %s and transform %s to mm" - % (metadata["unit"], metadata["transform"]) + f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to mm' ) - metadata["unit"] = "mm" - - return R, metadata + dataset[precip_var].data[:] = precip_data + dataset = _change_unit(dataset, precip_var, "mm") + return dataset -def to_reflectivity(R, metadata, zr_a=None, zr_b=None): +def to_reflectivity(dataset: xr.Dataset, zr_a=None, zr_b=None): """ Convert to reflectivity [dBZ]. Parameters ---------- - R: array-like - Array of any shape to be (back-)transformed. 
- metadata: dict - Metadata dictionary containing the accutime, transform, unit, threshold - and zerovalue attributes as described in the documentation of + dataset: xarray.Dataset + Dataset to be (back-)transformed as described in the documentation of :py:mod:`pysteps.io.importers`. Additionally, in case of conversion to/from reflectivity units, the @@ -227,73 +264,82 @@ def to_reflectivity(R, metadata, zr_a=None, zr_b=None): Returns ------- - R: array-like - Array of any shape containing the converted units. - metadata: dict - The metadata with updated attributes. + dataset: xarray.Dataset + Dataset containing the converted units. """ - R = R.copy() - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs if metadata["transform"] is not None: if metadata["transform"] == "dB": - R, metadata = transformation.dB_transform(R, metadata, inverse=True) + dataset = transformation.dB_transform(dataset, inverse=True) elif metadata["transform"] in ["BoxCox", "log"]: - R, metadata = transformation.boxcox_transform(R, metadata, inverse=True) + dataset = transformation.boxcox_transform(dataset, inverse=True) elif metadata["transform"] == "NQT": - R, metadata = transformation.NQ_transform(R, metadata, inverse=True) + dataset = transformation.NQ_transform(dataset, inverse=True) elif metadata["transform"] == "sqrt": - R, metadata = transformation.sqrt_transform(R, metadata, inverse=True) + dataset = transformation.sqrt_transform(dataset, inverse=True) else: - raise ValueError("Unknown transformation %s" % metadata["transform"]) + raise ValueError(f'Unknown transformation {metadata["transform"]}') - if metadata["unit"] == "mm/h": + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values + + if metadata["units"] == "mm/h": # Z to R if zr_a is None: zr_a = metadata.get("zr_a", 200.0) # Default to Marshall–Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # Default to Marshall–Palmer - R = zr_a * R**zr_b + precip_data = zr_a * precip_data**zr_b metadata["threshold"] = zr_a * metadata["threshold"] ** zr_b metadata["zerovalue"] = zr_a * metadata["zerovalue"] ** zr_b metadata["zr_a"] = zr_a metadata["zr_b"] = zr_b - # Z to dBZ - R, metadata = transformation.dB_transform(R, metadata) - - elif metadata["unit"] == "mm": + elif metadata["units"] == "mm": # depth to rate - R, metadata = to_rainrate(R, metadata) + dataset = to_rainrate(dataset) + + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values # Z to R if zr_a is None: zr_a = metadata.get("zr_a", 200.0) # Default to Marshall-Palmer if zr_b is None: zr_b = metadata.get("zr_b", 1.6) # Default to Marshall-Palmer - R = zr_a * R**zr_b + precip_data = zr_a * precip_data**zr_b metadata["threshold"] = zr_a * metadata["threshold"] ** zr_b metadata["zerovalue"] = zr_a * metadata["zerovalue"] ** zr_b metadata["zr_a"] = zr_a metadata["zr_b"] = zr_b - # Z to dBZ - R, metadata = transformation.dB_transform(R, metadata) - - elif metadata["unit"] == "dBZ": - # Z to dBZ - R, metadata = transformation.dB_transform(R, metadata) + elif metadata["units"] == "dBZ": + pass else: raise ValueError( - "Cannot convert unit %s and transform %s to mm/h" - % (metadata["unit"], metadata["transform"]) + f'Cannot convert unit {metadata["units"]} and transform {metadata["transform"]} to dBZ' ) - metadata["unit"] = "dBZ" - return R, metadata + + 
dataset[precip_var].data[:] = precip_data
+    # Z to dBZ
+    dataset = transformation.dB_transform(dataset)
+
+    precip_var = dataset.attrs["precip_var"]
+    dataset = _change_unit(dataset, precip_var, "dBZ")
+    return dataset
diff --git a/pysteps/utils/dimension.py b/pysteps/utils/dimension.py
index 43b7e2ca5..d039d8ef0 100644
--- a/pysteps/utils/dimension.py
+++ b/pysteps/utils/dimension.py
@@ -14,26 +14,43 @@
 clip_domain
 square_domain
 """
+from typing import Any, Callable

 import numpy as np
+import xarray as xr

-_aggregation_methods = dict(
-    sum=np.sum, mean=np.mean, nanmean=np.nanmean, nansum=np.nansum
-)
+from pysteps.xarray_helpers import compute_lat_lon

+_aggregation_methods: dict[str, Callable[..., Any]] = {
+    "sum": np.sum,
+    "mean": np.mean,
+    "min": np.min,
+    "max": np.max,
+    "nanmean": np.nanmean,
+    "nansum": np.nansum,
+    "nanmin": np.nanmin,
+    "nanmax": np.nanmax,
+}

-def aggregate_fields_time(R, metadata, time_window_min, ignore_nan=False):
+
+def aggregate_fields_time(
+    dataset: xr.Dataset, time_window_min, ignore_nan=False
+) -> xr.Dataset:
     """Aggregate fields in time.

+    It attempts to aggregate the given dataset in the time direction in an integer
+    number of sections of length = ``time_window_min``.
+    If such an aggregation is not possible, an error is raised.
+    The data is aggregated by a method chosen based on the unit of the precipitation
+    data in the dataset. ``mean`` is used when the unit is ``mm/h`` and ``sum``
+    is used when the unit is ``mm``. For other units an error is raised.
+
     Parameters
     ----------
-    R: array-like
-        Array of shape (t,m,n) or (l,t,m,n) containing
-        a time series of (ensemble) input fields.
+    dataset: xarray.Dataset
+        Dataset containing a time series of (ensemble) input fields
+        as described in the documentation of :py:mod:`pysteps.io.importers`.
         They must be evenly spaced in time.
-    metadata: dict
-        Metadata dictionary containing the timestamps and unit attributes as
-        described in the documentation of :py:mod:`pysteps.io.importers`.
     time_window_min: float or None
        The length in minutes of the time window that is used to
        aggregate the fields.
@@ -45,12 +62,8 @@

     Returns
     -------
-    outputarray: array-like
-        The new array of aggregated fields of shape (k,m,n) or (l,k,m,n), where
-        k = t*delta/time_window_min and delta is the time interval between two
-        successive timestamps.
-    metadata: dict
-        The metadata with updated attributes.
+    dataset: xarray.Dataset
+        The new dataset.

     See also
     --------
@@ -58,40 +71,24 @@
     pysteps.utils.dimension.aggregate_fields
     """
-    R = R.copy()
-    metadata = metadata.copy()
-
     if time_window_min is None:
-        return R, metadata
-
-    unit = metadata["unit"]
-    timestamps = metadata["timestamps"]
-    if "leadtimes" in metadata:
-        leadtimes = metadata["leadtimes"]
-
-    if len(R.shape) < 3:
-        raise ValueError("The number of dimension must be > 2")
-    if len(R.shape) == 3:
-        axis = 0
-    if len(R.shape) == 4:
-        axis = 1
-    if len(R.shape) > 4:
-        raise ValueError("The number of dimension must be <= 4")
-
-    if R.shape[axis] != len(timestamps):
-        raise ValueError(
-            "The list of timestamps has length %i, " % len(timestamps)
-            + "but R contains %i frames" % R.shape[axis]
-        )
+        return dataset
+
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs
+
+    unit = metadata["units"]
+
+    timestamps = dataset["time"].values

     # assumes that frames are evenly spaced
-    delta = (timestamps[1] - timestamps[0]).seconds / 60
+    delta = (timestamps[1] - timestamps[0]) / np.timedelta64(1, "m")
     if delta == time_window_min:
-        return R, metadata
-    if (R.shape[axis] * delta) % time_window_min:
-        raise ValueError("time_window_size does not equally split R")
+        return dataset
+    if time_window_min % delta:
+        raise ValueError("time_window_min does not equally split the dataset")

-    nframes = int(time_window_min / delta)
+    window_size = int(time_window_min / delta)

     # specify the operator to be used to aggregate
     # the values within the time window
@@ -100,55 +97,49 @@
     elif unit == "mm":
         method = "sum"
     else:
-        raise ValueError(
-            "can only aggregate units of 'mm/h' or 'mm'" + " not %s" % unit
-        )
+        raise ValueError(f"can only aggregate units of 'mm/h' or 'mm' not {unit}")

     if ignore_nan:
         method = "".join(("nan", method))

-    R = aggregate_fields(R, nframes, axis=axis, method=method)
-
-    metadata["accutime"] = time_window_min
-    metadata["timestamps"] = timestamps[nframes - 1 :: nframes]
-    if "leadtimes" in metadata:
-        metadata["leadtimes"] = leadtimes[nframes - 1 :: nframes]
-
-    return R, metadata
+    return aggregate_fields(
+        dataset, window_size, dim="time", method=method, velocity_method="sum"
+    )
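For reference, a minimal usage sketch of the new `aggregate_fields_time` API (not part of the patch): the dataset is hand-built here to stand in for what `pysteps.xarray_helpers.convert_input_to_xarray_dataset` produces, so the coordinate values and attributes shown are illustrative assumptions only.

    import numpy as np
    import xarray as xr

    from pysteps.utils import dimension

    # Hypothetical 5-minute rain-rate series (mm/h) on a 2x2 grid.
    times = [
        np.datetime64("2024-01-01T00:00") + np.timedelta64(5 * t, "m")
        for t in range(6)
    ]
    dataset = xr.Dataset(
        data_vars={
            "precip_intensity": (
                ("time", "y", "x"),
                np.ones((6, 2, 2)),
                {
                    "units": "mm/h",
                    "accutime": 5,
                    "transform": None,
                    "threshold": 0,
                    "zerovalue": 0,
                },
            )
        },
        coords={"time": times, "y": [0.5, 1.5], "x": [0.5, 1.5]},
        attrs={"precip_var": "precip_intensity"},
    )

    # 5-minute steps aggregated to 15 minutes: mm/h data is averaged per
    # window, so 6 input frames become 2 output frames.
    aggregated = dimension.aggregate_fields_time(dataset, time_window_min=15)
    assert aggregated.sizes["time"] == 2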


-def aggregate_fields_space(R, metadata, space_window, ignore_nan=False):
+def aggregate_fields_space(
+    dataset: xr.Dataset, space_window, ignore_nan=False
+) -> xr.Dataset:
     """
     Upscale fields in space.

+    It attempts to aggregate the given dataset in the y and x directions in an
+    integer number of sections of length = ``(window_size_y, window_size_x)``.
+    If such an aggregation is not possible, an error is raised.
+    The data is aggregated by computing the mean. Only datasets with precipitation
+    data in the ``mm`` or ``mm/h`` unit are currently supported.
+
     Parameters
     ----------
-    R: array-like
-        Array of shape (m,n), (t,m,n) or (l,t,m,n) containing a single field or
-        a time series of (ensemble) input fields.
-    metadata: dict
-        Metadata dictionary containing the xpixelsize, ypixelsize and unit
-        attributes as described in the documentation of
+    dataset: xarray.Dataset
+        Dataset containing a single field or
+        a time series of (ensemble) input fields as described in the documentation of
         :py:mod:`pysteps.io.importers`.
     space_window: float, tuple or None
         The length of the space window that is used to upscale the fields.
         If a float is given, the same window size is used for the x- and
         y-directions. Separate window sizes are used for x- and y-directions if
-        a two-element tuple is given. The space_window unit is the same used in
-        the geographical projection of R and hence the same as for the xpixelsize
-        and ypixelsize attributes. The space spanned by the n- and m-dimensions
-        of R must be a multiple of space_window. If set to None, the function
-        returns a copy of the original R and metadata.
+        a two-element tuple is given (y, x). The space_window unit is the same
+        as the unit of x and y in the input dataset. The space spanned by the
+        n- and m-dimensions of the dataset content must be a multiple of
+        space_window. If set to None, the function returns a copy of the
+        original dataset.
     ignore_nan: bool, optional
         If True, ignore nan values.

     Returns
     -------
-    outputarray: array-like
-        The new array of aggregated fields of shape (k,j), (t,k,j) or (l,t,k,j),
-        where k = m*ypixelsize/space_window[1] and j = n*xpixelsize/space_window[0].
-    metadata: dict
-        The metadata with updated attributes.
+    dataset: xarray.Dataset
+        The new dataset.

     See also
     --------
@@ -156,110 +147,93 @@
     pysteps.utils.dimension.aggregate_fields
     """
-    R = R.copy()
-    metadata = metadata.copy()
-
     if space_window is None:
-        return R, metadata
-
-    unit = metadata["unit"]
-    ypixelsize = metadata["ypixelsize"]
-    xpixelsize = metadata["xpixelsize"]
-
-    if len(R.shape) < 2:
-        raise ValueError("The number of dimensions must be >= 2")
+        return dataset

-    if np.isscalar(space_window):
-        space_window = (space_window, space_window)
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs

-    # assumes that frames are evenly spaced
-    if ypixelsize == space_window[1] and xpixelsize == space_window[0]:
-        return R, metadata
+    unit = metadata["units"]

-    ysize = R.shape[axes[0]] * ypixelsize
-    xsize = R.shape[axes[1]] * xpixelsize
+    if np.isscalar(space_window):
+        space_window = (space_window, space_window)

-    if (
-        abs(ysize / space_window[1] - round(ysize / space_window[1])) > 1e-10
-        or abs(xsize / space_window[0] - round(xsize / space_window[0])) > 1e-10
-    ):
-        raise ValueError("space_window does not equally split R")
+    ydelta = dataset["y"].attrs["stepsize"]
+    xdelta = dataset["x"].attrs["stepsize"]

-    nframes = [int(space_window[1] / ypixelsize), int(space_window[0] / xpixelsize)]
+    # compare against the rounded ratio so that float arithmetic does not
+    # wrongly reject near-exact multiples of the pixel size
+    if (
+        abs(space_window[0] / ydelta - round(space_window[0] / ydelta)) > 1e-10
+        or abs(space_window[1] / xdelta - round(space_window[1] / xdelta)) > 1e-10
+    ):
+        raise ValueError("space_window does not equally split the dataset")

     # specify the operator to be used to aggregate the values
     # within the space window
     if unit == "mm/h" or unit == "mm":
         method = "mean"
     else:
-        raise ValueError(
-            "can only aggregate units of 'mm/h' or 'mm' " + "not %s" % unit
-        )
+        raise ValueError(f"can only aggregate units of 'mm/h' or 'mm' not {unit}")

     if ignore_nan:
         method = "".join(("nan", method))

-    R = aggregate_fields(R, nframes[0], axis=axes[0], method=method)
-    R = aggregate_fields(R, nframes[1], axis=axes[1], method=method)
+    window_size = (
+        int(round(space_window[0] / ydelta)),
+        int(round(space_window[1] / xdelta)),
+    )

-    metadata["ypixelsize"] = space_window[1]
-    metadata["xpixelsize"] = space_window[0]
+    return aggregate_fields(dataset, window_size, ["y", "x"], method, "mean")
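A matching sketch for spatial upscaling (again hand-built and illustrative; note that the refactored code requires a "stepsize" attribute on the x and y coordinates):

    import numpy as np
    import xarray as xr

    from pysteps.utils import dimension

    # Hypothetical 1 km grid (mm/h); x/y carry the pixel size as "stepsize".
    dataset = xr.Dataset(
        data_vars={
            "precip_intensity": (
                ("y", "x"),
                np.arange(16.0).reshape(4, 4),
                {"units": "mm/h", "transform": None, "threshold": 0, "zerovalue": 0},
            )
        },
        coords={
            "y": ("y", [0.5, 1.5, 2.5, 3.5], {"stepsize": 1.0}),
            "x": ("x", [0.5, 1.5, 2.5, 3.5], {"stepsize": 1.0}),
        },
        attrs={"precip_var": "precip_intensity"},
    )

    # Upscale from 1 km to 2 km pixels: each output cell is the mean of a
    # 2x2 block, so the 4x4 field becomes 2x2.
    upscaled = dimension.aggregate_fields_space(dataset, space_window=2.0)
    assert upscaled["precip_intensity"].shape == (2, 2)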

-    return R, metadata
-
-def aggregate_fields(data, window_size, axis=0, method="mean", trim=False):
+def aggregate_fields(
+    dataset: xr.Dataset,
+    window_size,
+    dim="x",
+    method="mean",
+    velocity_method="mean",
+    trim=False,
+) -> xr.Dataset:
     """Aggregate fields along a given direction.

-    It attempts to aggregate the given R axis in an integer number of sections
-    of length = ``window_size``.
-    If such a aggregation is not possible, an error is raised unless ``trim``
-    set to True, in which case the axis is trimmed (from the end)
-    to make it perfectly divisible".
+    It attempts to aggregate the given dataset along the given dim in an integer
+    number of sections of length = ``window_size``.
+    If such an aggregation is not possible, an error is raised unless ``trim``
+    is set to True, in which case the dim is trimmed (from the end)
+    to make it perfectly divisible.

     Parameters
     ----------
-    data: array-like
-        Array of any shape containing the input fields.
-    window_size: int or tuple of ints
+    dataset: xarray.Dataset
+        Dataset containing the input fields as described in the documentation of
+        :py:mod:`pysteps.io.importers`.
+    window_size: int or array-like of ints
         The length of the window that is used to aggregate the fields.
         If a single integer value is given, the same window is used for
-        all the selected axis.
+        all the selected dims.

         If ``window_size`` is a 1D array-like,
         each element indicates the length of the window that is used
-        to aggregate the fields along each axis. In this case,
-        the number of elements of 'window_size' must be the same as the elements
-        in the ``axis`` argument.
-    axis: int or array-like of ints
-        Axis or axes where to perform the aggregation.
-        If this is a tuple of ints, the aggregation is performed over multiple
-        axes, instead of a single axis
+        to aggregate the fields along each dim. In this case,
+        the number of elements of ``window_size`` must match the number of
+        elements in the ``dim`` argument.
+    dim: str or array-like of strs
+        Dim or dims along which to perform the aggregation.
+        If this is an array-like of strs, the aggregation is performed over
+        multiple dims, instead of a single dim.
     method: string, optional
         Optional argument that specifies the operation to use
-        to aggregate the values within the window.
-        Default to mean operator.
+        to aggregate the precipitation values within the window.
+        Defaults to the mean operator.
+    velocity_method: string, optional
+        Optional argument that specifies the operation to use
+        to aggregate the velocity values within the window.
+        Defaults to the mean operator.
     trim: bool
-        In case that the ``data`` is not perfectly divisible by
-        ``window_size`` along the selected axis:
+        In case the ``dataset`` is not perfectly divisible by
+        ``window_size`` along the selected dim:

         - trim=True: the data will be trimmed (from the end) along that
-          axis to make it perfectly divisible.
+          dim to make it perfectly divisible.
         - trim=False: a ValueError exception is raised.

     Returns
     -------
-    new_array: array-like
-        The new aggregated array with shape[axis] = k,
-        where k = R.shape[axis] / window_size.
+    dataset: xarray.Dataset
+        The new dataset.

     See also
     --------
@@ -267,90 +241,103 @@
     pysteps.utils.dimension.aggregate_fields_space
     """
-    if np.ndim(axis) > 1:
+    if np.ndim(dim) > 1:
         raise TypeError(
-            "Only integers or integer 1D arrays can be used for the "
-            "'axis' argument."
+            "Only strings or 1D arrays of strings can be used for the "
+            "'dim' argument."
         )

-    if np.ndim(axis) == 1:
-        axis = np.asarray(axis)
-        if np.ndim(window_size) == 0:
-            window_size = (window_size,) * axis.size
+    if np.ndim(dim) == 0:
+        dim = [dim]

-        window_size = np.asarray(window_size, dtype="int")
-
-        if window_size.shape != axis.shape:
-            raise ValueError(
-                "The 'window_size' and 'axis' shapes are incompatible."
-                f"window_size.shape: {str(window_size.shape)}, "
-                f"axis.shape: {str(axis.shape)}, "
-            )
-
-        new_data = data.copy()
-        for i in range(axis.size):
-            # Recursively call the aggregate_fields function
-            new_data = aggregate_fields(
-                new_data, window_size[i], axis=axis[i], method=method, trim=trim
-            )
-
-        return new_data
-
-    if np.ndim(window_size) != 0:
-        raise TypeError(
-            "A single axis was selected for the aggregation but several"
-            f"of window_sizes were given: {str(window_size)}."
-        )
+    if np.ndim(window_size) == 0:
+        window_size = [window_size for _ in dim]

-    data = np.asarray(data).copy()
-    orig_shape = data.shape
+    if len(window_size) != len(dim):
+        raise TypeError(
+            "The length of 'window_size' does not match the length of 'dim'"
+        )

     if method not in _aggregation_methods:
         raise ValueError(
             "Aggregation method not recognized. "
             f"Available methods: {str(list(_aggregation_methods.keys()))}"
         )
+    for ws in window_size:
+        if ws <= 0:
+            raise ValueError("'window_size' must be strictly positive")

-    if window_size <= 0:
-        raise ValueError("'window_size' must be strictly positive")
+    for d, ws in zip(dim, window_size):
+        if (dataset.sizes[d] % ws) and (not trim):
+            raise ValueError(
+                f"Since the 'trim' argument was set to False, "
+                f"the 'window_size' {ws} must exactly divide "
+                f"the dimension along the selected axis: "
+                f"dataset.sizes[dim]={dataset.sizes[d]}"
+            )

-    if (orig_shape[axis] % window_size) and (not trim):
-        raise ValueError(
-            f"Since 'trim' argument was set to False,"
-            f"the 'window_size' {window_size} must exactly divide"
-            f"the dimension along the selected axis:"
-            f"data.shape[axis]={orig_shape[axis]}"
-        )
-
-    new_data = data.swapaxes(axis, 0)
-    if trim:
-        trim_size = data.shape[axis] % window_size
-        if trim_size > 0:
-            new_data = new_data[:-trim_size]
-
-    new_data_shape = list(new_data.shape)
-    new_data_shape[0] //= window_size  # Final shape
-
-    new_data = new_data.reshape(new_data_shape[0], window_size, -1)
-
-    new_data = _aggregation_methods[method](new_data, axis=1)
-
-    new_data = new_data.reshape(new_data_shape).swapaxes(axis, 0)
-
-    return new_data
+    dataset_ref = dataset
+
+    def _aggregate(data, agg_method):
+        # Reduce with a rolling window, then keep every window_size-th element
+        # so that each output pixel covers one non-overlapping window.
+        return (
+            data.rolling(dict(zip(dim, window_size)))
+            .reduce(_aggregation_methods[agg_method])
+            .isel(
+                {
+                    d: slice(
+                        ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws
+                    )
+                    for d, ws in zip(dim, window_size)
+                }
+            )
+        )
+
+    dataset = _aggregate(dataset, method)
+    if "velocity_x" in dataset_ref:
+        dataset["velocity_x"] = _aggregate(dataset_ref["velocity_x"], velocity_method)
+    if "velocity_y" in dataset_ref:
+        dataset["velocity_y"] = _aggregate(dataset_ref["velocity_y"], velocity_method)
+    if "quality" in dataset_ref:
+        # quality is always aggregated with the minimum, i.e. the most
+        # pessimistic value within each window
+        dataset["quality"] = _aggregate(dataset_ref["quality"], "min")
+
+    return dataset
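A short sketch of the trim behavior of `aggregate_fields` (illustrative, hand-built dataset as above):

    import numpy as np
    import xarray as xr

    from pysteps.utils import dimension

    # Hypothetical field with 5 columns: not divisible by a window of 2.
    dataset = xr.Dataset(
        data_vars={"precip_intensity": (("y", "x"), np.arange(10.0).reshape(2, 5))},
        coords={"y": [0.5, 1.5], "x": [0.5, 1.5, 2.5, 3.5, 4.5]},
        attrs={"precip_var": "precip_intensity"},
    )

    # trim=False (the default) raises because 5 % 2 != 0 ...
    try:
        dimension.aggregate_fields(dataset, 2, dim="x")
    except ValueError:
        pass

    # ... while trim=True drops the trailing column and aggregates the rest.
    trimmed = dimension.aggregate_fields(dataset, 2, dim="x", trim=True)
    assert trimmed["precip_intensity"].shape == (2, 2)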
- f"window_size.shape: {str(window_size.shape)}, " - f"axis.shape: {str(axis.shape)}, " - ) - - new_data = data.copy() - for i in range(axis.size): - # Recursively call the aggregate_fields function - new_data = aggregate_fields( - new_data, window_size[i], axis=axis[i], method=method, trim=trim - ) - - return new_data - - if np.ndim(window_size) != 0: - raise TypeError( - "A single axis was selected for the aggregation but several" - f"of window_sizes were given: {str(window_size)}." - ) + if np.ndim(window_size) == 0: + window_size = [window_size for _ in dim] - data = np.asarray(data).copy() - orig_shape = data.shape + if len(window_size) != len(dim): + raise TypeError("The length of window size does not to match the length of dim") if method not in _aggregation_methods: raise ValueError( "Aggregation method not recognized. " f"Available methods: {str(list(_aggregation_methods.keys()))}" ) + for ws in window_size: + if ws <= 0: + raise ValueError("'window_size' must be strictly positive") - if window_size <= 0: - raise ValueError("'window_size' must be strictly positive") + for d, ws in zip(dim, window_size): + if (dataset.sizes[d] % ws) and (not trim): + raise ValueError( + f"Since 'trim' argument was set to False," + f"the 'window_size' {ws} must exactly divide" + f"the dimension along the selected axis:" + f"dataset.sizes[dim]={dataset.sizes[d]}" + ) - if (orig_shape[axis] % window_size) and (not trim): - raise ValueError( - f"Since 'trim' argument was set to False," - f"the 'window_size' {window_size} must exactly divide" - f"the dimension along the selected axis:" - f"data.shape[axis]={orig_shape[axis]}" + dataset_ref = dataset + dataset = ( + dataset.rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[method]) + .isel( + { + d: slice(ws - 1, dataset.sizes[d] - dataset.sizes[d] % ws, ws) + for d, ws in zip(dim, window_size) + } + ) + ) + if "velocity_x" in dataset_ref: + dataset["velocity_x"] = ( + dataset_ref["velocity_x"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[velocity_method]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + if "velocity_y" in dataset_ref: + dataset["velocity_y"] = ( + dataset_ref["velocity_y"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[velocity_method]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + if "quality" in dataset_ref: + dataset["quality"] = ( + dataset_ref["quality"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods["min"]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) ) - new_data = data.swapaxes(axis, 0) - if trim: - trim_size = data.shape[axis] % window_size - if trim_size > 0: - new_data = new_data[:-trim_size] - - new_data_shape = list(new_data.shape) - new_data_shape[0] //= window_size # Final shape - - new_data = new_data.reshape(new_data_shape[0], window_size, -1) - - new_data = _aggregation_methods[method](new_data, axis=1) - - new_data = new_data.reshape(new_data_shape).swapaxes(axis, 0) - - return new_data + return dataset -def clip_domain(R, metadata, extent=None): +def clip_domain(dataset: xr.Dataset, extent=None): """ Clip the field domain by geographical coordinates. Parameters ---------- - R: array-like - Array of shape (m,n) or (t,m,n) containing the input fields. 
- metadata: dict - Metadata dictionary containing the x1, x2, y1, y2, - xpixelsize, ypixelsize, - zerovalue and yorigin attributes as described in the documentation of + dataset: xarray.Dataset + Dataset containing the input fields as described in the documentation of :py:mod:`pysteps.io.importers`. extent: scalars (left, right, bottom, top), optional The extent of the bounding box in data coordinates to be used to clip @@ -362,238 +349,165 @@ def clip_domain(R, metadata, extent=None): Returns ------- - R: array-like - the clipped array - metadata: dict - the metadata with updated attributes. + dataset: xarray.Dataset + The clipped dataset """ + if extent is None: + return dataset + return dataset.sel(x=slice(extent[0], extent[1]), y=slice(extent[2], extent[3])) - R = R.copy() - R_shape = np.array(R.shape) - metadata = metadata.copy() - if extent is None: - return R, metadata - - if len(R.shape) < 2: - raise ValueError("The number of dimension must be > 1") - if len(R.shape) == 2: - R = R[None, None, :, :] - if len(R.shape) == 3: - R = R[None, :, :, :] - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - # extract original domain coordinates - left = metadata["x1"] - right = metadata["x2"] - bottom = metadata["y1"] - top = metadata["y2"] - - # extract bounding box coordinates - left_ = extent[0] - right_ = extent[1] - bottom_ = extent[2] - top_ = extent[3] - - # compute its extent in pixels - dim_x_ = int((right_ - left_) / metadata["xpixelsize"]) - dim_y_ = int((top_ - bottom_) / metadata["ypixelsize"]) - R_ = np.ones((R.shape[0], R.shape[1], dim_y_, dim_x_)) * metadata["zerovalue"] - - # build set of coordinates for the original domain - y_coord = ( - np.linspace(bottom, top - metadata["ypixelsize"], R.shape[2]) - + metadata["ypixelsize"] / 2.0 - ) - x_coord = ( - np.linspace(left, right - metadata["xpixelsize"], R.shape[3]) - + metadata["xpixelsize"] / 2.0 +def _pad_domain( + dataset: xr.Dataset, dim_to_pad: str, idx_buffer: int, zerovalue: float +) -> xr.Dataset: + delta = dataset[dim_to_pad].attrs["stepsize"] + end_values = ( + dataset[dim_to_pad].values[0] - delta * idx_buffer, + dataset[dim_to_pad].values[-1] + delta * idx_buffer, ) - # build set of coordinates for the new domain - y_coord_ = ( - np.linspace(bottom_, top_ - metadata["ypixelsize"], R_.shape[2]) - + metadata["ypixelsize"] / 2.0 + dataset_ref = dataset + + dataset = dataset_ref.pad({dim_to_pad: idx_buffer}, constant_values=zerovalue) + dataset[dim_to_pad] = dataset_ref[dim_to_pad].pad( + {dim_to_pad: idx_buffer}, + mode="linear_ramp", + end_values={dim_to_pad: end_values}, ) - x_coord_ = ( - np.linspace(left_, right_ - metadata["xpixelsize"], R_.shape[3]) - + metadata["xpixelsize"] / 2.0 + dataset.lat.data[:], dataset.lon.data[:] = compute_lat_lon( + dataset.x.values, dataset.y.values, dataset.attrs["projection"] ) - - # origin='upper' reverses the vertical axes direction - if metadata["yorigin"] == "upper": - y_coord = y_coord[::-1] - y_coord_ = y_coord_[::-1] - - # extract original domain - idx_y = np.where(np.logical_and(y_coord < top_, y_coord > bottom_))[0] - idx_x = np.where(np.logical_and(x_coord < right_, x_coord > left_))[0] - - # extract new domain - idx_y_ = np.where(np.logical_and(y_coord_ < top, y_coord_ > bottom))[0] - idx_x_ = np.where(np.logical_and(x_coord_ < right, x_coord_ > left))[0] - - # compose the new array - R_[:, :, idx_y_[0] : (idx_y_[-1] + 1), idx_x_[0] : (idx_x_[-1] + 1)] = R[ - :, :, idx_y[0] : (idx_y[-1] + 1), idx_x[0] : (idx_x[-1] + 1) - ] - - # update 
coordinates - metadata["y1"] = bottom_ - metadata["y2"] = top_ - metadata["x1"] = left_ - metadata["x2"] = right_ - - R_shape[-2] = R_.shape[-2] - R_shape[-1] = R_.shape[-1] - - return R_.reshape(R_shape), metadata + if "velocity_x" in dataset_ref: + dataset["velocity_x"].data = ( + dataset_ref["velocity_x"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + if "velocity_y" in dataset_ref: + dataset["velocity_y"].data = ( + dataset_ref["velocity_y"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + if "quality" in dataset_ref: + dataset["quality"].data = ( + dataset_ref["quality"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + return dataset -def square_domain(R, metadata, method="pad", inverse=False): +def square_domain(dataset: xr.Dataset, method="pad", inverse=False): """ Either pad or crop a field to obtain a square domain. Parameters ---------- - R: array-like - Array of shape (m,n) or (t,m,n) containing the input fields. - metadata: dict - Metadata dictionary containing the x1, x2, y1, y2, - xpixelsize, ypixelsize, - attributes as described in the documentation of + dataset: xarray.Dataset + Dataset containing the input fields as described in the documentation of :py:mod:`pysteps.io.importers`. method: {'pad', 'crop'}, optional Either pad or crop. - If pad, an equal number of zeros is added to both ends of its shortest - side in order to produce a square domain. + If pad, an equal number of pixels + filled with the minimum value of the precipitation + field is added to both ends of the precipitation fields shortest + side in order to produce a square domain. The quality and velocity fields + are always padded with zeros. If crop, an equal number of pixels is removed to both ends of its longest side in order to produce a square domain. Note that the crop method involves an irreversible loss of data. inverse: bool, optional Perform the inverse method to recover the original domain shape. - After a crop, the inverse is performed by padding the field with zeros. + After a crop, the inverse is performed by doing a pad. Returns ------- - R: array-like - the reshape dataset - metadata: dict - the metadata with updated attributes. 
+ dataset: xarray.Dataset + the reshaped dataset """ - R = R.copy() - R_shape = np.array(R.shape) - metadata = metadata.copy() - - if not inverse: - if len(R.shape) < 2: - raise ValueError("The number of dimension must be > 1") - if len(R.shape) == 2: - R = R[None, None, :] - if len(R.shape) == 3: - R = R[None, :] - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - if R.shape[2] == R.shape[3]: - return R.squeeze() - - orig_dim = R.shape - orig_dim_n = orig_dim[0] - orig_dim_t = orig_dim[1] - orig_dim_y = orig_dim[2] - orig_dim_x = orig_dim[3] + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip_data = dataset[precip_var].values + + x_len = len(dataset.x.values) + y_len = len(dataset.y.values) + + if inverse: + if "orig_domain" not in dataset.attrs or "square_method" not in dataset.attrs: + raise ValueError("Attempting to inverse a non squared dataset") + method = dataset.attrs.pop("square_method") + orig_domain = dataset.attrs.pop("orig_domain") if method == "pad": - new_dim = np.max(orig_dim[2:]) - R_ = np.ones((orig_dim_n, orig_dim_t, new_dim, new_dim)) * R.min() - - if orig_dim_x < new_dim: - idx_buffer = int((new_dim - orig_dim_x) / 2.0) - R_[:, :, :, idx_buffer : (idx_buffer + orig_dim_x)] = R - metadata["x1"] -= idx_buffer * metadata["xpixelsize"] - metadata["x2"] += idx_buffer * metadata["xpixelsize"] - - elif orig_dim_y < new_dim: - idx_buffer = int((new_dim - orig_dim_y) / 2.0) - R_[:, :, idx_buffer : (idx_buffer + orig_dim_y), :] = R - metadata["y1"] -= idx_buffer * metadata["ypixelsize"] - metadata["y2"] += idx_buffer * metadata["ypixelsize"] - - elif method == "crop": - new_dim = np.min(orig_dim[2:]) - R_ = np.zeros((orig_dim_n, orig_dim_t, new_dim, new_dim)) - - if orig_dim_x > new_dim: - idx_buffer = int((orig_dim_x - new_dim) / 2.0) - R_ = R[:, :, :, idx_buffer : (idx_buffer + new_dim)] - metadata["x1"] += idx_buffer * metadata["xpixelsize"] - metadata["x2"] -= idx_buffer * metadata["xpixelsize"] - - elif orig_dim_y > new_dim: - idx_buffer = int((orig_dim_y - new_dim) / 2.0) - R_ = R[:, :, idx_buffer : (idx_buffer + new_dim), :] - metadata["y1"] += idx_buffer * metadata["ypixelsize"] - metadata["y2"] -= idx_buffer * metadata["ypixelsize"] - - else: - raise ValueError("Unknown type") - - metadata["orig_domain"] = (orig_dim_y, orig_dim_x) - metadata["square_method"] = method - - R_shape[-2] = R_.shape[-2] - R_shape[-1] = R_.shape[-1] - - return R_.reshape(R_shape), metadata - - elif inverse: - if len(R.shape) < 2: - raise ValueError("The number of dimension must be > 2") - if len(R.shape) == 2: - R = R[None, None, :] - if len(R.shape) == 3: - R = R[None, :] - if len(R.shape) > 4: - raise ValueError("The number of dimension must be <= 4") - - method = metadata.pop("square_method") - shape = metadata.pop("orig_domain") - - if R.shape[2] == shape[0] and R.shape[3] == shape[1]: - return R.squeeze(), metadata - - R_ = np.zeros((R.shape[0], R.shape[1], shape[0], shape[1])) + if x_len > len(orig_domain[1]): + extent = ( + orig_domain[1].min(), + orig_domain[1].max(), + dataset.y.values.min(), + dataset.y.values.max(), + ) + elif y_len > len(orig_domain[0]): + extent = ( + dataset.x.values.min(), + dataset.x.values.max(), + orig_domain[0].min(), + orig_domain[0].max(), + ) + else: + return dataset + return clip_domain(dataset, extent) + + if method == "crop": + if x_len < len(orig_domain[1]): + dim_to_pad = "x" + idx_buffer = int((len(orig_domain[1]) - x_len) / 2.0) + elif y_len < len(orig_domain[0]): + dim_to_pad = 
"y" + idx_buffer = int((len(orig_domain[0]) - y_len) / 2.0) + else: + return dataset + return _pad_domain(dataset, dim_to_pad, idx_buffer, np.nanmin(precip_data)) + + raise ValueError(f"Unknown square method: {method}") + + else: + if "orig_domain" in dataset.attrs and "square_method" in dataset.attrs: + raise ValueError("Attempting to square an already squared dataset") + dataset.attrs["orig_domain"] = (dataset.y.values, dataset.x.values) + dataset.attrs["square_method"] = method if method == "pad": - if R.shape[2] == shape[0]: - idx_buffer = int((R.shape[3] - shape[1]) / 2.0) - R_ = R[:, :, :, idx_buffer : (idx_buffer + shape[1])] - metadata["x1"] += idx_buffer * metadata["xpixelsize"] - metadata["x2"] -= idx_buffer * metadata["xpixelsize"] - - elif R.shape[3] == shape[1]: - idx_buffer = int((R.shape[2] - shape[0]) / 2.0) - R_ = R[:, :, idx_buffer : (idx_buffer + shape[0]), :] - metadata["y1"] += idx_buffer * metadata["ypixelsize"] - metadata["y2"] -= idx_buffer * metadata["ypixelsize"] - - elif method == "crop": - if R.shape[2] == shape[0]: - idx_buffer = int((shape[1] - R.shape[3]) / 2.0) - R_[:, :, :, idx_buffer : (idx_buffer + R.shape[3])] = R - metadata["x1"] -= idx_buffer * metadata["xpixelsize"] - metadata["x2"] += idx_buffer * metadata["xpixelsize"] - - elif R.shape[3] == shape[1]: - idx_buffer = int((shape[0] - R.shape[2]) / 2.0) - R_[:, :, idx_buffer : (idx_buffer + R.shape[2]), :] = R - metadata["y1"] -= idx_buffer * metadata["ypixelsize"] - metadata["y2"] += idx_buffer * metadata["ypixelsize"] - - R_shape[-2] = R_.shape[-2] - R_shape[-1] = R_.shape[-1] - - return R_.reshape(R_shape), metadata + if x_len > y_len: + dim_to_pad = "y" + idx_buffer = int((x_len - y_len) / 2.0) + elif y_len > x_len: + dim_to_pad = "x" + idx_buffer = int((y_len - x_len) / 2.0) + else: + return dataset + return _pad_domain(dataset, dim_to_pad, idx_buffer, np.nanmin(precip_data)) + + if method == "crop": + if x_len > y_len: + idx_buffer = int((x_len - y_len) / 2.0) + extent = ( + dataset.x.values[idx_buffer], + dataset.x.values[-idx_buffer - 1], + dataset.y.values.min(), + dataset.y.values.max(), + ) + elif y_len > x_len: + idx_buffer = int((y_len - x_len) / 2.0) + extent = ( + dataset.x.values.min(), + dataset.x.values.max(), + dataset.y.values[idx_buffer], + dataset.y.values[-idx_buffer - 1], + ) + else: + return dataset + return clip_domain(dataset, extent) + + raise ValueError(f"Unknown square method: {method}") diff --git a/pysteps/utils/transformation.py b/pysteps/utils/transformation.py index 87ac9adc7..3e48fe0d8 100644 --- a/pysteps/utils/transformation.py +++ b/pysteps/utils/transformation.py @@ -14,9 +14,11 @@ sqrt_transform """ +import warnings + import numpy as np import scipy.stats as scipy_stats -import warnings +import xarray as xr from scipy.interpolate import interp1d warnings.filterwarnings( @@ -25,8 +27,8 @@ def boxcox_transform( - R, metadata=None, Lambda=None, threshold=None, zerovalue=None, inverse=False -): + dataset: xr.Dataset, Lambda=None, threshold=None, zerovalue=None, inverse=False +) -> xr.Dataset: """ The one-parameter Box-Cox transformation. @@ -39,11 +41,8 @@ def boxcox_transform( Parameters ---------- - R: array-like - Array of any shape to be transformed. - metadata: dict, optional - Metadata dictionary containing the transform, zerovalue and threshold - attributes as described in the documentation of + dataset: xarray.Dataset + Dataset to be transformed as described in the documentation of :py:mod:`pysteps.io.importers`. 
Lambda: float, optional
         Parameter Lambda of the Box-Cox transformation.
@@ -52,7 +51,7 @@
         Choose Lambda < 1 for positively skewed data, Lambda > 1 for negatively
         skewed data.
     threshold: float, optional
-        The value that is used for thresholding with the same units as R.
+        The value that is used for thresholding with the same units as in the dataset.
         If None, the threshold contained in metadata is used.
         If no threshold is found in the metadata,
         a value of 0.1 is used as default.
@@ -64,10 +63,8 @@

     Returns
     -------
-    R: array-like
-        Array of any shape containing the (back-)transformed units.
-    metadata: dict
-        The metadata with updated attributes.
+    dataset: xarray.Dataset
+        Dataset containing the (back-)transformed units.

     References
     ----------
@@ -76,20 +73,14 @@
     doi:10.1111/j.2517-6161.1964.tb00553.x
     """
-    R = R.copy()
-
-    if metadata is None:
-        if inverse:
-            metadata = {"transform": "BoxCox"}
-        else:
-            metadata = {"transform": None}
-
-    else:
-        metadata = metadata.copy()
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs
+    precip_data = dataset[precip_var].values

     if not inverse:
         if metadata["transform"] == "BoxCox":
-            return R, metadata
+            return dataset

         if Lambda is None:
             Lambda = metadata.get("BoxCox_lambda", 0.0)
@@ -97,21 +88,21 @@
         if threshold is None:
             threshold = metadata.get("threshold", 0.1)

-        zeros = R < threshold
+        zeros = precip_data < threshold

         # Apply Box-Cox transform
         if Lambda == 0.0:
-            R[~zeros] = np.log(R[~zeros])
+            precip_data[~zeros] = np.log(precip_data[~zeros])
             threshold = np.log(threshold)

         else:
-            R[~zeros] = (R[~zeros] ** Lambda - 1) / Lambda
+            precip_data[~zeros] = (precip_data[~zeros] ** Lambda - 1) / Lambda
             threshold = (threshold**Lambda - 1) / Lambda

         # Set value for zeros
         if zerovalue is None:
             zerovalue = threshold - 1  # TODO: set to a more meaningful value
-        R[zeros] = zerovalue
+        precip_data[zeros] = zerovalue

         metadata["transform"] = "BoxCox"
         metadata["BoxCox_lambda"] = Lambda
@@ -120,7 +111,7 @@

     elif inverse:
         if metadata["transform"] not in ["BoxCox", "log"]:
-            return R, metadata
+            return dataset

         if Lambda is None:
             Lambda = metadata.pop("BoxCox_lambda", 0.0)
@@ -131,35 +122,36 @@

         # Apply inverse Box-Cox transform
         if Lambda == 0.0:
-            R = np.exp(R)
+            precip_data = np.exp(precip_data)
             threshold = np.exp(threshold)

         else:
-            R = np.exp(np.log(Lambda * R + 1) / Lambda)
+            precip_data = np.exp(np.log(Lambda * precip_data + 1) / Lambda)
             threshold = np.exp(np.log(Lambda * threshold + 1) / Lambda)

-        R[R < threshold] = zerovalue
+        precip_data[precip_data < threshold] = zerovalue

         metadata["transform"] = None
         metadata["zerovalue"] = zerovalue
         metadata["threshold"] = threshold

-    return R, metadata
+    dataset[precip_var].data[:] = precip_data
+
+    return dataset


-def dB_transform(R, metadata=None, threshold=None, zerovalue=None, inverse=False):
+def dB_transform(
+    dataset: xr.Dataset, threshold=None, zerovalue=None, inverse=False
+) -> xr.Dataset:
     """Methods to transform precipitation intensities to/from dB units.

     Parameters
     ----------
-    R: array-like
-        Array of any shape to be (back-)transformed.
-    metadata: dict, optional
-        Metadata dictionary containing the transform, zerovalue and threshold
-        attributes as described in the documentation of
+    dataset: xarray.Dataset
+        Dataset to be (back-)transformed as described in the documentation of
         :py:mod:`pysteps.io.importers`.
    threshold: float, optional
-        Optional value that is used for thresholding with the same units as R.
+        Optional value that is used for thresholding with the same units as in the dataset.
        If None, the threshold contained in metadata is used.
        If no threshold is found in the metadata,
        a value of 0.1 is used as default.
@@ -171,81 +163,70 @@ def dB_transform(R, metadata=None, threshold=None, zerovalue=None, inverse=False

    Returns
    -------
-    R: array-like
-        Array of any shape containing the (back-)transformed units.
-    metadata: dict
-        The metadata with updated attributes.
+    dataset: xarray.Dataset
+        Dataset containing the (back-)transformed units.
    """
-    R = R.copy()
-
-    if metadata is None:
-        if inverse:
-            metadata = {"transform": "dB"}
-        else:
-            metadata = {"transform": None}
-
-    else:
-        metadata = metadata.copy()
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs
+    precip_data = dataset[precip_var].values

    # to dB units
    if not inverse:
        if metadata["transform"] == "dB":
-            return R, metadata
+            return dataset

        if threshold is None:
            threshold = metadata.get("threshold", 0.1)
-        zeros = R < threshold
+        zeros = precip_data < threshold

        # Convert to dB
-        R[~zeros] = 10.0 * np.log10(R[~zeros])
+        precip_data[~zeros] = 10.0 * np.log10(precip_data[~zeros])
        threshold = 10.0 * np.log10(threshold)

        # Set value for zeros
        if zerovalue is None:
            zerovalue = threshold - 5  # TODO: set to a more meaningful value
-        R[zeros] = zerovalue
+        precip_data[zeros] = zerovalue

        metadata["transform"] = "dB"
        metadata["zerovalue"] = zerovalue
        metadata["threshold"] = threshold

-        return R, metadata
-
    # from dB units
    elif inverse:
        if metadata["transform"] != "dB":
-            return R, metadata
+            return dataset

        if threshold is None:
            threshold = metadata.get("threshold", -10.0)
        if zerovalue is None:
            zerovalue = 0.0

-        R = 10.0 ** (R / 10.0)
+        precip_data = 10.0 ** (precip_data / 10.0)
        threshold = 10.0 ** (threshold / 10.0)
-        R[R < threshold] = zerovalue
+        precip_data[precip_data < threshold] = zerovalue

        metadata["transform"] = None
        metadata["threshold"] = threshold
        metadata["zerovalue"] = zerovalue

-    return R, metadata
+    dataset[precip_var].data[:] = precip_data
+
+    return dataset


-def NQ_transform(R, metadata=None, inverse=False, **kwargs):
+def NQ_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.Dataset:
    """
    The normal quantile transformation as in Bogner et al (2012).
    Zero rain values are set to zero in norm space.

    Parameters
    ----------
-    R: array-like
-        Array of any shape to be transformed.
-    metadata: dict, optional
-        Metadata dictionary containing the transform, zerovalue and threshold
-        attributes as described in the documentation of
+    dataset: xarray.Dataset
+        Dataset to be transformed as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    inverse: bool, optional
        If set to True, it performs the inverse transform. False by default.
@@ -260,10 +241,8 @@ def NQ_transform(R, metadata=None, inverse=False, **kwargs):

    Returns
    -------
-    R: array-like
-        Array of any shape containing the (back-)transformed units.
-    metadata: dict
-        The metadata with updated attributes.
+    dataset: xarray.Dataset
+        Dataset containing the (back-)transformed units.
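+
+    Examples
+    --------
+    A minimal round-trip sketch, assuming ``dataset`` is a pysteps-style
+    :py:class:`xarray.Dataset` (e.g. built with
+    :py:func:`pysteps.xarray_helpers.convert_input_to_xarray_dataset`) whose
+    precipitation variable carries ``transform`` and ``zerovalue`` attributes:
+
+    >>> from pysteps.utils import transformation
+    >>> dataset_nqt = transformation.NQ_transform(dataset)  # doctest: +SKIP
+    >>> dataset_back = transformation.NQ_transform(dataset_nqt, inverse=True)  # doctest: +SKIP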

    References
    ----------
@@ -276,105 +255,96 @@ def NQ_transform(R, metadata=None, inverse=False, **kwargs):

    # defaults
    a = kwargs.get("a", 0.0)

-    R = R.copy()
-    shape0 = R.shape
-    R = R.ravel().astype(float)
-    idxNan = np.isnan(R)
-    R_ = R[~idxNan]
-
-    if metadata is None:
-        if inverse:
-            metadata = {"transform": "NQT"}
-        else:
-            metadata = {"transform": None}
-        metadata["zerovalue"] = np.min(R_)
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs
+    precip_data = dataset[precip_var].values

-    else:
-        metadata = metadata.copy()
+    shape0 = precip_data.shape
+    precip_data = precip_data.ravel().astype(float)
+    idxNan = np.isnan(precip_data)
+    precip_data_ = precip_data[~idxNan]

    if not inverse:
        # Plotting positions
        # https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot#Plotting_position
-        n = R_.size
-        Rpp = ((np.arange(n) + 1 - a) / (n + 1 - 2 * a)).reshape(R_.shape)
+        n = precip_data_.size
+        Rpp = ((np.arange(n) + 1 - a) / (n + 1 - 2 * a)).reshape(precip_data_.shape)

        # NQ transform
        Rqn = scipy_stats.norm.ppf(Rpp)
-        R__ = np.interp(R_, R_[np.argsort(R_)], Rqn)
+        precip_data__ = np.interp(
+            precip_data_, precip_data_[np.argsort(precip_data_)], Rqn
+        )

        # set zero rain to 0 in norm space
-        R__[R[~idxNan] == metadata["zerovalue"]] = 0
+        precip_data__[precip_data[~idxNan] == metadata["zerovalue"]] = 0

        # build inverse transform
        metadata["inqt"] = interp1d(
-            Rqn, R_[np.argsort(R_)], bounds_error=False, fill_value=(R_.min(), R_.max())
+            Rqn,
+            precip_data_[np.argsort(precip_data_)],
+            bounds_error=False,
+            fill_value=(precip_data_.min(), precip_data_.max()),
        )

        metadata["transform"] = "NQT"
        metadata["zerovalue"] = 0
-        metadata["threshold"] = R__[R__ > 0].min()
+        metadata["threshold"] = precip_data__[precip_data__ > 0].min()

    else:
        f = metadata.pop("inqt")
-        R__ = f(R_)
+        precip_data__ = f(precip_data_)
        metadata["transform"] = None
-        metadata["zerovalue"] = R__.min()
-        metadata["threshold"] = R__[R__ > R__.min()].min()
+        metadata["zerovalue"] = precip_data__.min()
+        metadata["threshold"] = precip_data__[precip_data__ > precip_data__.min()].min()

-    R[~idxNan] = R__
+    precip_data[~idxNan] = precip_data__

-    return R.reshape(shape0), metadata
+    dataset[precip_var].data[:] = precip_data.reshape(shape0)
+    return dataset

-def sqrt_transform(R, metadata=None, inverse=False, **kwargs):
+
+def sqrt_transform(dataset: xr.Dataset, inverse: bool = False, **kwargs) -> xr.Dataset:
    """
    Square-root transform.

    Parameters
    ----------
-    R: array-like
-        Array of any shape to be transformed.
-    metadata: dict, optional
-        Metadata dictionary containing the transform, zerovalue and threshold
-        attributes as described in the documentation of
+    dataset: xarray.Dataset
+        Dataset to be transformed as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    inverse: bool, optional
        If set to True, it performs the inverse transform. False by default.

    Returns
    -------
-    R: array-like
-        Array of any shape containing the (back-)transformed units.
-    metadata: dict
-        The metadata with updated attributes.
+    dataset: xarray.Dataset
+        Dataset containing the (back-)transformed units.
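+
+    Examples
+    --------
+    A minimal round-trip sketch, assuming ``dataset`` is a pysteps-style
+    :py:class:`xarray.Dataset` whose precipitation variable carries
+    ``zerovalue`` and ``threshold`` attributes:
+
+    >>> from pysteps.utils import transformation
+    >>> dataset_sqrt = transformation.sqrt_transform(dataset)  # doctest: +SKIP
+    >>> dataset_back = transformation.sqrt_transform(dataset_sqrt, inverse=True)  # doctest: +SKIP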
""" - R = R.copy() - - if metadata is None: - if inverse: - metadata = {"transform": "sqrt"} - else: - metadata = {"transform": None} - metadata["zerovalue"] = np.nan - metadata["threshold"] = np.nan - else: - metadata = metadata.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + precip_data = dataset[precip_var].values if not inverse: # sqrt transform - R = np.sqrt(R) + precip_data = np.sqrt(precip_data) metadata["transform"] = "sqrt" metadata["zerovalue"] = np.sqrt(metadata["zerovalue"]) metadata["threshold"] = np.sqrt(metadata["threshold"]) else: # inverse sqrt transform - R = R**2 + precip_data = precip_data**2 metadata["transform"] = None metadata["zerovalue"] = metadata["zerovalue"] ** 2 metadata["threshold"] = metadata["threshold"] ** 2 - return R, metadata + dataset[precip_var].data[:] = precip_data + + return dataset diff --git a/pysteps/xarray_helpers.py b/pysteps/xarray_helpers.py new file mode 100644 index 000000000..33ec2f40c --- /dev/null +++ b/pysteps/xarray_helpers.py @@ -0,0 +1,323 @@ +# -*- coding: utf-8 -*- +""" +pysteps.converters +================== + +Module with xarray helper functions. + +.. autosummary:: + :toctree: ../generated/ + + convert_to_xarray_dataset +""" + +from datetime import datetime, timedelta + +import numpy as np +import numpy.typing as npt +import pyproj +import xarray as xr + +from pysteps.utils.conversion import cf_parameters_from_unit + +# TODO(converters): Write methods for converting Proj.4 projection definitions +# into CF grid mapping attributes. Currently this has been implemented for +# the stereographic projection. +# The conversions implemented here are take from: +# https://github.com/cf-convention/cf-convention.github.io/blob/master/wkt-proj-4.md + + +def _convert_proj4_to_grid_mapping(proj4str): + tokens = proj4str.split("+") + + d = {} + for t in tokens[1:]: + t = t.split("=") + if len(t) > 1: + d[t[0]] = t[1].strip() + + params = {} + # TODO(exporters): implement more projection types here + if d["proj"] == "stere": + grid_mapping_var_name = "polar_stereographic" + grid_mapping_name = "polar_stereographic" + v = d["lon_0"] if d["lon_0"][-1] not in ["E", "W"] else d["lon_0"][:-1] + params["straight_vertical_longitude_from_pole"] = float(v) + v = d["lat_0"] if d["lat_0"][-1] not in ["N", "S"] else d["lat_0"][:-1] + params["latitude_of_projection_origin"] = float(v) + if "lat_ts" in list(d.keys()): + params["standard_parallel"] = float(d["lat_ts"]) + elif "k_0" in list(d.keys()): + params["scale_factor_at_projection_origin"] = float(d["k_0"]) + params["false_easting"] = float(d["x_0"]) + params["false_northing"] = float(d["y_0"]) + elif d["proj"] == "aea": # Albers Conical Equal Area + grid_mapping_var_name = "proj" + grid_mapping_name = "albers_conical_equal_area" + params["false_easting"] = float(d["x_0"]) if "x_0" in d else float(0) + params["false_northing"] = float(d["y_0"]) if "y_0" in d else float(0) + v = d["lon_0"] if "lon_0" in d else float(0) + params["longitude_of_central_meridian"] = float(v) + v = d["lat_0"] if "lat_0" in d else float(0) + params["latitude_of_projection_origin"] = float(v) + v1 = d["lat_1"] if "lat_1" in d else float(0) + v2 = d["lat_2"] if "lat_2" in d else float(0) + params["standard_parallel"] = (float(v1), float(v2)) + else: + print("unknown projection", d["proj"]) + return None, None, None + + return grid_mapping_var_name, grid_mapping_name, params + + +def compute_lat_lon( + x_r: npt.ArrayLike, y_r: npt.ArrayLike, 
+    projection: str,
+) -> tuple[npt.ArrayLike, npt.ArrayLike]:
+    x_2d, y_2d = np.meshgrid(x_r, y_r)
+    pr = pyproj.Proj(projection)
+    lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
+    return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape)
+
+
+def convert_input_to_xarray_dataset(
+    precip: np.ndarray,
+    quality: np.ndarray | None,
+    metadata: dict[str, str | float | None],
+    startdate: datetime | None = None,
+) -> xr.Dataset:
+    """
+    Read a precip, quality, metadata tuple as returned by the importers
+    (:py:mod:`pysteps.io.importers`) and return an xarray dataset containing
+    this data.
+
+    Parameters
+    ----------
+    precip: array
+        2D array containing imported precipitation data.
+    quality: array, None
+        2D array containing the quality values of the imported precipitation
+        data, can be None.
+    metadata: dict
+        Metadata dictionary containing the attributes described in the
+        documentation of :py:mod:`pysteps.io.importers`.
+    startdate: datetime, None
+        Datetime object containing the start date and time for the nowcast.
+
+    Returns
+    -------
+    out: Dataset
+        A CF compliant xarray dataset, which contains all data and metadata.
+
+    """
+    var_name, attrs = cf_parameters_from_unit(metadata["unit"])
+
+    dims = None
+    timesteps = None
+    ens_number = None
+
+    if precip.ndim == 4:
+        ens_number, timesteps, h, w = precip.shape
+        dims = ["ens_number", "time", "y", "x"]
+
+        if startdate is None:
+            raise ValueError("startdate missing")
+
+    elif precip.ndim == 3:
+        timesteps, h, w = precip.shape
+        dims = ["time", "y", "x"]
+
+        if startdate is None:
+            raise ValueError("startdate missing")
+
+    elif precip.ndim == 2:
+        h, w = precip.shape
+        dims = ["y", "x"]
+    else:
+        raise ValueError(f"Precip field shape: {precip.shape} not supported")
+
+    x_r = np.linspace(metadata["x1"], metadata["x2"], w + 1)[:-1]
+    x_r += 0.5 * (x_r[1] - x_r[0])
+    y_r = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1]
+    y_r += 0.5 * (y_r[1] - y_r[0])
+
+    if "xpixelsize" in metadata:
+        xpixelsize = metadata["xpixelsize"]
+    else:
+        xpixelsize = x_r[1] - x_r[0]
+
+    if "ypixelsize" in metadata:
+        ypixelsize = metadata["ypixelsize"]
+    else:
+        ypixelsize = y_r[1] - y_r[0]
+
+    # Use a tolerant comparison: exact float equality spuriously fails after
+    # the linspace arithmetic above.
+    if not np.isclose(x_r[1] - x_r[0], xpixelsize):
+        raise ValueError("xpixelsize does not match x1, x2 and array shape")
+    if not np.isclose(y_r[1] - y_r[0], ypixelsize):
+        raise ValueError("ypixelsize does not match y1, y2 and array shape")
+
+    # flip yr vector if yorigin is upper
+    if metadata["yorigin"] == "upper":
+        y_r = np.flip(y_r)
+
+    lat, lon = compute_lat_lon(x_r, y_r, metadata["projection"])
+
+    (
+        grid_mapping_var_name,
+        grid_mapping_name,
+        grid_mapping_params,
+    ) = _convert_proj4_to_grid_mapping(metadata["projection"])
+
+    data_vars = {
+        var_name: (
+            dims,
+            precip,
+            {
+                "units": attrs["units"],
+                "standard_name": attrs["standard_name"],
+                "long_name": attrs["long_name"],
+                "grid_mapping": "projection",
+            },
+        )
+    }
+
+    metadata_keys = [
+        "transform",
+        "accutime",
+        "threshold",
+        "zerovalue",
+        "zr_a",
+        "zr_b",
+    ]
+
+    for metadata_field in metadata_keys:
+        if metadata_field in metadata:
+            data_vars[var_name][2][metadata_field] = metadata[metadata_field]
+
+    if quality is not None:
+        data_vars["quality"] = (
+            dims,
+            quality,
+            {
+                "units": "1",
+                "standard_name": "quality_flag",
+                "grid_mapping": "projection",
+            },
+        )
+    coords = {
+        "y": (
+            ["y"],
+            y_r,
+            {
+                "axis": "Y",
+                "long_name": "y-coordinate in Cartesian system",
+                "standard_name": "projection_y_coordinate",
+                "units": metadata["cartesian_unit"],
+                "stepsize": ypixelsize,
+            },
+        ),
+        "x": (
+            ["x"],
+            x_r,
+            {
+                "axis": "X",
+                "long_name": "x-coordinate in Cartesian system",
+                "standard_name": "projection_x_coordinate",
+                "units": metadata["cartesian_unit"],
+                "stepsize": xpixelsize,
+            },
+        ),
+        "lon": (
+            ["y", "x"],
+            lon,
+            {
+                "long_name": "longitude coordinate",
+                "standard_name": "longitude",
+                "units": "degrees_east",
+            },
+        ),
+        "lat": (
+            ["y", "x"],
+            lat,
+            {
+                "long_name": "latitude coordinate",
+                "standard_name": "latitude",
+                "units": "degrees_north",
+            },
+        ),
+    }
+
+    if ens_number is not None:
+        coords["ens_number"] = (
+            ["ens_number"],
+            list(range(1, ens_number + 1, 1)),
+            {
+                "long_name": "ensemble member",
+                "standard_name": "realization",
+                "units": "",
+            },
+        )
+
+    if timesteps is not None:
+        startdate_str = datetime.strftime(startdate, "%Y-%m-%d %H:%M:%S")
+
+        # TODO: store actual timestamps and a "stepsize" attribute on the time
+        # coordinate; convert_output_to_xarray_dataset expects datetime64
+        # values and reads "stepsize" from the time attributes.
+        coords["time"] = (
+            ["time"],
+            list(range(1, timesteps + 1, 1)),
+            {"long_name": "forecast time", "units": "seconds since %s" % startdate_str},
+        )
+    if grid_mapping_var_name is not None:
+        coords[grid_mapping_name] = (
+            [],
+            None,
+            {"grid_mapping_name": grid_mapping_name, **grid_mapping_params},
+        )
+    attrs = {
+        "Conventions": "CF-1.7",
+        "institution": metadata["institution"],
+        "projection": metadata["projection"],
+        "precip_var": var_name,
+    }
+    dataset = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
+    return dataset.sortby(dims)
+
+
+def convert_output_to_xarray_dataset(
+    dataset: xr.Dataset, timesteps: int | list[int], output: np.ndarray
+) -> xr.Dataset:
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs
+
+    last_timestamp = (
+        dataset["time"][-1].values.astype("datetime64[us]").astype(datetime)
+    )
+    time_metadata = dataset["time"].attrs
+    timestep_seconds = dataset["time"].attrs["stepsize"]
+    dataset = dataset.drop_vars([precip_var]).drop_dims(["time"])
+    if isinstance(timesteps, int):
+        timesteps = list(range(1, timesteps + 1))
+    next_timestamps = [
+        last_timestamp + timedelta(seconds=timestep_seconds * i) for i in timesteps
+    ]
+    dataset = dataset.assign_coords(
+        {"time": (["time"], next_timestamps, time_metadata)}
+    )
+
+    if output.ndim == 4:
+        dataset = dataset.assign_coords(
+            {
+                "ens_number": (
+                    ["ens_number"],
+                    list(range(1, output.shape[0] + 1)),
+                    {
+                        "long_name": "ensemble member",
+                        "standard_name": "realization",
+                        "units": "",
+                    },
+                )
+            }
+        )
+        dataset[precip_var] = (["ens_number", "time", "y", "x"], output, metadata)
+    else:
+        dataset[precip_var] = (["time", "y", "x"], output, metadata)
+
+    return dataset
diff --git a/requirements.txt b/requirements.txt
index 1804df1d9..b5075ad35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ matplotlib
 jsmin
 jsonschema
 netCDF4
+xarray
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 84cf372b1..2899e560d 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -9,6 +9,7 @@ matplotlib
 jsmin
 jsonschema
 netCDF4
+xarray

 # Optional dependencies
 dask
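
Taken together, the changes above replace the old ``(R, metadata)`` tuple API with CF-style xarray datasets. A minimal usage sketch of the combined workflow follows; the random rain field and every metadata value are illustrative placeholders, and ``cf_parameters_from_unit`` is assumed to accept ``"mm/h"``.

import numpy as np

from pysteps.utils import transformation
from pysteps.xarray_helpers import convert_input_to_xarray_dataset

# Placeholder 2D rain field standing in for an importer's precip array.
precip = np.random.default_rng(42).gamma(1.0, 2.0, size=(200, 200))

# Minimal legacy metadata dictionary in the style of pysteps.io.importers
# (all values are placeholders).
metadata = {
    "unit": "mm/h",
    "transform": None,
    "accutime": 5.0,
    "threshold": 0.1,
    "zerovalue": 0.0,
    "projection": "+proj=stere +lon_0=25 +lat_0=90 +lat_ts=60 "
    "+x_0=0 +y_0=0 +ellps=WGS84 +units=m",
    "x1": 0.0,
    "x2": 200000.0,
    "y1": 0.0,
    "y2": 200000.0,
    "yorigin": "upper",
    "cartesian_unit": "m",
    "institution": "Example institution",
}

# Wrap the legacy (precip, quality, metadata) triple in a CF-style dataset;
# quality is None here and the field is 2D, so no startdate is needed.
dataset = convert_input_to_xarray_dataset(precip, None, metadata)

# The transforms now consume and return the dataset instead of (R, metadata).
dataset_db = transformation.dB_transform(dataset, threshold=0.1)
dataset_rain = transformation.dB_transform(dataset_db, inverse=True)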