scikit-learn-contrib
diff --git a/examples/benchmark.md
Lines changed: 3 additions & 3 deletions b/examples/benchmark.md
Lines changed: 3 additions & 3 deletions
diff --git a/examples/tutorials/plot_tuto_diffusion_models.py
Lines changed: 7 additions & 11 deletions b/examples/tutorials/plot_tuto_diffusion_models.py
Lines changed: 7 additions & 11 deletions
diff --git a/qolmat/imputations/diffusions/ddpms.py
Lines changed: 25 additions & 5 deletions b/qolmat/imputations/diffusions/ddpms.py
Lines changed: 25 additions & 5 deletions
diff --git a/qolmat/imputations/imputers.py
Lines changed: 0 additions & 33 deletions b/qolmat/imputations/imputers.py
Lines changed: 0 additions & 33 deletions
@@ -311,7 +311,7 @@ from qolmat.imputations.imputers_pytorch import ImputerDiffusion
 from qolmat.imputations.diffusions.ddpms import TabDDPM
 
 X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]])
-imputer = ImputerDiffusion(model=TabDDPM(random_state=11), epochs=50, batch_size=1)
+imputer = ImputerDiffusion(epochs=50, batch_size=1, random_state=11)
 
 imputer.fit_transform(X)
 ```
@@ -322,7 +322,7 @@ from qolmat.imputations.imputers_pytorch import ImputerDiffusion
 from qolmat.imputations.diffusions.ddpms import TabDDPM
 
 X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]])
-imputer = ImputerDiffusion(model=TabDDPM(random_state=11), epochs=50, batch_size=1)
+imputer = ImputerDiffusion(epochs=50, batch_size=1, random_state=11)
 
 imputer.fit_transform(X)
 ```
@@ -358,7 +358,7 @@ encoder, decoder  = imputers_pytorch.build_autoencoder(input_dim=n_variables,lat
 ```python
 dict_imputers["MLP"] = imputer_mlp = imputers_pytorch.ImputerRegressorPyTorch(estimator=estimator, groups=('station',), epochs=500)
 dict_imputers["Autoencoder"] = imputer_autoencoder = imputers_pytorch.ImputerAutoencoder(encoder, decoder, max_iterations=100, epochs=100)
-dict_imputers["Diffusion"] = imputer_diffusion = imputers_pytorch.ImputerDiffusion(model=TabDDPM(num_sampling=5), epochs=100, batch_size=100)
+dict_imputers["Diffusion"] = imputer_diffusion = imputers_pytorch.ImputerDiffusion(epochs=100, batch_size=100, num_sampling=5)
 ```
 
 We can re-run the imputation model benchmark as before.
 
@@ -71,7 +71,6 @@
 df_data_valid = df_data.iloc[:500]
 
 tabddpm = ImputerDiffusion(
-    model=TabDDPM(),
     epochs=10,
     batch_size=100,
     x_valid=df_data_valid,
@@ -160,12 +159,8 @@
 # reconstruction errors (mae) but increases distribution distance (kl_columnwise).
 
 dict_imputers = {
-    "num_sampling=5": ImputerDiffusion(
-        model=TabDDPM(num_sampling=5), epochs=10, batch_size=100
-    ),
-    "num_sampling=10": ImputerDiffusion(
-        model=TabDDPM(num_sampling=10), epochs=10, batch_size=100
-    ),
+    "num_sampling=5": ImputerDiffusion(epochs=10, batch_size=100, num_sampling=5),
+    "num_sampling=10": ImputerDiffusion(epochs=10, batch_size=100, num_sampling=10),
 }
 
 comparison = comparator.Comparator(
@@ -187,7 +182,7 @@
 #
 # Two important hyperparameters for processing time-series data are ``index_datetime``
 # and ``freq_str``.
-# E.g., ``ImputerDiffusion(model=TabDDPM(), index_datetime='datetime', freq_str='1D')``,
+# E.g., ``ImputerDiffusion(index_datetime='datetime', freq_str='1D')``,
 #
 # * ``index_datetime``: the column name of datetime in index. It must be a pandas datetime object.
 #
@@ -210,15 +205,16 @@
 #   but requires a longer training/inference time.
 
 dict_imputers = {
-    "tabddpm": ImputerDiffusion(
-        model=TabDDPM(num_sampling=5), epochs=10, batch_size=100
+    "tabddpm": ImputerDiffusion(model="TabDDPM", epochs=10, batch_size=100, num_sampling=5
     ),
     "tsddpm": ImputerDiffusion(
-        model=TsDDPM(num_sampling=5, is_rolling=False),
+        model="TsDDPM",
         epochs=10,
         batch_size=5,
         index_datetime="date",
         freq_str="5D",
+        num_sampling=5,
+        is_rolling=False
     ),
 }
 
 
@@ -47,7 +47,7 @@ def __init__(
         beta_start: float = 1e-4,
         beta_end: float = 0.02,
         lr: float = 0.001,
-        ratio_nan: float = 0.1,
+        ratio_masked: float = 0.1,
         dim_embedding: int = 128,
         num_blocks: int = 1,
         p_dropout: float = 0.0,
@@ -67,7 +67,7 @@ def __init__(
             Range of beta (noise scale value), by default 0.02
         lr : float, optional
             Learning rate, by default 0.001
-        ratio_nan : float, optional
+        ratio_masked : float, optional
             Ratio of artificial nan for training and validation, by default 0.1
         dim_embedding : int, optional
             Embedding dimension, by default 128
@@ -119,7 +119,7 @@ def __init__(
         self.loss_func = torch.nn.MSELoss(reduction="none")
 
         self.lr = lr
-        self.ratio_nan = ratio_nan
+        self.ratio_masked = ratio_masked
         self.num_noise_steps = num_noise_steps
         self.dim_embedding = dim_embedding
         self.num_blocks = num_blocks
@@ -132,6 +132,21 @@ def __init__(
         seed_torch = self.random_state.randint(2**31 - 1)
         torch.manual_seed(seed_torch)
 
+    def __getstate__(self) -> dict:
+        """Return the state used for pickling (e.g. in sklearn check tests).
+
+        Returns
+        -------
+        dict
+            Shallow copy of ``self.__dict__`` with the ``"optimiser"``
+            entry removed when present.
+
+        """
+        state = self.__dict__.copy()
+        if "optimiser" in state:
+            state.pop("optimiser")
+        return state
+
     def _q_sample(
         self, x: torch.Tensor, t: torch.Tensor
     ) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -448,6 +463,9 @@ def fit(
             Return Self
 
         """
+        seed_torch = self.random_state.randint(2**31 - 1)
+        torch.manual_seed(seed_torch)
+
         self.dim_input = len(x.columns)
         self.epochs = epochs
         self.batch_size = batch_size
@@ -486,7 +504,7 @@ def fit(
             # (with one mask)
             # in validation dataset
             x_valid_mask = missing_patterns.UniformHoleGenerator(
-                n_splits=1, ratio_masked=self.ratio_nan
+                n_splits=1, ratio_masked=self.ratio_masked
             ).split(x_valid)[0]
             # x_valid_obs_mask is the mask for observed values
             x_valid_obs_mask = ~x_valid_mask
@@ -520,7 +538,7 @@ def fit(
             for id_batch, (x_batch, mask_x_batch) in enumerate(dataloader):
                 mask_obs_rand = (
                     torch.FloatTensor(mask_x_batch.size()).uniform_()
-                    > self.ratio_nan
+                    > self.ratio_masked
                 )
                 for col in self.cols_idx_not_imputed:
                     mask_obs_rand[:, col] = 0.0
@@ -576,6 +594,8 @@ def predict(self, x: pd.DataFrame) -> pd.DataFrame:
             Imputed data
 
         """
+        seed_torch = self.random_state.randint(2**31 - 1)
+        torch.manual_seed(seed_torch)
         self._eps_model.eval()
 
         x_processed, x_mask, x_indices = self._process_data(
 
@@ -2003,39 +2003,6 @@ def get_model(self, **hyperparams) -> softimpute.SoftImpute:
 
         return model
 
-    # def _fit_element(
-    #     self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0
-    # ) -> softimpute.SoftImpute:
-    #     """
-    #     Fits the imputer on `df`, at the group and/or column level depending
-    #     on self.groups and self.columnwise.
-
-    #     Parameters
-    #     ----------
-    #     df : pd.DataFrame
-    #         Dataframe on which the imputer is fitted
-    #     col : str, optional
-    #         Column on which the imputer is fitted, by default "__all__"
-    #     ngroup : int, optional
-    #         Id of the group on which the method is applied
-
-    #     Returns
-    #     -------
-    #     Any
-    #         Return fitted SoftImpute model
-
-    #     Raises
-    #     ------
-    #     NotDataFrame
-    #         Input has to be a pandas.DataFrame.
-    #     """
-    #     self._check_dataframe(df)
-    #     assert col == "__all__"
-    #     hyperparams = self.get_hyperparams()
-    #     model = softimpute.SoftImpute(random_state=self._rng, **hyperparams)
-    #     model = model.fit(df.values)
-    #     return model
-
     def _transform_element(
         self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0
     ) -> pd.DataFrame: