Merge pull request #4 from HiDiHlabs/dev

shashwatsahay · web-flow · commit c7455c0541b1 · 2024-04-16T14:51:14.000+02:00
Dev
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
 
-`multispaeti` is an implementation of
+`multispaeti` is a Python implementation of
 [MULTISPATI-PCA](https://doi.org/10.3170/2007-8-18312) /
 [spatialPCA](https://doi.org/10.1038/hdy.2008.34).
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -34,7 +34,7 @@
 
 autodoc_typehints = "none"
 autodoc_typehints_format = "short"
-autoclass_content = "both"
+autoclass_content = "class"
 autodoc_member_order = "groupwise"
 
 python_use_unqualified_type_names = True  # still experimental
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -1,7 +1,7 @@
 What is ``multiSPAETI``?
 ========================
 
-``multispaeti`` is an  implementation of
+``multispaeti`` is a Python implementation of
 `MULTISPATI-PCA <https://doi.org/10.3170/2007-8-18312>`_ /
 `spatialPCA <https://doi.org/10.1038/hdy.2008.34>`_.
 
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
@@ -51,10 +51,3 @@ Alternatively, this can be achieved in one step by
 .. .. code-block:: python
 
 ..     X_transformed = msPCA.moransI_bounds()
-
-.. and :py:meth:`multispaeti.MultispatiPCA.variance_moransI_decomposition` which allows
-.. to decompose the extracted eigenvalues into a variance and Moran's `I` contribution.
-
-.. .. code-block:: python
-
-..     var, moranI = msPCA.variance_moransI_decomposition(X)
diff --git a/multispaeti/_multispati_pca.py b/multispaeti/_multispati_pca.py
@@ -31,6 +31,53 @@ class MultispatiPCA:
     :math:`H=1/(2n)*X^t(W+W^t)X` where `X` is matrix of `n` observations :math:`\\times`
     `d` features, and `W` is a matrix of the connectivity between observations.
 
+    Parameters
+    ----------
+    n_components : int or tuple[int, int], optional
+        Number of components to keep.
+        If None, will keep all components (only supported for non-sparse `X`).
+        If an int, it will keep the top `n_components`.
+        If a tuple, it will keep the top and bottom `n_components` respectively.
+    connectivity : scipy.sparse.sparray or scipy.sparse.spmatrix
+        Matrix of row-wise neighbor definitions i.e. c\ :sub:`ij` is the connectivity of
+        i :math:`\\to` j. The matrix does not have to be symmetric. It can be a
+        binary adjacency matrix or a matrix of connectivities in which case
+        c\ :sub:`ij` should be larger if i and j are close.
+        A distance matrix should be transformed to connectivities by e.g.
+        calculating :math:`1-d/d_{max}` beforehand.
+
+    Raises
+    ------
+    ValueError
+        If connectivity is not a square matrix.
+    ZeroDivisionError
+        If one of the observations has no neighbors.
+
+    Attributes
+    ----------
+    components_ : numpy.ndarray
+        The estimated components: Array of shape `(n_components, n_features)`.
+
+    variance_ : numpy.ndarray
+        The estimated variance part of the eigenvalues. Array of shape `(n_components,)`.
+
+    moransI_ : numpy.ndarray
+        The estimated Moran's I part of the eigenvalues. Array of shape `(n_components,)`.
+
+    eigenvalues_ : numpy.ndarray
+        The eigenvalues corresponding to each of the selected components. Array of shape
+        `(n_components,)`.
+
+    n_components_ : int
+        The estimated number of components.
+
+    n_samples_ : int
+        Number of samples in the training data.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+
     References
     ----------
     `Dray, Stéphane, Sonia Saïd, and Françis Débias. "Spatial ordination of vegetation
@@ -47,29 +94,6 @@ def __init__(
         *,
         connectivity: sparray | spmatrix,
     ):
-        """
-        Parameters
-        ----------
-
-        n_components : int or tuple[int, int], optional
-            Number of components to keep.
-            If None, will keep all components (only supported for non-sparse `X`).
-            If an int, it will keep the top `n_components`.
-            If a tuple, it will keep the top and bottom `n_components` respectively.
-        connectivity : scipy.sparse.sparray or scipy.sparse.spmatrix
-            Matrix of row-wise neighbor definitions i.e. c\ :sub:`ij` is the connectivity of
-            i :math:`\\to` j. The matrix does not have to be symmetric. It can be a
-            binary adjacency matrix or a matrix of connectivities in which case
-            c\ :sub:`ij` should be larger if i and j are close.
-            A distance matrix should be transformed to connectivities by e.g.
-            calculating :math:`1-d/d_{max}` beforehand.
-        Raises
-        ------
-        ValueError
-            If connectivity is not a square matrix.
-        ZeroDivisionError
-            If one of the observations has no neighbors.
-        """
         self._fitted = False
         W = csr_array(connectivity)
         if W.shape[0] != W.shape[1]:
@@ -247,7 +271,10 @@ def transform(self, X: _X) -> np.ndarray:
             self._not_fitted()
 
         X = scale(X, with_mean=not issparse(X))
-        return X @ self.components_
+        X_t = X @ self.components_
+        self.variance_, self.moransI_ = self._variance_moransI_decomposition(X_t)
+
+        return X_t
 
     def fit_transform(self, X: _X) -> np.ndarray:
         """
@@ -291,36 +318,17 @@ def transform_spatial_lag(self, X: _X) -> np.ndarray:
     def _spatial_lag(self, X: np.ndarray) -> np.ndarray:
         return self.W @ X
 
-    def variance_moransI_decomposition(self, X: _X) -> tuple[np.ndarray, np.ndarray]:
-        """
-        Calculate the decomposition of the variance and Moran's I for `n_components`.
-
-        Parameters
-        ----------
-        X : numpy.ndarray or scipy.sparse.csr_array or scipy.sparse.csc_array
-            Array of observations x features.
-
-        Returns
-        -------
-        tuple[numpy.ndarray, numpy.ndarray]
-            Variance and Moran's I.
-
-        Raises
-        ------
-        ValueError
-            If instance has not been fitted.
-        """
-        if not self._fitted:
-            self._not_fitted()
-        transformed = self.transform(X)
-        lag = self._spatial_lag(transformed)
+    def _variance_moransI_decomposition(
+        self, X_t: np.ndarray
+    ) -> tuple[np.ndarray, np.ndarray]:
+        lag = self._spatial_lag(X_t)
 
         # vector of row_Weights from dudi.PCA
-        # (we only use default row_weights i.e. 1/n anyways)
-        w = 1 / X.shape[0]
+        # (we only use default row_weights i.e. 1/n)
+        w = 1 / X_t.shape[0]
 
-        variance = np.sum(transformed * transformed * w, axis=0)
-        moran = np.sum(transformed * lag * w, axis=0) / variance
+        variance = np.sum(X_t * X_t * w, axis=0)
+        moran = np.sum(X_t * lag * w, axis=0) / variance
 
         return variance, moran
 
diff --git a/multispaeti/_plotting.py b/multispaeti/_plotting.py
@@ -13,6 +13,9 @@ def plot_eigenvalues(msPCA: MultispatiPCA, *, n_top: int | None = None) -> Figur
     Parameters
     ----------
     msPCA : MultispatiPCA
+        An instance of MultispatiPCA that has already been used for
+        :py:meth:`multispaeti.MultispatiPCA.fit` so that eigenvalues have already been
+        calculated.
     n_top : int, optional
         Plot the `n_top` highest and `n_top` lowest eigenvalues in a zoomed in view.
 
@@ -52,7 +55,7 @@ def plot_eigenvalues(msPCA: MultispatiPCA, *, n_top: int | None = None) -> Figur
 
 
 def plot_variance_moransI_decomposition(
-    msPCA: MultispatiPCA, X, *, sparse_approx: bool = True, **kwargs
+    msPCA: MultispatiPCA, *, sparse_approx: bool = True, **kwargs
 ) -> Figure:
     """
     Plot the decomposition of variance and Moran's I of the MULTISPATI-PCA eigenvalues.
@@ -63,8 +66,9 @@ def plot_variance_moransI_decomposition(
     Parameters
     ----------
     msPCA : multispaeti.MultispatiPCA
-    X : numpy.ndarray or scipy.sparse.csr_array or scipy.sparse.csc_array
-        TODO Data to calculate the decomposition for.
+        An instance of MultispatiPCA that has already been used for
+        :py:meth:`multispaeti.MultispatiPCA.transform` so that variance and Moran's I
+        contributions to the eigenvalues have already been calculated.
     sparse_approx : bool
         Whether to use a sparse approximation to calculate the decomposition.
 
@@ -73,11 +77,10 @@ def plot_variance_moransI_decomposition(
     matplotlib.figure.Figure
     """
 
-    variance, moranI = msPCA.variance_moransI_decomposition(X)
     I_min, I_max, I_0 = msPCA.moransI_bounds(sparse_approx=sparse_approx)
 
     fig, ax = plt.subplots(1)
-    _ = ax.scatter(x=variance, y=moranI, **kwargs)
+    _ = ax.scatter(x=msPCA.variance_, y=msPCA.moransI_, **kwargs)
 
     plt.axhline(y=I_0, ls="--")
     plt.axhline(y=I_min, ls="--")