Fix annotation mapping so that all tests pass (forward and backward, `max` and `sum` modes)

Arina Danilina · Arina Danilina · commit 4d3c8dbef8a9 · 2024-01-17T17:54:47.000+01:00
diff --git a/src/moscot/base/problems/_mixins.py b/src/moscot/base/problems/_mixins.py
@@ -310,8 +310,9 @@ def _annotation_mapping(
         target: K,
         key: str | None = None,
         forward: bool = True,
-        other_adata: Optional[str] = None,
+        other_adata: str | None = None,
         scale_by_marginals: bool = True,
+        batch_size: int | None = None,
         cell_transition_kwargs: Mapping[str, Any] = types.MappingProxyType({}),
     ) -> pd.DataFrame:
         if mapping_mode == "sum":
@@ -321,37 +322,67 @@ def _annotation_mapping(
             cell_transition_kwargs.setdefault("source", source)
             cell_transition_kwargs.setdefault("target", target)
             cell_transition_kwargs.setdefault("other_adata", other_adata)
-            cell_transition_kwargs.setdefault("forward", forward)
+            cell_transition_kwargs.setdefault("forward", not forward)
             if forward:
-                cell_transition_kwargs.setdefault("source_groups", None)
-                cell_transition_kwargs.setdefault("target_groups", annotation_label)
-                axis = 1  # columns
-            else:
                 cell_transition_kwargs.setdefault("source_groups", annotation_label)
                 cell_transition_kwargs.setdefault("target_groups", None)
                 axis = 0  # rows
+            else:
+                cell_transition_kwargs.setdefault("source_groups", None)
+                cell_transition_kwargs.setdefault("target_groups", annotation_label)
+                axis = 1  # columns
             out: pd.DataFrame = self._cell_transition(**cell_transition_kwargs)
             return out.idxmax(axis=axis).to_frame(name=annotation_label)
         if mapping_mode == "max":
+            out = []
             if forward:
                 source_df = _get_df_cell_transition(
                     self.adata,
                     annotation_keys=[annotation_label],
                     filter_key=key,
                     filter_value=source,
                 )
-                dummy = pd.get_dummies(source_df, prefix="", prefix_sep="")
-                out: ArrayLike = self[(source, target)].push(dummy, scale_by_marginals=scale_by_marginals)
+                out_len = self[(source, target)].solution.shape[1]
+                batch_size = batch_size if batch_size is not None else out_len
+                for batch in range(0, out_len, batch_size):
+                    tm_batch = self.push(
+                        source=source,
+                        target=target,
+                        data=None,
+                        subset=(batch, batch_size),
+                        normalize=True,
+                        return_all=False,
+                        scale_by_marginals=scale_by_marginals,
+                        split_mass=True,
+                        key_added=None,
+                    )
+                    v = np.array(tm_batch.argmax(1))
+                    out.extend(source_df[annotation_label][v[i]] for i in range(len(v)))
+
             else:
                 target_df = _get_df_cell_transition(
                     self.adata if other_adata is None else other_adata,
                     annotation_keys=[annotation_label],
                     filter_key=key,
                     filter_value=target,
                 )
-                dummy = pd.get_dummies(target_df, prefix="", prefix_sep="")
-                out: ArrayLike = self[(source, target)].pull(dummy, scale_by_marginals=scale_by_marginals)
-            categories = pd.Categorical([dummy.columns[i] for i in np.array(out.argmax(1))])
+                out_len = self[(source, target)].solution.shape[0]
+                batch_size = batch_size if batch_size is not None else out_len
+                for batch in range(0, out_len, batch_size):
+                    tm_batch = self.pull(
+                        source=source,
+                        target=target,
+                        data=None,
+                        subset=(batch, batch_size),
+                        normalize=True,
+                        return_all=False,
+                        scale_by_marginals=scale_by_marginals,
+                        split_mass=True,
+                        key_added=None,
+                    )
+                    v = np.array(tm_batch.argmax(1))
+                    out.extend(target_df[annotation_label][v[i]] for i in range(len(v)))
+            categories = pd.Categorical(out)
             return pd.DataFrame(categories, columns=[annotation_label])
         raise NotImplementedError(f"Mapping mode `{mapping_mode!r}` is not yet implemented.")
 
@@ -507,7 +538,7 @@ def _cell_aggregation_transition(
         if batch_size is None:
             batch_size = len(df_2)
         for batch in range(0, len(df_2), batch_size):
-            result = func(  # TODO(@MUCDK) check how to make compatiAnalysisMixinProtocolcelltyble with all policies
+            result = func(  # TODO(@MUCDK) check how to make compatible with all policies
                 source=source,
                 target=target,
                 data=None,
diff --git a/src/moscot/problems/cross_modality/_mixins.py b/src/moscot/problems/cross_modality/_mixins.py
@@ -202,7 +202,7 @@ def annotation_mapping(
             target=target,
             key=self.batch_key,
             forward=forward,
-            other_adata=self.adata_tgt if forward else self.adata_src,
+            other_adata=self.adata_tgt,
             scale_by_marginals=scale_by_marginals,
             cell_transition_kwargs=cell_transition_kwargs,
         )
diff --git a/src/moscot/problems/space/_mixins.py b/src/moscot/problems/space/_mixins.py
@@ -604,26 +604,12 @@ def annotation_mapping(
         scale_by_marginals: bool = True,
         cell_transition_kwargs: Mapping[str, Any] = types.MappingProxyType({}),
     ) -> pd.DataFrame:
-        """
-
-        Notes
-        -----
-        If forward is True, it means that the annotation columns (annotation label) needs to be in the target adata,
-        If forward is False, it means that the annotation column (annotation label) needs to be in the source adata.
-        """
-        cell_transition_kwargs = dict(cell_transition_kwargs)
-        if forward:
-            cell_transition_kwargs.setdefault("source_groups", annotation_label)
-            cell_transition_kwargs.setdefault("target_groups", None)
-        else:
-            cell_transition_kwargs.setdefault("source_groups", None)
-            cell_transition_kwargs.setdefault("target_groups", annotation_label)
         return self._annotation_mapping(
             mapping_mode=mapping_mode,
             annotation_label=annotation_label,
             source=source,
             target=target,
-            forward=not forward if mapping_mode == "sum" else forward,
+            forward=forward,
             key=self.batch_key,
             other_adata=self.adata_sc,
             scale_by_marginals=scale_by_marginals,
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,5 +1,5 @@
 from math import cos, sin
-from typing import Literal, Optional, Tuple
+from typing import Literal, Optional, Tuple, Union
 
 import pytest
 
@@ -211,15 +211,22 @@ def adata_translation_split(adata_translation) -> Tuple[AnnData, AnnData]:
 @pytest.fixture()
 def adata_anno(
     problem_kind: Literal["temporal", "cross_modality", "alignment", "mapping"],
-    # forward: bool
-) -> AnnData | Tuple[AnnData, AnnData]:
+) -> Union[AnnData, Tuple[AnnData, AnnData]]:
     rng = np.random.RandomState(31)
     adata_src = AnnData(X=csr_matrix(rng.normal(size=(10, 60))))
-    adata_src.obs["celltype"] = _gt_source_annotation
-    adata_src.obs["celltype"] = adata_src.obs["celltype"].astype("category")
-    adata_src.uns["expected_max"] = _gt_target_max_annotation
-    adata_src.uns["expected_sum"] = _gt_target_sum_annotation
+    rng_src = rng.choice(["A", "B", "C"], size=5).tolist()
+    adata_src.obs["celltype1"] = ["C", "C", "A", "B", "B"] + rng_src
+    adata_src.obs["celltype1"] = adata_src.obs["celltype1"].astype("category")
+    adata_src.uns["expected_max1"] = ["C", "C", "A", "B", "B"] + rng_src + rng_src
+    adata_src.uns["expected_sum1"] = ["C", "C", "B", "B", "B"] + rng_src + rng_src
+
     adata_tgt = AnnData(X=csr_matrix(rng.normal(size=(15, 60))))
+    rng_tgt = rng.choice(["A", "B", "C"], size=5).tolist()
+    adata_tgt.obs["celltype2"] = ["C", "C", "A", "B", "B"] + rng_tgt + rng_tgt
+    adata_tgt.obs["celltype2"] = adata_tgt.obs["celltype2"].astype("category")
+    adata_tgt.uns["expected_max2"] = ["C", "C", "A", "B", "B"] + rng_tgt
+    adata_tgt.uns["expected_sum2"] = ["C", "C", "B", "B", "B"] + rng_tgt
+
     if problem_kind == "cross_modality":
         adata_src.obs["batch"] = "0"
         adata_tgt.obs["batch"] = "1"
@@ -228,32 +235,33 @@ def adata_anno(
         sc.pp.pca(adata_src)
         sc.pp.pca(adata_tgt)
         return adata_src, adata_tgt
-    if problem_kind in ["alignment", "mapping"]:
+    if problem_kind == "mapping":
+        adata_src.obs["batch"] = "0"
+        adata_tgt.obs["batch"] = "1"
+        sc.pp.pca(adata_src)
+        sc.pp.pca(adata_tgt)
+        adata_tgt.obsm["spatial"] = rng.normal(size=(adata_tgt.n_obs, 2))
+        return adata_src, adata_tgt
+    if problem_kind == "alignment":
         adata_src.obsm["spatial"] = rng.normal(size=(adata_src.n_obs, 2))
         adata_tgt.obsm["spatial"] = rng.normal(size=(adata_tgt.n_obs, 2))
     key = "day" if problem_kind == "temporal" else "batch"
-    adatas = [adata_src, adata_tgt]  # if forward else [adata_tgt, adata_src]
+    adatas = [adata_src, adata_tgt]
     adata = ad.concat(adatas, join="outer", label=key, index_unique="-", uns_merge="unique")
     adata.obs[key] = (pd.to_numeric(adata.obs[key]) if key == "day" else adata.obs[key]).astype("category")
     adata.layers["counts"] = adata.X.A
     sc.pp.pca(adata)
     return adata
 
 
-_gt_source_annotation = np.array(["A", "A", "B", "A", "B", "C", "A", "A", "A", "A"], dtype="U1")
-
-_gt_target_max_annotation = np.array(["A", "A", "B", "A", "B", "C", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
-
-_gt_target_sum_annotation = np.array(["A", "A", "B", "A", "B", "C", "A", "A", "A", "A", "A", "A", "A", "A", "A"])
-
-
 @pytest.fixture()
 def gt_tm_annotation() -> np.ndarray:
     tm = np.zeros((10, 15))
     for i in range(10):
         tm[i][i] = 1
     for i in range(10, 15):
-        tm[0][i] = 0.3
-        tm[1][i] = 0.3
-        tm[2][i] = 0.4
+        tm[i-5][i] = 1
+    for j in range(2,5):
+        for i in range(2,5):
+            tm[i][j] = 0.3 if i != j else 0.4
     return tm
diff --git a/tests/problems/cross_modality/test_mixins.py b/tests/problems/cross_modality/test_mixins.py
@@ -108,29 +108,28 @@ def test_cell_transition_pipeline(
             pd.testing.assert_frame_equal(result1, result2)
 
     @pytest.mark.fast()
-    @pytest.mark.parametrize("forward", [True])  # , False])
-    @pytest.mark.parametrize(
-        "mapping_mode",
-        [
-            "max",
-        ],
-    )  # "sum"])
+    @pytest.mark.parametrize("forward", [True, False])
+    @pytest.mark.parametrize("mapping_mode",["max", "sum"])
     @pytest.mark.parametrize("problem_kind", ["cross_modality"])
     def test_annotation_mapping(
         self, adata_anno: Tuple[AnnData, AnnData], forward: bool, mapping_mode, gt_tm_annotation
     ):
-        rng = np.random.RandomState(0)
         adata_src, adata_tgt = adata_anno
         tp = TranslationProblem(adata_src, adata_tgt)
         tp = tp.prepare(src_attr="emb_src", tgt_attr="emb_tgt")
         problem_keys = ("src", "tgt")
         assert set(tp.problems.keys()) == {problem_keys}
         tp[problem_keys].set_solution(MockSolverOutput(gt_tm_annotation), overwrite=True)
-
+        annotation_label = "celltype1" if forward else "celltype2"
         result = tp.annotation_mapping(
             mapping_mode=mapping_mode,
-            annotation_label="celltype",
+            annotation_label=annotation_label,
             forward=forward,
+            source="src",
+            target="tgt"
         )
-        expected_result = adata_src.uns["expected_max"] if mapping_mode == "max" else adata_src.uns["expected_sum"]
-        assert (result["celltype"] == expected_result).all()
+        if forward:
+            expected_result = adata_src.uns["expected_max1"] if mapping_mode == "max" else adata_src.uns["expected_sum1"]
+        else:
+            expected_result = adata_tgt.uns["expected_max2"] if mapping_mode == "max" else adata_tgt.uns["expected_sum2"]
+        assert (result[annotation_label] == expected_result).all()
diff --git a/tests/problems/space/test_mixins.py b/tests/problems/space/test_mixins.py
@@ -93,6 +93,29 @@ def test_cell_transition_pipeline(self, adata_space_rotate: AnnData, forward: bo
         assert isinstance(result, pd.DataFrame)
         assert result.shape == (3, 3)
 
+    @pytest.mark.fast()
+    @pytest.mark.parametrize("forward", [True, False])
+    @pytest.mark.parametrize("mapping_mode", ["max", "sum"])
+    @pytest.mark.parametrize("problem_kind", ["alignment"])
+    def test_annotation_mapping(self, adata_anno: AnnData, forward: bool, mapping_mode, gt_tm_annotation):
+        ap = AlignmentProblem(adata=adata_anno)
+        ap = ap.prepare(batch_key="batch", joint_attr={"attr": "X"})
+        problem_keys = ("0", "1")
+        assert set(ap.problems.keys()) == {problem_keys}
+        ap[problem_keys].set_solution(MockSolverOutput(gt_tm_annotation))
+        annotation_label = "celltype1" if forward else "celltype2"
+        result = ap.annotation_mapping(
+            mapping_mode=mapping_mode,
+            annotation_label=annotation_label,
+            source="0",
+            target="1",
+            forward=forward,
+        )
+        if forward:
+            expected_result = adata_anno.uns["expected_max1"] if mapping_mode == "max" else adata_anno.uns["expected_sum1"]
+        else:
+            expected_result = adata_anno.uns["expected_max2"] if mapping_mode == "max" else adata_anno.uns["expected_sum2"]
+        assert (result[annotation_label] == expected_result).all()
 
 class TestSpatialMappingAnalysisMixin:
     @pytest.mark.parametrize("sc_attr", [{"attr": "X"}, {"attr": "obsm", "key": "X_pca"}])
@@ -177,28 +200,25 @@ def test_cell_transition_pipeline(self, adata_mapping: AnnData, forward: bool, n
         assert result.shape == (3, 4)
 
     @pytest.mark.fast()
-    @pytest.mark.parametrize(
-        "forward",
-        [
-            False,
-        ],
-    )  # True])
+    @pytest.mark.parametrize("forward", [True, False])
     @pytest.mark.parametrize("mapping_mode", ["max", "sum"])
     @pytest.mark.parametrize("problem_kind", ["mapping"])
     def test_annotation_mapping(self, adata_anno: AnnData, forward: bool, mapping_mode, gt_tm_annotation):
-        rng = np.random.RandomState(0)
-        adataref, adatasp = _adata_spatial_split(adata_anno)
+        adataref, adatasp = adata_anno
         mp = MappingProblem(adataref, adatasp)
         mp = mp.prepare(sc_attr={"attr": "obsm", "key": "X_pca"}, joint_attr={"attr": "X"})
         problem_keys = ("src", "tgt")
         assert set(mp.problems.keys()) == {problem_keys}
         mp[problem_keys].set_solution(MockSolverOutput(gt_tm_annotation.T))
-
+        annotation_label = "celltype1" if not forward else "celltype2"
         result = mp.annotation_mapping(
             mapping_mode=mapping_mode,
-            annotation_label="celltype",
+            annotation_label=annotation_label,
             source="src",
             forward=forward,
         )
-        expected_result = adataref.uns["expected_max"] if mapping_mode == "max" else adataref.uns["expected_sum"]
-        assert (result["celltype"] == expected_result).all()
+        if not forward:
+            expected_result = adataref.uns["expected_max1"] if mapping_mode == "max" else adataref.uns["expected_sum1"]
+        else:
+            expected_result = adatasp.uns["expected_max2"] if mapping_mode == "max" else adatasp.uns["expected_sum2"]
+        assert (result[annotation_label] == expected_result).all()
diff --git a/tests/problems/time/test_mixins.py b/tests/problems/time/test_mixins.py
@@ -51,25 +51,24 @@ def test_cell_transition_full_pipeline(self, gt_temporal_adata: AnnData, forward
         np.testing.assert_allclose(present_cell_type_marginal, 1.0)
 
     @pytest.mark.fast()
-    @pytest.mark.parametrize(
-        "forward",
-        [
-            True,
-        ],
-    )  # False])
-    @pytest.mark.parametrize("mapping_mode", ["max"])  # , "sum"])
+    @pytest.mark.parametrize("forward",[True, False])
+    @pytest.mark.parametrize("mapping_mode", ["max", "sum"])
     @pytest.mark.parametrize("problem_kind", ["temporal"])
     def test_annotation_mapping(self, adata_anno: AnnData, forward: bool, mapping_mode, gt_tm_annotation):
         problem = TemporalProblem(adata_anno)
         problem_keys = (0, 1)
         problem = problem.prepare(time_key="day", joint_attr="X_pca")
         assert set(problem.problems.keys()) == {problem_keys}
         problem[problem_keys]._solution = MockSolverOutput(gt_tm_annotation)
+        annotation_label = "celltype1" if forward else "celltype2"
         result = problem.annotation_mapping(
-            mapping_mode=mapping_mode, annotation_label="celltype", forward=forward, source=0, target=1
-        )
-        expected_result = adata_anno.uns["expected_max"] if mapping_mode == "max" else adata_anno.uns["expected_sum"]
-        assert (result["celltype"] == expected_result).all()
+            mapping_mode=mapping_mode, annotation_label=annotation_label, forward=forward, source=0, target=1
+            )
+        if forward:
+            expected_result = adata_anno.uns["expected_max1"] if mapping_mode == "max" else adata_anno.uns["expected_sum1"]
+        else:
+            expected_result = adata_anno.uns["expected_max2"] if mapping_mode == "max" else adata_anno.uns["expected_sum2"]
+        assert (result[annotation_label] == expected_result).all()
 
     @pytest.mark.fast()
     @pytest.mark.parametrize("forward", [True, False])

Original file line number	Diff line number	Diff line change
`@@ -202,7 +202,7 @@ def annotation_mapping(`
`202`	`202`	`target=target,`
`203`	`203`	`key=self.batch_key,`
`204`	`204`	`forward=forward,`
`205`		`- other_adata=self.adata_tgt if forward else self.adata_src,`
	`205`	`+ other_adata=self.adata_tgt,`
`206`	`206`	`scale_by_marginals=scale_by_marginals,`
`207`	`207`	`cell_transition_kwargs=cell_transition_kwargs,`
`208`	`208`	`)`