Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add parameter for numerical stability in conditional entropy #658

Merged
merged 2 commits into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/moscot/base/problems/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ def compute_entropy(
forward: bool = True,
key_added: Optional[str] = "conditional_entropy",
batch_size: Optional[int] = None,
c: float = 0.0,
) -> Optional[pd.DataFrame]:
"""Compute the conditional entropy per cell.

Expand All @@ -685,6 +686,8 @@ def compute_entropy(
Key in :attr:`~anndata.AnnData.obs` where the entropy is stored.
batch_size
Batch size for the computation of the entropy. If :obj:`None`, the entire dataset is used.
c
Constant added to every entry of the conditional distributions obtained from the transport matrix before the entropy is computed, so that zero entries do not produce ``log(0)``.

Returns
-------
Expand All @@ -710,7 +713,7 @@ def compute_entropy(
split_mass=True,
key_added=None,
)
df.iloc[range(batch, min(batch + batch_size, len(df))), 0] = _compute_conditional_entropy(cond_dists) # type: ignore[arg-type]
df.iloc[range(batch, min(batch + batch_size, len(df))), 0] = _compute_conditional_entropy(cond_dists + c) # type: ignore[arg-type, operator]
if key_added is not None:
self.adata.obs[key_added] = df
return df if key_added is None else None
2 changes: 1 addition & 1 deletion src/moscot/base/problems/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,5 +721,5 @@ def _get_n_cores(n_cores: Optional[int], n_jobs: Optional[int]) -> int:
return n_cores


def _compute_conditional_entropy(p_xy: ArrayLike) -> ArrayLike:
def _compute_conditional_entropy(p_xy: ArrayLike, c: float = 0.0) -> ArrayLike:
return -np.sum(p_xy * np.log(p_xy / p_xy.sum(axis=0)), axis=0)
7 changes: 5 additions & 2 deletions tests/problems/generic/test_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,9 @@ def test_compute_feature_correlation_transcription_factors(
@pytest.mark.parametrize("forward", [True, False])
@pytest.mark.parametrize("key_added", [None, "test"])
@pytest.mark.parametrize("batch_size", [None, 2])
@pytest.mark.parametrize("c", [0.0, 0.1])
def test_compute_entropy_pipeline(
self, adata_time: AnnData, forward: bool, key_added: Optional[str], batch_size: int
self, adata_time: AnnData, forward: bool, key_added: Optional[str], batch_size: int, c: float
):
rng = np.random.RandomState(42)
adata_time = adata_time[adata_time.obs["time"].isin((0, 1))].copy()
Expand All @@ -295,7 +296,9 @@ def test_compute_entropy_pipeline(
problem = problem.prepare(key="time", xy_callback="local-pca", policy="sequential")
problem[0, 1]._solution = MockSolverOutput(tmap)

out = problem.compute_entropy(source=0, target=1, forward=forward, key_added=key_added, batch_size=batch_size)
out = problem.compute_entropy(
source=0, target=1, forward=forward, key_added=key_added, batch_size=batch_size, c=c
)
if key_added is None:
assert isinstance(out, pd.DataFrame)
assert len(out) == n0
Expand Down
Loading