Skip to content

Commit 2fdf149

Browse files
committed
Do not use mismatch in ancestor matching when using infer()
Fixes #980
1 parent 0a83c7b commit 2fdf149

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
- Document that the `zarr-vcf` dataset can be either a path or an in-memory zarr group.
88
(feature introduced in {pr}`966`, documented in {pr}`974`, {user}`hyanwong`)
99

10+
**Breaking Changes**
11+
12+
- If a recombination rate or mismatch ratio is provided to the `infer` command, it is
13+
only applied during the `match_samples` phase ({issue}`980`, {pr}`981`, {user}`hyanwong`)
14+
1015
**Fixes**
1116

1217
- Properly account for "N" as an unknown ancestral state, and ban "" from being

tests/test_inference.py

+11
Original file line numberDiff line numberDiff line change
@@ -4309,6 +4309,17 @@ def test_zero_recombination(self):
43094309
extended_checks=True,
43104310
)
43114311

4312+
def test_no_ancestor_mismatch_in_basic_infer(self, small_sd_anc_fixture):
4313+
# Check we are not using mismatch in match_ancestors, by
4314+
# passing a value that fails in the match_ancestors phase
4315+
sd, anc = small_sd_anc_fixture
4316+
rho = 0
4317+
with pytest.raises(_tsinfer.MatchImpossible):
4318+
# rho=0 fails if mismatch is used in match_ancestors
4319+
tsinfer.match_ancestors(sd, anc, recombination_rate=rho)
4320+
for e in [tsinfer.PY_ENGINE, tsinfer.C_ENGINE]:
4321+
tsinfer.infer(sd, recombination_rate=rho, engine=e)
4322+
43124323

43134324
class TestAlgorithmResults:
43144325
"""

tsinfer/inference.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,8 @@ def infer(
295295
and ``path_compression``.
296296
297297
.. note::
298-
For finer grained control over inference, for example to set different mismatch
299-
ratios when matching ancestors versus samples, run
298+
For finer grained control over inference, for example to set mismatch
299+
ratios when matching ancestors as well as when matching samples, run
300300
:func:`tsinfer.generate_ancestors`, :func:`tsinfer.match_ancestors` and
301301
:func:`tsinfer.match_samples` separately.
302302
@@ -305,14 +305,14 @@ def infer(
305305
:param recombination_rate: Either a floating point value giving a constant rate
306306
:math:`\\rho` per unit length of genome, or an :class:`msprime.RateMap`
307307
object. This is used to calculate the probability of recombination between
308-
adjacent sites. If ``None``, all matching conflicts are resolved by
309-
recombination and all inference sites will have a single mutation
310-
(equivalent to mismatch_ratio near zero)
308+
adjacent sites in the match_samples stage. If ``None``, all matching
309+
conflicts are resolved by recombination and all inference sites will have
310+
a single mutation (equivalent to mismatch_ratio near zero).
311311
:type recombination_rate: float, msprime.RateMap
312312
:param float mismatch_ratio: The probability of a mismatch relative to the median
313313
probability of recombination between adjacent sites: can only be used if a
314314
recombination rate has been set (default: ``None`` treated as 1 if
315-
``recombination_rate`` is set).
315+
``recombination_rate`` is set). This is only applied in the match_samples stage.
316316
:param bool path_compression: Whether to merge edges that share identical
317317
paths (essentially taking advantage of shared recombination breakpoints).
318318
:param bool post_process: Whether to run the :func:`post_process` method on the
@@ -348,13 +348,13 @@ def infer(
348348
progress_monitor=progress_monitor,
349349
record_provenance=False,
350350
)
351+
# NB: do not pass or encourage use of the mismatch ratio / recombination rate in
352+
# the ancestor matching phase. See https://github.com/tskit-dev/tsinfer/issues/980
351353
ancestors_ts = match_ancestors(
352354
sample_data,
353355
ancestor_data,
354356
engine=engine,
355357
num_threads=num_threads,
356-
recombination_rate=recombination_rate,
357-
mismatch_ratio=mismatch_ratio,
358358
precision=precision,
359359
path_compression=path_compression,
360360
progress_monitor=progress_monitor,

0 commit comments

Comments
 (0)