Skip to content

Commit

Permalink
Better formatted error message for ancestral alleles
Browse files (browse the repository at this point in the history)
  • Loading branch information
hyanwong committed Jan 20, 2024
1 parent 7db1d38 commit af170e1
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
5 changes: 4 additions & 1 deletion tests/test_sgkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,10 @@ def test_ancestral_missingness(tmp_path):
)
ds = sgkit.load_dataset(str(zarr_path) + ".tmp")
sd = tsinfer.SgkitSampleData(str(zarr_path) + ".tmp")
with pytest.warns(UserWarning, match="The following alleles were not found"):
with pytest.warns(
UserWarning,
match="Ancestral alleles not found in the variant_allele array for 3 sites",
):
inf_ts = tsinfer.infer(sd)
for i, (
inf_var,
Expand Down
13 changes: 9 additions & 4 deletions tsinfer/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2405,11 +2405,16 @@ def sites_ancestral_allele(self):
except IndexError:
unknown_alleles[allele] += 1
ret[i] = allele_index
if sum(unknown_alleles.values()) > 0:
tot = sum(unknown_alleles.values())
if tot > 0:
warnings.warn(
"The following alleles were not found in the variant_allele array "
"and will be treated as unknown:\n"
f"{unknown_alleles}"
"Ancestral alleles not found in the variant_allele "
f"array for the {tot} sites ({tot/len(string_allele)*100:.2f}%) "
"listed below. They will be treated as of unknown ancestral state:\n "
+ "\n ".join(
f"'{k}': {v} ({v/len(string_allele)*100:.2f}% of sites)"
for k, v in unknown_alleles.items()
)
)
return ret

Expand Down

0 comments on commit af170e1

Please sign in to comment.