Skip to content

Commit fd50a6f

Browse files
committed
Better formatted error message for ancestral alleles
1 parent 7db1d38 commit fd50a6f

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

tests/test_sgkit.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -538,7 +538,10 @@ def test_ancestral_missingness(tmp_path):
538538
)
539539
ds = sgkit.load_dataset(str(zarr_path) + ".tmp")
540540
sd = tsinfer.SgkitSampleData(str(zarr_path) + ".tmp")
541-
with pytest.warns(UserWarning, match="The following alleles were not found"):
541+
with pytest.warns(
542+
UserWarning,
543+
match="Ancestral alleles not found in the variant_allele array for the 3 sites",
544+
):
542545
inf_ts = tsinfer.infer(sd)
543546
for i, (
544547
inf_var,

tsinfer/formats.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -2405,11 +2405,16 @@ def sites_ancestral_allele(self):
24052405
except IndexError:
24062406
unknown_alleles[allele] += 1
24072407
ret[i] = allele_index
2408-
if sum(unknown_alleles.values()) > 0:
2408+
tot = sum(unknown_alleles.values())
2409+
if tot > 0:
24092410
warnings.warn(
2410-
"The following alleles were not found in the variant_allele array "
2411-
"and will be treated as unknown:\n"
2412-
f"{unknown_alleles}"
2411+
"Ancestral alleles not found in the variant_allele "
2412+
f"array for the {tot} sites ({tot/len(string_allele)*100:.2f}%) "
2413+
"listed below. They will be treated as of unknown ancestral state:\n "
2414+
+ "\n ".join(
2415+
f"'{k}': {v} ({v/len(string_allele)*100:.2f}% of sites)"
2416+
for k, v in unknown_alleles.items()
2417+
)
24132418
)
24142419
return ret
24152420

0 commit comments

Comments
 (0)