Skip to content

Commit 335cdad

Browse files
authored
Validate GT value in stats ingestion (#546)
1 parent 267c8de commit 335cdad

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

libtiledbvcf/src/stats/allele_count.cc

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,9 +407,24 @@ void AlleleCount::process(
407407
}
408408

409409
int gt0 = bcf_gt_allele(dst_[0]);
410-
int gt1 = bcf_gt_allele(dst_[1]);
410+
int gt1 = ngt == 2 ? bcf_gt_allele(dst_[1]) : 0;
411411
int gt0_missing = bcf_gt_is_missing(dst_[0]);
412-
int gt1_missing = bcf_gt_is_missing(dst_[1]);
412+
int gt1_missing = ngt == 2 ? bcf_gt_is_missing(dst_[1]) : 1;
413+
int n_allele = rec->n_allele;
414+
415+
// Skip if GT value is not a valid allele
416+
if (gt0 >= n_allele || gt1 >= n_allele) {
417+
LOG_WARN(
418+
"AlleleCount: skipping invalid GT value: sample={} locus={}:{} "
419+
"gt={}/{} n_allele={}",
420+
sample_name,
421+
contig,
422+
pos + 1,
423+
gt0,
424+
gt1,
425+
n_allele);
426+
return;
427+
}
413428

414429
// Only haploid and diploid are supported
415430
if (ngt > 2) {

libtiledbvcf/src/stats/variant_stats.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,21 @@ void VariantStats::process(
410410
int gt1 = ngt == 2 ? bcf_gt_allele(dst_[1]) : 0;
411411
int gt0_missing = bcf_gt_is_missing(dst_[0]);
412412
int gt1_missing = ngt == 2 ? bcf_gt_is_missing(dst_[1]) : 1;
413+
int n_allele = rec->n_allele;
414+
415+
// Skip if GT value is not a valid allele
416+
if (gt0 >= n_allele || gt1 >= n_allele) {
417+
LOG_WARN(
418+
"VariantStats: skipping invalid GT value: sample={} locus={}:{} "
419+
"gt={}/{} n_allele={}",
420+
sample_name,
421+
contig,
422+
pos + 1,
423+
gt0,
424+
gt1,
425+
n_allele);
426+
return;
427+
}
413428

414429
// Skip if alleles are missing
415430
if (ngt == 1) {

libtiledbvcf/test/inputs/stats/first.vcf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@ chr1 3 . C A,G 10.032 . ExcessHet=2.0134 GT 2/2
1313
chr1 4 . G GTTTA,<NON_REF> 1042.73 . ExcessHet=4.8532 GT 1/1
1414
chr2 1 . G GTTTA 1042.73 . ExcessHet=4.8532 GT ./1
1515
chr2 1 . G GTTTA 1042.73 . ExcessHet=4.8532 GT 1/.
16+
chr2 3 . G GTTTA 1042.73 . ExcessHet=4.8532 GT 1/1
17+
chrX 3 BadGT G GTTTA 1042.73 . ExcessHet=4.8532 GT 1/2

0 commit comments

Comments
 (0)