Skip to content

Commit 6a0fb00

Browse files
committed
update to have proper binary representation
1 parent 5177190 commit 6a0fb00

File tree

4 files changed

+34
-11
lines changed

4 files changed

+34
-11
lines changed

Diff for: htslib/vcf.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -1617,12 +1617,14 @@ static inline int bcf_format_gt1(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isamp
16171617
need to specify explicitly */
16181618
e |= (ploidy > 1 && anyunphased) ?
16191619
(kputc('|', &tmp2) < 0) :
1620-
0;
1620+
(ploidy <= 1 && !((val0 >> 1)) ? // "|." (phased missing allele) needs explicit output
1621+
(kputc('|', &tmp2) < 0) :
1622+
0);
16211623
} else {
16221624
/* 1st allele is unphased; if ploidy is 1 or the allele is '.', or
16231625
ploidy > 1 and no other unphased allele exists, need to specify
16241626
explicitly */
1625-
e |= ((ploidy <= 1) || (ploidy > 1 && !anyunphased)) ?
1627+
e |= ((ploidy <= 1 && val0 != 0) || (ploidy > 1 && !anyunphased)) ?
16261628
(kputc('/', &tmp2) < 0) :
16271629
0;
16281630
}

Diff for: test/vcf44_1.expected

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
##fileformat=VCFv4.4
22
##FILTER=<ID=PASS,Description="All filters passed">
33
##contig=<ID=1,length=1000>
4+
##reference=file://test
45
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
56
##failue="test file on explicit and implicit phasing markers in 4.4"
67
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
@@ -22,6 +23,13 @@
2223
1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0
2324
1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0
2425
1 61496 rs56992752 T A 100 PASS . GT . .
25-
1 61497 rs56992752 T A 100 PASS . GT ./1 .|1
26-
1 61498 rs56992752 T A 100 PASS . GT 1/. 1|.
27-
1 61499 rs56992752 T A 100 PASS . GT ./. .|.
26+
1 61497 rs56992752 T A 100 PASS . GT . |.
27+
1 61498 rs56992752 T A 100 PASS . GT ./1 .|1
28+
1 61499 rs56992752 T A 100 PASS . GT ./1 .|1
29+
1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1
30+
1 61501 rs56992752 T A 100 PASS . GT 1/. 1|.
31+
1 61502 rs56992752 T A 100 PASS . GT 1/. /1|.
32+
1 61503 rs56992752 T A 100 PASS . GT |1/. 1|.
33+
1 61504 rs56992752 T A 100 PASS . GT ./. .|.
34+
1 61505 rs56992752 T A 100 PASS . GT ./. .|.
35+
1 61506 rs56992752 T A 100 PASS . GT |./. /.|.

Diff for: test/vcf44_1.vcf

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
##fileformat=VCFv4.4
22
##contig=<ID=1,length=1000>
3+
##reference=file://test
34
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
45
##failue="test file on explicit and implicit phasing markers in 4.4"
56
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
@@ -21,6 +22,13 @@
2122
1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0
2223
1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0
2324
1 61496 rs56992752 T A 100 PASS . GT . .
24-
1 61497 rs56992752 T A 100 PASS . GT ./1 .|1
25-
1 61498 rs56992752 T A 100 PASS . GT 1/. 1|.
26-
1 61499 rs56992752 T A 100 PASS . GT ./. .|.
25+
1 61497 rs56992752 T A 100 PASS . GT /. |.
26+
1 61498 rs56992752 T A 100 PASS . GT ./1 .|1
27+
1 61499 rs56992752 T A 100 PASS . GT /./1 |.|1
28+
1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1
29+
1 61501 rs56992752 T A 100 PASS . GT 1/. 1|.
30+
1 61502 rs56992752 T A 100 PASS . GT /1/. /1|.
31+
1 61503 rs56992752 T A 100 PASS . GT |1/. |1|.
32+
1 61504 rs56992752 T A 100 PASS . GT ./. .|.
33+
1 61505 rs56992752 T A 100 PASS . GT /./. |.|.
34+
1 61506 rs56992752 T A 100 PASS . GT |./. /.|.

Diff for: vcf.c

+8-3
Original file line numberDiff line numberDiff line change
@@ -3111,7 +3111,7 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
31113111
uint32_t unreadable = 0;
31123112
uint32_t max = 0;
31133113
int overflow = 0, ploidy = 0, anyunphased = 0, \
3114-
phasingprfx = 0;
3114+
phasingprfx = 0, unknown1 = 0;
31153115

31163116
/* with prefixed phasing, it is explicitly given for 1st one
31173117
with non-prefixed, set based on ploidy and phasing of other
@@ -3126,6 +3126,9 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
31263126
ploidy++;
31273127
if (*t == '.') {
31283128
++t, x[l++] = is_phased;
3129+
if (l==1) { //for 1st allele only
3130+
unknown1 = 1;
3131+
}
31293132
} else {
31303133
const char *tt = t;
31313134
uint32_t val;
@@ -3151,9 +3154,11 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
31513154
/* no explicit phasing for 1st allele, set based on
31523155
other alleles and ploidy */
31533156
if (ploidy == 1) { //implicitly phased
3154-
x[0]|= 1;
3157+
if (!unknown1) {
3158+
x[0] |= 1;
3159+
}
31553160
} else { //set by other unphased alleles
3156-
x[0] |= anyunphased ? 0 : 1;
3161+
x[0] |= (anyunphased)? 0 : 1;
31573162
}
31583163
}
31593164
// Possibly check max against v->n_allele instead?

0 commit comments

Comments
 (0)