@@ -1501,6 +1501,34 @@ static inline int bcf_float_is_vector_end(float f)
15011501 return u .i == bcf_float_vector_end ? 1 : 0 ;
15021502}
15031503
/** VCF version identifiers, in increasing order, numbered from 1 */
typedef enum bcf_version {
    v41 = 1,
    v42,
    v43,
    v44
} bcf_version;
1505+ /**
1506+ * bcf_get_version - get the version as bcf_version enumeration
1507+ * @param hdr - bcf header, to get version
1508+ * @param ipver - pointer to return version
1509+ * Returns 0 on success and -1 on failure
1510+ */
1511+ static inline int bcf_get_version (const bcf_hdr_t * hdr , bcf_version * ver )
1512+ {
1513+ const char * version = NULL ;
1514+
1515+ if (!hdr || !ver ) {
1516+ return -1 ;
1517+ }
1518+
1519+ version = bcf_hdr_get_version (hdr );
1520+ if (!strcmp ("VCFv4.1" , version )) {
1521+ * ver = v41 ;
1522+ } else if (!strcmp ("VCFv4.2" , version )) {
1523+ * ver = v42 ;
1524+ } else if (!strcmp ("VCFv4.3" , version )) {
1525+ * ver = v43 ;
1526+ } else {
1527+ * ver = v44 ;
1528+ }
1529+ return 0 ;
1530+ }
1531+
15041532static inline int bcf_format_gt (bcf_fmt_t * fmt , int isample , kstring_t * str )
15051533{
15061534 uint32_t e = 0 ;
@@ -1528,6 +1556,86 @@ static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
15281556 return e == 0 ? 0 : -1 ;
15291557}
15301558
1559+ /**
1560+ * bcf_format_gt1 - formats GT information on a string
1561+ * @param hdr - bcf header, to get version
1562+ * @param fmt - pointer to bcf format data
1563+ * @param isample - position of interested sample in data
1564+ * @param str - pointer to output string
1565+ * Returns 0 on success and -1 on failure
1566+ * This method is extended from bcf_format_gt to output phasing information
1567+ * in accordance with v4.4 format, which supports explicit / prefixed phasing
1568+ * for 1st allele.
1569+ * Explicit / prefixed phasing for 1st allele is used only when it is a must to
1570+ * correctly express phasing.
1571+ */
1572+ static inline int bcf_format_gt1 (const bcf_hdr_t * hdr , bcf_fmt_t * fmt , int isample , kstring_t * str )
1573+ {
1574+ uint32_t e = 0 ;
1575+ bcf_version ver = v42 ;
1576+ int ploidy = 1 , anyunphased = 0 ;
1577+ int32_t val0 = 0 ;
1578+ kstring_t tmp1 = KS_INITIALIZE , tmp2 = KS_INITIALIZE ;
1579+
1580+ if (bcf_get_version (hdr , & ver )) {
1581+ hts_log_error ("Failed to get version information" );
1582+ return -1 ;
1583+ }
1584+ #define BRANCH (type_t , convert , missing , vector_end ) { \
1585+ uint8_t *ptr = fmt->p + isample*fmt->size; \
1586+ int i; \
1587+ for (i=0; i<fmt->n; i++, ptr += sizeof(type_t)) \
1588+ { \
1589+ type_t val = convert(ptr); \
1590+ if ( val == vector_end ) break; \
1591+ if (!i) { val0 = val; } \
1592+ if (i) { \
1593+ e |= kputc("/|"[val & 1], &tmp1) < 0; \
1594+ anyunphased |= !(val & 1); \
1595+ } \
1596+ if (!(val >> 1)) e |= kputc('.', &tmp1) < 0; \
1597+ else e |= kputw((val >> 1) - 1, &tmp1) < 0; \
1598+ } \
1599+ if (i == 0) e |= kputc('.', &tmp1) < 0; \
1600+ ploidy = i; \
1601+ }
1602+ switch (fmt -> type ) {
1603+ case BCF_BT_INT8 : BRANCH (int8_t , le_to_i8 , bcf_int8_missing , bcf_int8_vector_end ); break ;
1604+ case BCF_BT_INT16 : BRANCH (int16_t , le_to_i16 , bcf_int16_missing , bcf_int16_vector_end ); break ;
1605+ case BCF_BT_INT32 : BRANCH (int32_t , le_to_i32 , bcf_int32_missing , bcf_int32_vector_end ); break ;
1606+ case BCF_BT_NULL : e |= kputc ('.' , & tmp1 ) < 0 ; break ;
1607+ default : hts_log_error ("Unexpected type %d" , fmt -> type ); return -2 ;
1608+ }
1609+ #undef BRANCH
1610+
1611+ if (ver >= v44 ) { //output which supports prefixed phasing
1612+ /* update 1st allele's phasing if required and append rest to it.
1613+ use prefixed phasing only when it is a must. i.e. without which the
1614+ inferred value will be incorrect */
1615+ if (val0 & 1 ) {
1616+ /* 1st one is phased, if ploidy is > 1 and an unphased allele exists
1617+ need to specify explicitly */
1618+ e |= (ploidy > 1 && anyunphased ) ?
1619+ (kputc ('|' , & tmp2 ) < 0 ) :
1620+ 0 ;
1621+ } else {
1622+ /* 1st allele is unphased, if ploidy is = 1 or allele is '.' or
1623+ ploidy > 1 and no other unphased allele exist, need to specify
1624+ explicitly */
1625+ e |= ((ploidy <= 1 ) || (ploidy > 1 && !anyunphased )) ?
1626+ (kputc ('/' , & tmp2 ) < 0 ) :
1627+ 0 ;
1628+ }
1629+ e |= kputsn (tmp1 .s , tmp1 .l , & tmp2 ) < 0 ; //append rest with updated one
1630+ ks_free (& tmp1 );
1631+ tmp1 = tmp2 ;
1632+ }
1633+ //updated v44 string or <v44 without any update
1634+ e |= kputsn (tmp1 .s , tmp1 .l , str ) < 0 ;
1635+ ks_free (& tmp1 );
1636+ return e == 0 ? 0 : -1 ;
1637+ }
1638+
15311639static inline int bcf_enc_size (kstring_t * s , int size , int type )
15321640{
15331641 // Most common case is first
0 commit comments