|
2 | 2 | /// High-level VCF/BCF variant calling file operations.
|
3 | 3 | /*
|
4 | 4 | Copyright (C) 2012, 2013 Broad Institute.
|
5 |
| - Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd. |
| 5 | + Copyright (C) 2012-2020, 2022-2025 Genome Research Ltd. |
6 | 6 |
|
7 | 7 | Author: Heng Li <[email protected]>
|
8 | 8 |
|
@@ -1501,141 +1501,25 @@ static inline int bcf_float_is_vector_end(float f)
|
1501 | 1501 | return u.i==bcf_float_vector_end ? 1 : 0;
|
1502 | 1502 | }
|
1503 | 1503 |
|
1504 |
| -typedef enum bcf_version {v41 = 1, v42, v43, v44} bcf_version; |
1505 |
| -/** |
1506 |
| - * bcf_get_version - get the version as bcf_version enumeration |
1507 |
| - * @param hdr - bcf header, to get version |
1508 |
| - * @param ipver - pointer to return version |
1509 |
| - * Returns 0 on success and -1 on failure |
1510 |
| - */ |
1511 |
| -static inline int bcf_get_version(const bcf_hdr_t *hdr, bcf_version *ver) |
1512 |
| -{ |
1513 |
| - const char *version = NULL; |
1514 |
| - |
1515 |
| - if (!hdr || !ver) { |
1516 |
| - return -1; |
1517 |
| - } |
1518 |
| - |
1519 |
| - version = bcf_hdr_get_version(hdr); |
1520 |
| - if (!strcmp("VCFv4.1", version)) { |
1521 |
| - *ver = v41; |
1522 |
| - } else if (!strcmp("VCFv4.2", version)) { |
1523 |
| - *ver = v42; |
1524 |
| - } else if (!strcmp("VCFv4.3", version)) { |
1525 |
| - *ver = v43; |
1526 |
| - } else { |
1527 |
| - *ver = v44; |
1528 |
| - } |
1529 |
| - return 0; |
1530 |
| -} |
1531 |
| - |
1532 |
| -static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) |
1533 |
| -{ |
1534 |
| - uint32_t e = 0; |
1535 |
| - #define BRANCH(type_t, convert, missing, vector_end) { \ |
1536 |
| - uint8_t *ptr = fmt->p + isample*fmt->size; \ |
1537 |
| - int i; \ |
1538 |
| - for (i=0; i<fmt->n; i++, ptr += sizeof(type_t)) \ |
1539 |
| - { \ |
1540 |
| - type_t val = convert(ptr); \ |
1541 |
| - if ( val == vector_end ) break; \ |
1542 |
| - if ( i ) e |= kputc("/|"[val&1], str) < 0; \ |
1543 |
| - if ( !(val>>1) ) e |= kputc('.', str) < 0; \ |
1544 |
| - else e |= kputw((val>>1) - 1, str) < 0; \ |
1545 |
| - } \ |
1546 |
| - if (i == 0) e |= kputc('.', str) < 0; \ |
1547 |
| - } |
1548 |
| - switch (fmt->type) { |
1549 |
| - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break; |
1550 |
| - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break; |
1551 |
| - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break; |
1552 |
| - case BCF_BT_NULL: e |= kputc('.', str) < 0; break; |
1553 |
| - default: hts_log_error("Unexpected type %d", fmt->type); return -2; |
1554 |
| - } |
1555 |
| - #undef BRANCH |
1556 |
| - return e == 0 ? 0 : -1; |
1557 |
| -} |
1558 | 1504 |
|
1559 | 1505 | /**
|
1560 |
| - * bcf_format_gt1 - formats GT information on a string |
| 1506 | + * bcf_format_gt_v2 - formats GT information on a string |
1561 | 1507 | * @param hdr - bcf header, to get version
|
1562 | 1508 | * @param fmt - pointer to bcf format data
|
1563 | 1509 | * @param isample - position of interested sample in data
|
1564 | 1510 | * @param str - pointer to output string
|
1565 | 1511 | * Returns 0 on success and -1 on failure
|
1566 |
| - * This method is extended from bcf_format_gt to output phasing information |
1567 |
| - * in accordance with v4.4 format, which supports explicit / prefixed phasing |
1568 |
| - * for 1st allele. |
1569 |
| - * Explicit / prefixed phasing for 1st allele is used only when it is a must to |
1570 |
| - * correctly express phasing. |
| 1512 | + * This method is preferred over bcf_format_gt as this supports vcf4.4 and |
| 1513 | + * prefixed phasing. Explicit / prefixed phasing for 1st allele is used only |
| 1514 | + * when it is a must to correctly express phasing. |
1571 | 1515 | */
|
1572 |
| -static inline int bcf_format_gt1(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample, kstring_t *str) |
| 1516 | +HTSLIB_EXPORT |
| 1517 | +int bcf_format_gt_v2(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample, |
| 1518 | + kstring_t *str) HTS_RESULT_USED; |
| 1519 | + |
| 1520 | +static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) |
1573 | 1521 | {
|
1574 |
| - uint32_t e = 0; |
1575 |
| - bcf_version ver = v42; |
1576 |
| - int ploidy = 1, anyunphased = 0; |
1577 |
| - int32_t val0 = 0; |
1578 |
| - kstring_t tmp1 = KS_INITIALIZE, tmp2 = KS_INITIALIZE; |
1579 |
| - |
1580 |
| - if (bcf_get_version(hdr, &ver)) { |
1581 |
| - hts_log_error("Failed to get version information"); |
1582 |
| - return -1; |
1583 |
| - } |
1584 |
| - #define BRANCH(type_t, convert, missing, vector_end) { \ |
1585 |
| - uint8_t *ptr = fmt->p + isample*fmt->size; \ |
1586 |
| - int i; \ |
1587 |
| - for (i=0; i<fmt->n; i++, ptr += sizeof(type_t)) \ |
1588 |
| - { \ |
1589 |
| - type_t val = convert(ptr); \ |
1590 |
| - if ( val == vector_end ) break; \ |
1591 |
| - if (!i) { val0 = val; } \ |
1592 |
| - if (i) { \ |
1593 |
| - e |= kputc("/|"[val & 1], &tmp1) < 0; \ |
1594 |
| - anyunphased |= !(val & 1); \ |
1595 |
| - } \ |
1596 |
| - if (!(val >> 1)) e |= kputc('.', &tmp1) < 0; \ |
1597 |
| - else e |= kputw((val >> 1) - 1, &tmp1) < 0; \ |
1598 |
| - } \ |
1599 |
| - if (i == 0) e |= kputc('.', &tmp1) < 0; \ |
1600 |
| - ploidy = i; \ |
1601 |
| - } |
1602 |
| - switch (fmt->type) { |
1603 |
| - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break; |
1604 |
| - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break; |
1605 |
| - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break; |
1606 |
| - case BCF_BT_NULL: e |= kputc('.', &tmp1) < 0; break; |
1607 |
| - default: hts_log_error("Unexpected type %d", fmt->type); return -2; |
1608 |
| - } |
1609 |
| - #undef BRANCH |
1610 |
| - |
1611 |
| - if (ver >= v44) { //output which supports prefixed phasing |
1612 |
| - /* update 1st allele's phasing if required and append rest to it. |
1613 |
| - use prefixed phasing only when it is a must. i.e. without which the |
1614 |
| - inferred value will be incorrect */ |
1615 |
| - if (val0 & 1) { |
1616 |
| - /* 1st one is phased, if ploidy is > 1 and an unphased allele exists |
1617 |
| - need to specify explicitly */ |
1618 |
| - e |= (ploidy > 1 && anyunphased) ? |
1619 |
| - (kputc('|', &tmp2) < 0) : |
1620 |
| - (ploidy <= 1 && !((val0 >> 1)) ? //|. needs explicit o/p |
1621 |
| - (kputc('|', &tmp2) < 0) : |
1622 |
| - 0); |
1623 |
| - } else { |
1624 |
| - /* 1st allele is unphased, if ploidy is = 1 or allele is '.' or |
1625 |
| - ploidy > 1 and no other unphased allele exist, need to specify |
1626 |
| - explicitly */ |
1627 |
| - e |= ((ploidy <= 1 && val0 != 0) || (ploidy > 1 && !anyunphased)) ? |
1628 |
| - (kputc('/', &tmp2) < 0) : |
1629 |
| - 0; |
1630 |
| - } |
1631 |
| - e |= kputsn(tmp1.s, tmp1.l, &tmp2) < 0; //append rest with updated one |
1632 |
| - ks_free(&tmp1); |
1633 |
| - tmp1 = tmp2; |
1634 |
| - } |
1635 |
| - //updated v44 string or <v44 without any update |
1636 |
| - e |= kputsn(tmp1.s, tmp1.l, str) < 0; |
1637 |
| - ks_free(&tmp1); |
1638 |
| - return e == 0 ? 0 : -1; |
| 1522 | + return bcf_format_gt_v2(NULL, fmt, isample, str); |
1639 | 1523 | }
|
1640 | 1524 |
|
1641 | 1525 | static inline int bcf_enc_size(kstring_t *s, int size, int type)
|
|
0 commit comments