Skip to content

Commit b7f3eb5

Browse files
committed
review updates
1 parent 6a0fb00 commit b7f3eb5

File tree

4 files changed

+351
-141
lines changed

4 files changed

+351
-141
lines changed

htslib/kstring.h

+52-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* The MIT License
22
33
Copyright (C) 2011 by Attractive Chaos <[email protected]>
4-
Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024 Genome Research Ltd.
4+
Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024-2025 Genome Research Ltd.
55
66
Permission is hereby granted, free of charge, to any person obtaining
77
a copy of this software and associated documentation files (the
@@ -449,6 +449,57 @@ static inline int *ksplit(kstring_t *s, int delimiter, int *n)
449449
return offsets;
450450
}
451451

452+
/**
453+
* kinsert_char - inserts a char to kstring
454+
* @param c - char to insert
455+
* @param pos - position at which to insert, starting from 0
456+
* @param s - pointer to output string
457+
* Returns 0 on success and -1 on failure
458+
* 0 for pos inserts at start and length of current string as pos appends at
459+
* the end.
460+
*/
461+
static inline int kinsert_char(char c, size_t pos, kstring_t *s)
462+
{
463+
if (!s || pos < 0 || pos > s->l) {
464+
return EOF;
465+
}
466+
if (ks_resize(s, s->l + 2) < 0) {
467+
return EOF;
468+
}
469+
memmove(s->s + pos + 1, s->s + pos, s->l - pos);
470+
s->s[pos] = c;
471+
++s->l;
472+
return 0;
473+
}
474+
475+
/**
476+
* kinsert_str - inserts a null terminated string to kstring
477+
* @param str - string to insert
478+
* @param pos - position at which to insert, starting from 0
479+
* @param s - pointer to output string
480+
* Returns 0 on success and -1 on failure
481+
* 0 for pos inserts at start and length of current string as pos appends at
482+
* the end. empty string makes no update.
483+
*/
484+
static inline int kinsert_str(const char *str, size_t pos, kstring_t *s)
485+
{
486+
size_t len = 0;
487+
if (!s || pos < 0 || pos > s->l || !str) {
488+
return EOF;
489+
}
490+
if (!(len = strlen(str))) {
491+
return 0;
492+
}
493+
if (ks_resize(s, s->l + len + 1) < 0) {
494+
return EOF;
495+
}
496+
memmove(s->s + pos + len, s->s + pos, s->l - pos);
497+
memcpy(s->s + pos, str, len);
498+
s->l += len;
499+
s->s[s->l] = '\0';
500+
return 0;
501+
}
502+
452503
#ifdef HTSLIB_SSIZE_T
453504
#undef HTSLIB_SSIZE_T
454505
#undef ssize_t

htslib/vcf.h

+11-127
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
/// High-level VCF/BCF variant calling file operations.
33
/*
44
Copyright (C) 2012, 2013 Broad Institute.
5-
Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd.
5+
Copyright (C) 2012-2020, 2022-2025 Genome Research Ltd.
66
77
Author: Heng Li <[email protected]>
88
@@ -1501,141 +1501,25 @@ static inline int bcf_float_is_vector_end(float f)
15011501
return u.i==bcf_float_vector_end ? 1 : 0;
15021502
}
15031503

1504-
typedef enum bcf_version {v41 = 1, v42, v43, v44} bcf_version;
1505-
/**
1506-
* bcf_get_version - get the version as bcf_version enumeration
1507-
* @param hdr - bcf header, to get version
1508-
* @param ipver - pointer to return version
1509-
* Returns 0 on success and -1 on failure
1510-
*/
1511-
static inline int bcf_get_version(const bcf_hdr_t *hdr, bcf_version *ver)
1512-
{
1513-
const char *version = NULL;
1514-
1515-
if (!hdr || !ver) {
1516-
return -1;
1517-
}
1518-
1519-
version = bcf_hdr_get_version(hdr);
1520-
if (!strcmp("VCFv4.1", version)) {
1521-
*ver = v41;
1522-
} else if (!strcmp("VCFv4.2", version)) {
1523-
*ver = v42;
1524-
} else if (!strcmp("VCFv4.3", version)) {
1525-
*ver = v43;
1526-
} else {
1527-
*ver = v44;
1528-
}
1529-
return 0;
1530-
}
1531-
1532-
/**
 *  bcf_format_gt - appends the GT text of one sample to a string
 *  @param fmt      - pointer to bcf format data
 *  @param isample  - position of interested sample in data
 *  @param str      - pointer to output string
 *  Returns 0 on success, -1 if any write to str failed and -2 if fmt->type
 *  is not one of the handled types.
 *  Values are read until fmt->n of them are seen or the vector end marker is
 *  met.  For every value but the first a separator is written, '|' when the
 *  lowest bit of the value is set and '/' otherwise.  The remaining bits
 *  (value >> 1) give the allele: 0 is written as '.', anything else as
 *  (value >> 1) - 1.  A sample with no values at all is written as '.'.
 */
static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
{
    uint32_t failed = 0;    /* becomes non-zero once any kput* call fails */

    #define FORMAT_ALLELES(type_t, convert, vector_end) do { \
        uint8_t *cur = fmt->p + isample*fmt->size; \
        int n_done = 0; \
        while (n_done < fmt->n) { \
            type_t allele = convert(cur); \
            if (allele == vector_end) break; \
            if (n_done > 0) failed |= kputc("/|"[allele & 1], str) < 0; \
            if ((allele >> 1) != 0) failed |= kputw((allele >> 1) - 1, str) < 0; \
            else failed |= kputc('.', str) < 0; \
            n_done++; \
            cur += sizeof(type_t); \
        } \
        if (n_done == 0) failed |= kputc('.', str) < 0; \
    } while (0)

    switch (fmt->type) {
    case BCF_BT_INT8:
        FORMAT_ALLELES(int8_t, le_to_i8, bcf_int8_vector_end);
        break;
    case BCF_BT_INT16:
        FORMAT_ALLELES(int16_t, le_to_i16, bcf_int16_vector_end);
        break;
    case BCF_BT_INT32:
        FORMAT_ALLELES(int32_t, le_to_i32, bcf_int32_vector_end);
        break;
    case BCF_BT_NULL:
        failed |= kputc('.', str) < 0;
        break;
    default:
        hts_log_error("Unexpected type %d", fmt->type);
        return -2;
    }
    #undef FORMAT_ALLELES

    return failed ? -1 : 0;
}
15581504

15591505
/**
1560-
* bcf_format_gt1 - formats GT information on a string
1506+
* bcf_format_gt_v2 - formats GT information on a string
15611507
* @param hdr - bcf header, to get version
15621508
* @param fmt - pointer to bcf format data
15631509
* @param isample - position of interested sample in data
15641510
* @param str - pointer to output string
15651511
* Returns 0 on success and -1 on failure
1566-
* This method is extended from bcf_format_gt to output phasing information
1567-
* in accordance with v4.4 format, which supports explicit / prefixed phasing
1568-
* for 1st allele.
1569-
* Explicit / prefixed phasing for 1st allele is used only when it is a must to
1570-
* correctly express phasing.
1512+
* This method is preferred over bcf_format_gt as it supports VCF v4.4 and
1513+
* prefixed phasing. Explicit / prefixed phasing for the 1st allele is used
1514+
* only when it is required to express the phasing correctly.
15711515
*/
1572-
static inline int bcf_format_gt1(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample, kstring_t *str)
1516+
HTSLIB_EXPORT
1517+
int bcf_format_gt_v2(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample,
1518+
kstring_t *str) HTS_RESULT_USED;
1519+
1520+
static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
15731521
{
1574-
uint32_t e = 0;
1575-
bcf_version ver = v42;
1576-
int ploidy = 1, anyunphased = 0;
1577-
int32_t val0 = 0;
1578-
kstring_t tmp1 = KS_INITIALIZE, tmp2 = KS_INITIALIZE;
1579-
1580-
if (bcf_get_version(hdr, &ver)) {
1581-
hts_log_error("Failed to get version information");
1582-
return -1;
1583-
}
1584-
#define BRANCH(type_t, convert, missing, vector_end) { \
1585-
uint8_t *ptr = fmt->p + isample*fmt->size; \
1586-
int i; \
1587-
for (i=0; i<fmt->n; i++, ptr += sizeof(type_t)) \
1588-
{ \
1589-
type_t val = convert(ptr); \
1590-
if ( val == vector_end ) break; \
1591-
if (!i) { val0 = val; } \
1592-
if (i) { \
1593-
e |= kputc("/|"[val & 1], &tmp1) < 0; \
1594-
anyunphased |= !(val & 1); \
1595-
} \
1596-
if (!(val >> 1)) e |= kputc('.', &tmp1) < 0; \
1597-
else e |= kputw((val >> 1) - 1, &tmp1) < 0; \
1598-
} \
1599-
if (i == 0) e |= kputc('.', &tmp1) < 0; \
1600-
ploidy = i; \
1601-
}
1602-
switch (fmt->type) {
1603-
case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break;
1604-
case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break;
1605-
case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break;
1606-
case BCF_BT_NULL: e |= kputc('.', &tmp1) < 0; break;
1607-
default: hts_log_error("Unexpected type %d", fmt->type); return -2;
1608-
}
1609-
#undef BRANCH
1610-
1611-
if (ver >= v44) { //output which supports prefixed phasing
1612-
/* update 1st allele's phasing if required and append rest to it.
1613-
use prefixed phasing only when it is a must. i.e. without which the
1614-
inferred value will be incorrect */
1615-
if (val0 & 1) {
1616-
/* 1st one is phased, if ploidy is > 1 and an unphased allele exists
1617-
need to specify explicitly */
1618-
e |= (ploidy > 1 && anyunphased) ?
1619-
(kputc('|', &tmp2) < 0) :
1620-
(ploidy <= 1 && !((val0 >> 1)) ? //|. needs explicit o/p
1621-
(kputc('|', &tmp2) < 0) :
1622-
0);
1623-
} else {
1624-
/* 1st allele is unphased, if ploidy is = 1 or allele is '.' or
1625-
ploidy > 1 and no other unphased allele exist, need to specify
1626-
explicitly */
1627-
e |= ((ploidy <= 1 && val0 != 0) || (ploidy > 1 && !anyunphased)) ?
1628-
(kputc('/', &tmp2) < 0) :
1629-
0;
1630-
}
1631-
e |= kputsn(tmp1.s, tmp1.l, &tmp2) < 0; //append rest with updated one
1632-
ks_free(&tmp1);
1633-
tmp1 = tmp2;
1634-
}
1635-
//updated v44 string or <v44 without any update
1636-
e |= kputsn(tmp1.s, tmp1.l, str) < 0;
1637-
ks_free(&tmp1);
1638-
return e == 0 ? 0 : -1;
1522+
return bcf_format_gt_v2(NULL, fmt, isample, str);
16391523
}
16401524

16411525
static inline int bcf_enc_size(kstring_t *s, int size, int type)

test/test_kstring.c

+124-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* test_kstring.c -- kstring unit tests
22
3-
Copyright (C) 2018, 2020, 2024 Genome Research Ltd.
3+
Copyright (C) 2018, 2020, 2024-2025 Genome Research Ltd.
44
55
Author: Rob Davies <[email protected]>
66
@@ -451,6 +451,123 @@ static int test_kgetline2(void) {
451451
return EXIT_SUCCESS;
452452
}
453453

454+
static int test_kinsertchar(void) {
455+
kstring_t t = KS_INITIALIZE, res = KS_INITIALIZE;
456+
int i = 0;
457+
struct data {
458+
int pos;
459+
const char *val;
460+
};
461+
462+
struct data tdata[] = { { -1, ""}, {0, "X0123"}, {1, "0X123"}, {2, "01X23"},
463+
{3, "012X3"}, {4, "0123X"}, {5, ""} };
464+
465+
for (i = -1; i < 6; ++i) {
466+
kstring_t s = KS_INITIALIZE;
467+
kputs("0123", &s);
468+
if (kinsert_char('X', i, &s) < 0) {
469+
if ( i < 0 || i > 4) { ks_free(&s); continue; } //expected failures
470+
fprintf(stderr, "kinsert_char failed\n");
471+
return -1;
472+
}
473+
if (s.s[s.l] != '\0') {
474+
fprintf(stderr, "No NUL termination on string from kinsert_char\n");
475+
return -1;
476+
}
477+
if (memcmp(s.s, tdata[i + 1].val, s.l + 1)) {
478+
fprintf(stderr, "kinsert_char comparison failed\n");
479+
return -1;
480+
}
481+
ks_free(&s);
482+
}
483+
//realloc checks
484+
for (i = 0; i < 7; ++i) {
485+
kputc('A' + i, &res);
486+
if (kinsert_char('A' + i, t.l, &t) < 0) {
487+
fprintf(stderr, "kinsert_char failed\n");
488+
return -1;
489+
}
490+
if (t.s[t.l] != '\0') {
491+
fprintf(stderr, "No NUL termination on string from kinsert_char\n");
492+
return -1;
493+
}
494+
if (memcmp(t.s, res.s, res.l+1)) {
495+
fprintf(stderr, "kinsert_char realloc comparison failed\n");
496+
return -1;
497+
}
498+
}
499+
ks_free(&t);
500+
ks_free(&res);
501+
return 0;
502+
}
503+
504+
static int test_kinsertstr(void) {
505+
kstring_t t = KS_INITIALIZE, res = KS_INITIALIZE;
506+
int i = 0;
507+
struct data {
508+
int pos;
509+
const char *val;
510+
};
511+
512+
struct data tdata[] = { { -1, ""}, {0, "XYZ0123"}, {1, "0XYZ123"},
513+
{2, "01XYZ23"}, {3, "012XYZ3"}, {4, "0123XYZ"}, {5, ""} };
514+
515+
for (i = -1; i < 6; ++i) {
516+
kstring_t s = KS_INITIALIZE;
517+
kputs("0123", &s);
518+
if (kinsert_str("XYZ", i, &s) < 0) {
519+
if ( i < 0 || i > 4) { ks_free(&s); continue; } //expected failures
520+
fprintf(stderr, "kinsert_str failed\n");
521+
return -1;
522+
}
523+
if (s.s[s.l] != '\0') {
524+
fprintf(stderr, "No NUL termination on string from kinsert_str\n");
525+
return -1;
526+
}
527+
if (memcmp(s.s, tdata[i + 1].val, s.l + 1)) {
528+
fprintf(stderr, "kinsert_str comparison failed\n");
529+
return -1;
530+
}
531+
ks_free(&s);
532+
}
533+
//realloc checks
534+
for (i = 0; i < 15; ++i) {
535+
kstring_t val = KS_INITIALIZE;
536+
ksprintf(&val, "%c", 'A' + i);
537+
kputs(val.s, &res);
538+
if (kinsert_str(val.s, t.l, &t) < 0) {
539+
fprintf(stderr, "kinsert_str failed\n");
540+
return -1;
541+
}
542+
if (t.s[t.l] != '\0') {
543+
fprintf(stderr, "No NUL termination on string from kinsert_str\n");
544+
return -1;
545+
}
546+
if (memcmp(t.s, res.s, res.l+1)) {
547+
fprintf(stderr, "kinsert_str realloc comparison failed\n");
548+
return -1;
549+
}
550+
ks_free(&val);
551+
}
552+
//empty strings
553+
ks_free(&t);
554+
if (kinsert_str("", 1, &t)) { //expected
555+
if (kinsert_str("", 0, &t) || t.l != 0) {
556+
fprintf(stderr, "kinsert_str empty insertion failed\n");
557+
return -1;
558+
}
559+
} else {
560+
fprintf(stderr, "kinsert_str empty ins to invalid pos succeeded\n");
561+
return -1;
562+
}
563+
i = res.l;
564+
if (kinsert_str("", 1, &res) || i != res.l) {
565+
fprintf(stderr, "kinsert_str empty ins to valid pos failed\n");
566+
return -1;
567+
}
568+
return 0;
569+
}
570+
454571
int main(int argc, char **argv) {
455572
int opt, res = EXIT_SUCCESS;
456573
int64_t start = 0;
@@ -500,5 +617,11 @@ int main(int argc, char **argv) {
500617
if (!test || strcmp(test, "kgetline2") == 0)
501618
if (test_kgetline2() != 0) res = EXIT_FAILURE;
502619

620+
if (!test || strcmp(test, "kinsertchar") == 0)
621+
if (test_kinsertchar() != 0) res = EXIT_FAILURE;
622+
623+
if (!test || strcmp(test, "kinsertstr") == 0)
624+
if (test_kinsertstr() != 0) res = EXIT_FAILURE;
625+
503626
return res;
504627
}

0 commit comments

Comments
 (0)