Skip to content

Commit 19a27e9

Browse files
jkbonfield authored and whitwham committed
Speed up kputll.
The kputuw function is considerably faster as it encodes 2 digits at a time and also utilises __builtin_clz. This changes kputll to use the same 2-digits-at-a-time trick. I have a __builtin_clzll variant too, but with longer numbers it's not the main bottleneck, and we fall back to kputuw for small numbers. This avoids complicating the code with builtin checks and alternate versions.

An alternative, purely for sam_format1_append, would be something like:

    static inline int kputll_fast(long long c, kstring_t *s) {
        return c <= INT_MAX && c >= INT_MIN ? kputw(c, s) : kputll(c, s);
    }
    #define kputll kputll_fast

This works as BAM/CRAM only support 32-bit numbers for POS, PNEXT and TLEN anyway, so ll vs w is an irrelevant distinction. However, I chose to modify the header file so it fixes other callers.

Overall, compressed BAM to uncompressed SAM conversion is about 5% quicker (tested on 10 million short-read seqs; it'll be minimal on long seqs). This includes decode time and other functions too. The sam_format1_append-only component of that is about 15-25% quicker depending on compiler and version.
1 parent 1187fa8 commit 19a27e9

File tree

2 files changed

+138
-11
lines changed

2 files changed

+138
-11
lines changed

htslib/kstring.h

+57-11
Original file line numberDiff line numberDiff line change
@@ -375,17 +375,63 @@ static inline int kputw(int c, kstring_t *s)
375375

376376
/*
 * Append the decimal representation of a signed long long to a kstring.
 *
 * Magnitudes that fit in 32 bits are handed to kputuw(); larger ones are
 * written here, two decimal digits per step, using a lookup table of the
 * character pairs "00".."99".
 *
 * Returns 0 on success, EOF if the string could not be grown, or whatever
 * kputuw() returns on the small-number path.
 */
static inline int kputll(long long c, kstring_t *s)
{
    // A single up-front reservation large enough for the worst case
    // (sign, up to 20 digits and the terminating NUL) keeps the function
    // small, which improves the odds of it being inlined.
    if (ks_resize(s, s->l + 23) < 0)
        return EOF;

    // Work on the magnitude as an unsigned value so that the most negative
    // input can be negated without signed overflow.
    unsigned long long mag = c;
    if (c < 0) {
        mag = -mag;
        s->s[s->l++] = '-';
    }

    // Small values: reuse the 32-bit routine.
    if (mag <= UINT32_MAX)
        return kputuw(mag, s);

    // digit_pairs[2*n] and digit_pairs[2*n + 1] are the two characters
    // of n written as a zero-padded two digit number, for 0 <= n <= 99.
    static const char digit_pairs[] =
        "00010203040506070809"
        "10111213141516171819"
        "20212223242526272829"
        "30313233343536373839"
        "40414243444546474849"
        "50515253545556575859"
        "60616263646566676869"
        "70717273747576777879"
        "80818283848586878889"
        "90919293949596979899";

    // Count the decimal digits in mag (clzll would be an alternative).
    unsigned int ndigits;
    if (sizeof(long long) == sizeof(uint64_t) && mag >= 10000000000000000000ULL) {
        // 20 digits; handled separately as the power of ten in the
        // loop below would overflow before exceeding mag.
        ndigits = 20;
    } else {
        uint64_t pow10 = 10;
        for (ndigits = 1; mag >= pow10; pow10 *= 10)
            ndigits++;
    }

    // Fill in the digits from the least significant end, a pair at a time.
    char *out = s->s + s->l;
    unsigned int pos = ndigits;
    while (mag >= 10) {
        pos -= 2;
        memcpy(out + pos, &digit_pairs[2 * (mag % 100)], 2);
        mag /= 100;
    }

    // An odd digit count leaves one leading digit; mag < 10 here.
    if (pos == 1)
        out[0] = mag + '0';

    s->l += ndigits;
    s->s[s->l] = 0;
    return 0;
}
390436

391437
static inline int kputl(long c, kstring_t *s) {

test/test_kstring.c

+81
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,84 @@ static int test_kputw(int64_t start, int64_t end) {
261261
return 0;
262262
}
263263

264+
/*
 * Check kputll() on every value in the inclusive range [s, e].
 *
 * Before each call the buffer is filled with 0xff bytes so that a missing
 * NUL terminator can be detected.  The formatted text is then parsed back
 * with strtoll() and must both equal the input value and occupy exactly
 * str->l bytes.
 *
 * Returns 0 if every value passes, or -1 (after printing a message to
 * stderr) on the first failure.
 */
static int test_kputll_from_to(kstring_t *str, long long s, long long e) {
    long long i = s;

    for (;;) {
        char *parse_end = NULL;

        str->l = 0;
        memset(str->s, 0xff, str->m);
        if (kputll(i, str) < 0 || !str->s) {
            perror("kputll");
            return -1;
        }
        if (str->l >= str->m || str->s[str->l] != '\0') {
            fprintf(stderr, "No NUL termination on string from kputll\n");
            return -1;
        }
        // Checking where the parse stopped catches a length that disagrees
        // with the digits actually written (e.g. an embedded NUL), which a
        // value comparison alone would let through.
        if (i != strtoll(str->s, &parse_end, 10)
            || parse_end != str->s + str->l) {
            fprintf(stderr,
                    "kputll wrote the wrong value, expected %lld, got %s\n",
                    i, str->s);
            return -1;
        }
        // Test the bound before incrementing so i cannot overflow when e
        // is the largest representable value.
        if (i >= e) break;
        i++;
    }
    return 0;
}
289+
290+
static int test_kputll(long long start, long long end) {
291+
kstring_t str = { 0, 0, NULL };
292+
unsigned long long val;
293+
294+
str.s = malloc(2);
295+
if (!str.s) {
296+
perror("malloc");
297+
return -1;
298+
}
299+
str.m = 2;
300+
301+
for (val = 1; val < INT64_MAX-5; val *= 10) {
302+
if (test_kputll_from_to(&str, val >= 5 ? val - 5 : val, val) < 0) {
303+
free(ks_release(&str));
304+
return -1;
305+
}
306+
}
307+
308+
for (val = 1; val < INT64_MAX-5; val *= 10) {
309+
long long valm = -val;
310+
if (test_kputll_from_to(&str, valm >= 5 ? valm - 5 : valm, valm) < 0) {
311+
free(ks_release(&str));
312+
return -1;
313+
}
314+
}
315+
316+
if (test_kputll_from_to(&str, INT64_MAX - 5, INT64_MAX) < 0) {
317+
free(ks_release(&str));
318+
return -1;
319+
}
320+
321+
if (test_kputll_from_to(&str, INT64_MIN, INT64_MIN + 5) < 0) {
322+
free(ks_release(&str));
323+
return -1;
324+
}
325+
326+
str.m = 1; // Force a resize
327+
int64_t start2 = (int64_t)start; // no larger on our platforms
328+
int64_t end2 = (int64_t)end;
329+
clamp(&start2, INT64_MIN, INT64_MAX);
330+
clamp(&end2, INT64_MIN, INT64_MAX);
331+
332+
if (test_kputll_from_to(&str, start, end) < 0) {
333+
free(ks_release(&str));
334+
return -1;
335+
}
336+
337+
free(ks_release(&str));
338+
339+
return 0;
340+
}
341+
264342
// callback used by test_kgetline
265343
static char *mock_fgets(char *str, int num, void *p) {
266344
int *mock_state = (int*)p;
@@ -413,6 +491,9 @@ int main(int argc, char **argv) {
413491
if (!test || strcmp(test, "kputw") == 0)
414492
if (test_kputw(start, end) != 0) res = EXIT_FAILURE;
415493

494+
if (!test || strcmp(test, "kputll") == 0)
495+
if (test_kputll(start, end) != 0) res = EXIT_FAILURE;
496+
416497
if (!test || strcmp(test, "kgetline") == 0)
417498
if (test_kgetline() != 0) res = EXIT_FAILURE;
418499

0 commit comments

Comments
 (0)