Skip to content

Commit dac8f29

Browse files
vasudeva8 authored and jkbonfield committed
Add bcf_sr_add_hreader() interface
This is like bcf_sr_add_reader but the caller supplies an existing open htsFile instead of a filename. Fixes #1862
1 parent 329e794 commit dac8f29

File tree

4 files changed

+139
-16
lines changed

4 files changed

+139
-16
lines changed

htslib/synced_bcf_reader.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/// @file htslib/synced_bcf_reader.h
22
/// Stream through multiple VCF files.
33
/*
4-
Copyright (C) 2012-2017, 2019-2024 Genome Research Ltd.
4+
Copyright (C) 2012-2017, 2019-2025 Genome Research Ltd.
55
66
Author: Petr Danecek <[email protected]>
77
@@ -233,10 +233,30 @@ void bcf_sr_destroy_threads(bcf_srs_t *files);
233233
*
234234
* See also the bcf_srs_t data structure for parameters controlling
235235
* the reader's logic.
236+
* Invokes bcf_sr_add_hreader with opened file
236237
*/
237238
HTSLIB_EXPORT
238239
int bcf_sr_add_reader(bcf_srs_t *readers, const char *fname);
239240

241+
/**
242+
* bcf_sr_add_hreader() - add a new reader using an already opened htsFile
243+
* @readers: holder of the open readers
244+
* @file_ptr: htsfile already opened
245+
* @autoclose: whether to close the file when the reader is destroyed or removed: 1 - close, 0 - do not close
246+
* @idxname: index file name for file in @file_ptr
247+
*
248+
* Returns 1 if the call succeeded, or 0 on error.
249+
*
250+
* See also the bcf_srs_t data structure for parameters controlling
251+
* the reader's logic.
252+
* If idxname is NULL, file_ptr->fn is used to find the index file.
253+
* In that case the index file must be present alongside the data file,
254+
* under its default name.
255+
*/
256+
HTSLIB_EXPORT
257+
int bcf_sr_add_hreader(bcf_srs_t *readers, htsFile *file_ptr, int autoclose,
258+
const char *idxname);
259+
240260
HTSLIB_EXPORT
241261
void bcf_sr_remove_reader(bcf_srs_t *files, int i);
242262

synced_bcf_reader.c

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* synced_bcf_reader.c -- stream through multiple VCF files.
22
3-
Copyright (C) 2012-2023 Genome Research Ltd.
3+
Copyright (C) 2012-2023, 2025 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -69,6 +69,7 @@ typedef struct
6969
{
7070
sr_sort_t sort;
7171
int regions_overlap, targets_overlap;
72+
int *closefile; // close htsfile with sync reader close or not
7273
}
7374
aux_t;
7475

@@ -251,13 +252,32 @@ void bcf_sr_destroy_threads(bcf_srs_t *files) {
251252
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
252253
{
253254
char fmode[5];
255+
int ret = 0;
256+
const char *idxname = NULL;
257+
254258
strcpy(fmode, "r");
255259
vcf_open_mode(fmode+1, fname, NULL);
256260
htsFile* file_ptr = hts_open(fname, fmode);
257261
if ( ! file_ptr ) {
258262
files->errnum = open_failed;
259263
return 0;
260264
}
265+
//get idx name and pass to add_hreader
266+
idxname = strstr(fname, HTS_IDX_DELIM);
267+
idxname += idxname ? sizeof(HTS_IDX_DELIM) - 1 : 0;
268+
if (!(ret = bcf_sr_add_hreader(files, file_ptr, 1, idxname))) {
269+
hts_close(file_ptr); //failed, close the file
270+
}
271+
return ret;
272+
}
273+
274+
int bcf_sr_add_hreader(bcf_srs_t *files, htsFile *file_ptr, int autoclose, const char *idxname)
275+
{
276+
aux_t *auxdata = NULL;
277+
if ( ! file_ptr ) {
278+
files->errnum = open_failed;
279+
return 0;
280+
}
261281

262282
files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
263283
files->has_line[files->nreaders] = 0;
@@ -274,7 +294,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
274294
BGZF *bgzf = hts_get_bgzfp(reader->file);
275295
if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) {
276296
files->errnum = no_eof;
277-
hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", fname);
297+
hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", file_ptr->fn);
278298
}
279299
if (files->p)
280300
bgzf_thread_pool(bgzf, files->p->pool, files->p->qsize);
@@ -290,7 +310,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
290310
return 0;
291311
}
292312

293-
reader->tbx_idx = tbx_index_load(fname);
313+
reader->tbx_idx = tbx_index_load2(file_ptr->fn, idxname);
294314
if ( !reader->tbx_idx )
295315
{
296316
files->errnum = idx_load_failed;
@@ -309,7 +329,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
309329

310330
reader->header = bcf_hdr_read(reader->file);
311331

312-
reader->bcf_idx = bcf_index_load(fname);
332+
reader->bcf_idx = bcf_index_load2(file_ptr->fn, idxname);
313333
if ( !reader->bcf_idx )
314334
{
315335
files->errnum = idx_load_failed;
@@ -362,7 +382,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
362382
return 0;
363383
}
364384

365-
reader->fname = strdup(fname);
385+
reader->fname = strdup(file_ptr->fn);
366386
if ( files->apply_filters )
367387
reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);
368388

@@ -413,6 +433,18 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
413433
}
414434
}
415435

436+
if ((auxdata = BCF_SR_AUX(files))) {
437+
//store closure status for htsfile
438+
int *tmp = realloc(auxdata->closefile, sizeof(int) * files->nreaders);
439+
if (!tmp) {
440+
hts_log_error("Failed to allocate memory");
441+
return 0;
442+
}
443+
tmp[files->nreaders - 1] = autoclose;
444+
auxdata->closefile = tmp;
445+
}
446+
447+
416448
return 1;
417449
}
418450

@@ -426,13 +458,15 @@ bcf_srs_t *bcf_sr_init(void)
426458
return files;
427459
}
428460

429-
static void bcf_sr_destroy1(bcf_sr_t *reader)
461+
static void bcf_sr_destroy1(bcf_sr_t *reader, int closefile)
430462
{
431463
free(reader->fname);
432464
if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx);
433465
if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx);
434466
bcf_hdr_destroy(reader->header);
435-
hts_close(reader->file);
467+
if (closefile) {
468+
hts_close(reader->file);
469+
}
436470
if ( reader->itr ) tbx_itr_destroy(reader->itr);
437471
int j;
438472
for (j=0; j<reader->mbuffer; j++)
@@ -445,8 +479,10 @@ static void bcf_sr_destroy1(bcf_sr_t *reader)
445479
void bcf_sr_destroy(bcf_srs_t *files)
446480
{
447481
int i;
482+
int *autoclose = BCF_SR_AUX(files)->closefile;
483+
448484
for (i=0; i<files->nreaders; i++)
449-
bcf_sr_destroy1(&files->readers[i]);
485+
bcf_sr_destroy1(&files->readers[i], autoclose[i]);
450486
free(files->has_line);
451487
free(files->readers);
452488
for (i=0; i<files->n_smpl; i++) free(files->samples[i]);
@@ -456,19 +492,23 @@ void bcf_sr_destroy(bcf_srs_t *files)
456492
if (files->tmps.m) free(files->tmps.s);
457493
if (files->n_threads) bcf_sr_destroy_threads(files);
458494
bcf_sr_sort_destroy(&BCF_SR_AUX(files)->sort);
495+
free(autoclose);
459496
free(files->aux);
460497
free(files);
461498
}
462499

463500
void bcf_sr_remove_reader(bcf_srs_t *files, int i)
464501
{
465502
assert( !files->samples ); // not ready for this yet
503+
int *autoclose = BCF_SR_AUX(files)->closefile;
504+
466505
bcf_sr_sort_remove_reader(files, &BCF_SR_AUX(files)->sort, i);
467-
bcf_sr_destroy1(&files->readers[i]);
506+
bcf_sr_destroy1(&files->readers[i], autoclose[i]);
468507
if ( i+1 < files->nreaders )
469508
{
470509
memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t));
471510
memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int));
511+
memmove(&autoclose[i], &autoclose[i+1], (files->nreaders-i-1)*sizeof(int));
472512
}
473513
files->nreaders--;
474514
}

test/test-bcf-sr.c

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (C) 2017, 2020, 2023 Genome Research Ltd.
2+
Copyright (C) 2017, 2020, 2023, 2025 Genome Research Ltd.
33
44
Author: Petr Danecek <[email protected]>
55
@@ -54,7 +54,7 @@ error(const char *format, ...)
5454
void HTS_NORETURN usage(int exit_code)
5555
{
5656
fprintf(stderr, "Usage: test-bcf-sr [OPTIONS] vcf-list.txt\n");
57-
fprintf(stderr, " test-bcf-sr [OPTIONS] -args file1.bcf [...]\n");
57+
fprintf(stderr, " test-bcf-sr [OPTIONS] --args file1.bcf [...]\n");
5858
fprintf(stderr, "Options:\n");
5959
fprintf(stderr, " --args pass filenames directly in argument list\n");
6060
fprintf(stderr, " --no-index allow streaming\n");
@@ -63,6 +63,7 @@ void HTS_NORETURN usage(int exit_code)
6363
fprintf(stderr, " -p, --pair <logic[+ref]> logic: snps,indels,both,snps+ref,indels+ref,both+ref,exact,some,all\n");
6464
fprintf(stderr, " -r, --regions <reg_list> comma-separated list of regions\n");
6565
fprintf(stderr, " -t, --targets <reg_list> comma-separated list of targets\n");
66+
fprintf(stderr, " -u, --usefptr use hfile pointer interface on reader addition\n");
6667
fprintf(stderr, "\n");
6768
exit(exit_code);
6869
}
@@ -133,13 +134,15 @@ int main(int argc, char *argv[])
133134
{"targets",required_argument,NULL,'t'},
134135
{"no-index",no_argument,NULL,1000},
135136
{"args",no_argument,NULL,1001},
137+
{"usefptr",no_argument,NULL,'u'},
136138
{NULL,0,NULL,0}
137139
};
138140

139-
int c, pair = 0, use_index = 1, use_fofn = 1;
141+
int c, pair = 0, use_index = 1, use_fofn = 1, usefptr = 0;
140142
enum htsExactFormat out_fmt = text_format; // for original pos + alleles
141143
const char *out_fn = NULL, *regions = NULL, *targets = NULL;
142-
while ((c = getopt_long(argc, argv, "o:O:p:r:t:h", loptions, NULL)) >= 0)
144+
htsFile **htsfp = NULL;
145+
while ((c = getopt_long(argc, argv, "o:O:p:r:t:hu", loptions, NULL)) >= 0)
143146
{
144147
switch (c)
145148
{
@@ -179,6 +182,9 @@ int main(int argc, char *argv[])
179182
case 1001:
180183
use_fofn = 0;
181184
break;
185+
case 'u':
186+
usefptr = 1; //use htsfile interface instead of fname i/f
187+
break;
182188
case 'h':
183189
usage(EXIT_SUCCESS);
184190
default: usage(EXIT_FAILURE);
@@ -218,8 +224,32 @@ int main(int argc, char *argv[])
218224
error("Failed to set targets\n");
219225
}
220226

221-
for (i=0; i<nvcf; i++)
222-
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) error("Failed to open %s: %s\n", vcfs[i],bcf_sr_strerror(sr->errnum));
227+
if (usefptr && !(htsfp = malloc(sizeof(htsFile*) * nvcf))) {
228+
error("Failed to allocate memory\n");
229+
}
230+
231+
for (i=0; i<nvcf; i++) {
232+
if (!usefptr) {
233+
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) {
234+
error("Failed to open %s: %s\n", vcfs[i],
235+
bcf_sr_strerror(sr->errnum));
236+
}
237+
} else { //use htsfile i/f
238+
if (!(htsfp[i] = hts_open(vcfs[i], "r"))) {
239+
error("Failed to open %s: %s\n", vcfs[i],
240+
bcf_sr_strerror(sr->errnum));
241+
}
242+
/* with an explicit index name, the index can be anywhere and named anything;
243+
without one, it has to sit alongside the file with the default naming */
244+
245+
const char *idxname = strstr(vcfs[i], HTS_IDX_DELIM);
246+
idxname += idxname ? sizeof(HTS_IDX_DELIM) - 1 : 0;
247+
if ( !bcf_sr_add_hreader(sr, htsfp[i], 1, idxname) ) {
248+
error("Failed to add reader %s: %s\n", vcfs[i],
249+
bcf_sr_strerror(sr->errnum));
250+
}
251+
}
252+
}
223253

224254
if (!sr->readers || sr->nreaders < 1)
225255
error("No readers set, even though one was added\n");
@@ -264,6 +294,10 @@ int main(int argc, char *argv[])
264294
free(vcfs[i]);
265295
free(vcfs);
266296
}
297+
if (usefptr) {
298+
//files are closed along with sr destroy
299+
free(htsfp);
300+
}
267301

268302
return 0;
269303
}

test/test.pl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
run_test('test_bcf_sr_sort',$opts);
5757
run_test('test_bcf_sr_no_index',$opts);
5858
run_test('test_bcf_sr_range', $opts);
59+
run_test('test_bcf_sr_hreader', $opts);
5960
run_test('test_command',$opts,cmd=>'test-bcf-translate -',out=>'test-bcf-translate.out');
6061
run_test('test_convert_padded_header',$opts);
6162
run_test('test_rebgzip',$opts);
@@ -1345,6 +1346,34 @@ sub test_bcf_sr_range {
13451346
}
13461347
}
13471348

1349+
sub test_bcf_sr_hreader {
1350+
#uses input file from test_bcf_sr_sort / test-bcf-sr.pl
1351+
#invokes bcf sync reader with the hreader (bcf_sr_add_hreader) method
1352+
my ($opts, %args) = @_;
1353+
my $test = "test_bcf_sr_hreader";
1354+
my $fail = 0;
1355+
my $cmd = "$$opts{path}/test-bcf-sr -p all $$opts{tmp}/list.txt -o $$opts{tmp}/file.out";
1356+
my $cmd_header = "$$opts{path}/test-bcf-sr -p all $$opts{tmp}/list.txt -o $$opts{tmp}/filenew.out -u";
1357+
my $cmd_diff = "diff $$opts{tmp}/filenew.out $$opts{tmp}/file.out";
1358+
1359+
my ($ret, $out) = _cmd($cmd);
1360+
if ($ret != 0) {
1361+
failed($opts, $test, "Failed to create reference output\n");
1362+
return;
1363+
}
1364+
($ret, $out) = _cmd($cmd_header);
1365+
if ($ret != 0) {
1366+
failed($opts, $test, "Failed to create output\n");
1367+
return;
1368+
}
1369+
($ret, $out) = _cmd($cmd_diff);
1370+
if ($ret != 0) {
1371+
failed($opts, $test, "Output differs to reference output\n");
1372+
return;
1373+
}
1374+
passed($opts, $test);
1375+
}
1376+
13481377
sub test_command
13491378
{
13501379
my ($opts, %args) = @_;

0 commit comments

Comments
 (0)