Skip to content

Commit 90b3649

Browse files
committed
Merge Enable samtools index on multiple alignment files (PR samtools#1674)
2 parents c7acf84 + 1df722b commit 90b3649

File tree

6 files changed

+107
-33
lines changed

6 files changed

+107
-33
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ bam_cat.o: bam_cat.c config.h $(htslib_bgzf_h) $(htslib_sam_h) $(htslib_cram_h)
173173
bam_color.o: bam_color.c config.h $(htslib_sam_h)
174174
bam_fastq.o: bam_fastq.c config.h $(htslib_sam_h) $(htslib_klist_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(samtools_h) $(sam_opts_h)
175175
bam_import.o: bam_import.c config.h $(htslib_sam_h) $(htslib_thread_pool_h) $(samtools_h) $(sam_opts_h)
176-
bam_index.o: bam_index.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_h) $(samtools_h) $(sam_opts_h)
176+
bam_index.o: bam_index.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_hfile_h) $(htslib_khash_h) $(samtools_h) $(sam_opts_h)
177177
bam_lpileup.o: bam_lpileup.c config.h $(bam_plbuf_h) $(bam_lpileup_h) splaysort.h
178178
bam_mate.o: bam_mate.c config.h $(htslib_thread_pool_h) $(sam_opts_h) $(htslib_kstring_h) $(htslib_sam_h) $(samtools_h)
179179
bam_md.o: bam_md.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_thread_pool_h) $(sam_opts_h) $(samtools_h)

NEWS

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ Release a.b
44
* The dict command can now read BWA's .alt file and add AH:* tags
55
indicating reference sequences that represent alternate loci.
66

7+
* The "samtools index" command can now accept multiple alignment filenames
8+
with the new -M option, and will index each of them separately. (Specifying
9+
the output index filename via out.index or the new -o option is currently
10+
only applicable when there is only one alignment file to be indexed.)
11+
712

813
Release 1.15.1 (7th April 2022)
914
-------------------------------

bam_index.c

+61-27
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ DEALINGS IN THE SOFTWARE. */
2828

2929
#include <htslib/hts.h>
3030
#include <htslib/sam.h>
31+
#include <htslib/hfile.h>
3132
#include <htslib/khash.h>
3233
#include <stdlib.h>
3334
#include <stdio.h>
34-
#define __STDC_FORMAT_MACROS
3535
#include <inttypes.h>
3636
#include <unistd.h>
3737
#include <getopt.h>
@@ -44,63 +44,97 @@ DEALINGS IN THE SOFTWARE. */
4444
static void index_usage(FILE *fp)
4545
{
4646
fprintf(fp,
47-
"Usage: samtools index [-bc] [-m INT] <in.bam> [out.index]\n"
47+
"Usage: samtools index -M [-bc] [-m INT] <in1.bam> <in2.bam>...\n"
48+
" or: samtools index [-bc] [-m INT] <in.bam> [out.index]\n"
4849
"Options:\n"
4950
" -b Generate BAI-format index for BAM files [default]\n"
5051
" -c Generate CSI-format index for BAM files\n"
5152
" -m INT Set minimum interval size for CSI indices to 2^INT [%d]\n"
53+
" -M Interpret all filename arguments as files to be indexed\n"
54+
" -o FILE Write index to FILE [alternative to <out.index> as an argument]\n"
5255
" -@ INT Sets the number of threads [none]\n", BAM_LIDX_SHIFT);
5356
}
5457

58+
// Returns 1 if the file does not exist or can be positively
59+
// identified as an index file.
60+
static int nonexistent_or_index(const char *fn)
61+
{
62+
int ret1, ret2;
63+
htsFormat fmt;
64+
hFILE *fp = hopen(fn, "r");
65+
if (fp == NULL) return 1;
66+
67+
ret1 = hts_detect_format2(fp, fn, &fmt);
68+
ret2 = hclose(fp);
69+
if (ret1 < 0 || ret2 < 0) return 0;
70+
71+
return fmt.category == index_file;
72+
}
73+
5574
int bam_index(int argc, char *argv[])
5675
{
5776
int csi = 0;
5877
int min_shift = BAM_LIDX_SHIFT;
78+
int multiple = 0;
5979
int n_threads = 0;
60-
int c, ret;
80+
int n_files, c, i, ret;
81+
const char *fn_idx = NULL;
6182

62-
while ((c = getopt(argc, argv, "bcm:@:")) >= 0)
83+
while ((c = getopt(argc, argv, "bcm:Mo:@:")) >= 0)
6384
switch (c) {
6485
case 'b': csi = 0; break;
6586
case 'c': csi = 1; break;
6687
case 'm': csi = 1; min_shift = atoi(optarg); break;
88+
case 'M': multiple = 1; break;
89+
case 'o': fn_idx = optarg; break;
6790
case '@': n_threads = atoi(optarg); break;
6891
default:
6992
index_usage(stderr);
7093
return 1;
7194
}
7295

73-
if (optind == argc) {
74-
index_usage(stdout);
75-
return 1;
76-
}
96+
n_files = argc - optind;
7797

78-
ret = sam_index_build3(argv[optind], argv[optind+1], csi? min_shift : 0, n_threads);
79-
switch (ret) {
80-
case 0:
98+
if (n_files == 0) {
99+
index_usage(stdout);
81100
return 0;
101+
}
82102

83-
case -2:
84-
print_error_errno("index", "failed to open \"%s\"", argv[optind]);
85-
break;
103+
// Handle legacy synopsis
104+
if (n_files == 2 && !fn_idx && nonexistent_or_index(argv[optind+1])) {
105+
n_files = 1;
106+
fn_idx = argv[optind+1];
107+
}
86108

87-
case -3:
88-
print_error("index", "\"%s\" is in a format that cannot be usefully indexed", argv[optind]);
89-
break;
109+
if (n_files > 1 && !multiple) {
110+
print_error("index", "use -M to enable indexing more than one alignment file");
111+
return EXIT_FAILURE;
112+
}
90113

91-
case -4:
92-
if (argv[optind+1])
93-
print_error("index", "failed to create or write index \"%s\"", argv[optind+1]);
94-
else
95-
print_error("index", "failed to create or write index");
96-
break;
114+
if (fn_idx && n_files > 1) {
115+
// TODO In future we may allow %* placeholders or similar
116+
print_error("index", "can't use -o with multiple input alignment files");
117+
return EXIT_FAILURE;
118+
}
97119

98-
default:
99-
print_error_errno("index", "failed to create index for \"%s\"", argv[optind]);
100-
break;
120+
for (i = optind; i < optind + n_files; i++) {
121+
ret = sam_index_build3(argv[i], fn_idx, csi? min_shift : 0, n_threads);
122+
if (ret < 0) {
123+
if (ret == -2)
124+
print_error_errno("index", "failed to open \"%s\"", argv[i]);
125+
else if (ret == -3)
126+
print_error("index", "\"%s\" is in a format that cannot be usefully indexed", argv[i]);
127+
else if (ret == -4 && fn_idx)
128+
print_error("index", "failed to create or write index \"%s\"", fn_idx);
129+
else if (ret == -4)
130+
print_error("index", "failed to create or write index");
131+
else
132+
print_error_errno("index", "failed to create index for \"%s\"", argv[i]);
133+
return EXIT_FAILURE;
134+
}
101135
}
102136

103-
return EXIT_FAILURE;
137+
return EXIT_SUCCESS;
104138
}
105139

106140
/*

doc/samtools-index.1

+29-5
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@ samtools index \- indexes SAM/BAM/CRAM files
4343
.
4444
.SH SYNOPSIS
4545
.PP
46-
samtools index
46+
.B samtools index -M
47+
.RB [ -bc ]
48+
.RB [ -m
49+
.IR INT ]
50+
.I FILE FILE
51+
.RI [ FILE ...]
52+
.PP
53+
.B samtools index
4754
.RB [ -bc ]
4855
.RB [ -m
4956
.IR INT ]
@@ -52,19 +59,25 @@ samtools index
5259

5360
.SH DESCRIPTION
5461
.PP
55-
Index a coordinate-sorted BGZIP-compressed SAM, BAM or CRAM file for fast
62+
Index coordinate-sorted BGZIP-compressed SAM, BAM or CRAM files for fast
5663
random access.
5764
Note for SAM this only works if the file has been BGZF compressed first.
65+
(The first synopsis with multiple input
66+
.IR FILE s
67+
is only available with Samtools 1.16 or later.)
5868

5969
This index is needed when
6070
.I region
6171
arguments are used to limit
6272
.B samtools view
6373
and similar commands to particular regions of interest.
6474

65-
If an output filename is given, the index file will be written to
66-
.IR out.index .
67-
Otherwise, for a CRAM file
75+
When only one alignment file is being indexed, the output index filename
76+
can be specified via
77+
.B -o
78+
or as shown in the second synopsis.
79+
80+
When no output filename is specified, for a CRAM file
6881
.IR aln.cram ,
6982
index file
7083
.IB aln.cram .crai
@@ -101,6 +114,17 @@ as the fixed value used by the BAI format.
101114
.BI "-m " INT
102115
Create a CSI index, with a minimum interval size of 2^INT.
103116
.TP
117+
.B -M
118+
Interpret all filename arguments as alignment files to be indexed individually.
119+
(Without
120+
.BR -M ,
121+
filename arguments are interpreted solely as per the second synopsis.)
122+
.TP
123+
.BI "-o " FILE
124+
Write the output index to
125+
.IR FILE .
126+
(Currently may only be used when exactly one alignment file is being indexed.)
127+
.TP
104128
.BI "-@, --threads " INT
105129
Number of input/output compression threads to use in addition to main thread [0].
106130

doc/samtools.1

+2
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ samtools index
233233

234234
Index a coordinate-sorted SAM, BAM or CRAM file for fast random access.
235235
Note for SAM this only works if the file has been BGZF compressed first.
236+
(Starting from Samtools 1.16, this command can also be given several
237+
alignment filenames, which are indexed individually.)
236238

237239
This index is needed when
238240
.I region

test/test.pl

+9
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,15 @@ sub test_index
843843
test_cmd($opts,out=>'dat/test_input_1_b.X.expected',cmd=>"$$opts{bin}/samtools view${threads} -X $$opts{path}/dat/test_input_1_b.bam $$opts{tmp}/test_input_1_b.bam.bai ref2");
844844
test_cmd($opts,out=>'dat/test_input_1_ab.X.expected', ignore_pg_header => 1, cmd=>"$$opts{bin}/samtools merge${threads} -O sam - -X -cp -R ref2 $$opts{path}/dat/test_input_1_a.bam $$opts{path}/dat/test_input_1_b.bam $$opts{path}/dat/test_input_1_a.bam.bai $$opts{tmp}/test_input_1_b.bam.bai");
845845

846+
# Check -o option
847+
cmd("$$opts{bin}/samtools index${threads} -o $$opts{tmp}/test_input_1_b_opt.bam.bai $$opts{path}/dat/test_input_1_b.bam");
848+
test_cmd($opts,out=>'dat/test_input_1_b.X.expected',cmd=>"$$opts{bin}/samtools view${threads} -X $$opts{path}/dat/test_input_1_b.bam $$opts{tmp}/test_input_1_b_opt.bam.bai ref2");
849+
850+
# Check indexing multiple alignment files
851+
test_cmd($opts,out=>'dat/empty.expected',cmd=>"$$opts{bin}/samtools index${threads} $$opts{path}/dat/test_input_1_a.bam $$opts{path}/dat/test_input_1_b.bam",want_fail=>1);
852+
test_cmd($opts,out=>'dat/test_input_1_a.bam.bai.expected',cmd=>"$$opts{bin}/samtools index${threads} -M $$opts{path}/dat/test_input_1_a.bam $$opts{path}/dat/test_input_1_b.bam && cat $$opts{path}/dat/test_input_1_a.bam.bai",binary=>1);
853+
test_cmd($opts,out=>'dat/test_input_1_b.X.expected',cmd=>"$$opts{bin}/samtools view${threads} -X $$opts{path}/dat/test_input_1_b.bam $$opts{path}/dat/test_input_1_b.bam.bai ref2");
854+
846855
# Check auto-indexing
847856
cmd("$$opts{bin}/samtools view${threads} --write-index -o $$opts{path}/dat/auto_indexed.tmp.bam $$opts{path}/dat/mpileup.1.sam");
848857
test_cmd($opts,out=>"dat/auto_indexed.tmp.bam.csi", cmd=>"$$opts{bin}/samtools index${threads} -c $$opts{path}/dat/auto_indexed.tmp.bam $$opts{tmp}/auto_indexed.csi && cat $$opts{tmp}/auto_indexed.csi", binary=>1);

0 commit comments

Comments
 (0)