Skip to content

Commit 872d506

Browse files
authored
Merge pull request #2 from samtools/develop
Merge upstream changes
2 parents c71d3d6 + 44192ea commit 872d506

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+2310
-425
lines changed

INSTALL

Lines changed: 60 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,32 @@
11
System Requirements
22
===================
33

4-
BCFtools and HTSlib depend on the zlib library <http://zlib.net>, the bzip2
5-
library <http://bzip.org/> and liblzma <http://tukaani.org/xz/>. Building
6-
them requires development files to be installed on the build machine;
4+
BCFtools and HTSlib depend on the following libraries:
5+
6+
BCFtools:
7+
zlib <http://zlib.net>
8+
gsl <https://www.gnu.org/software/gsl/>
9+
(optional, for the 'polysomy' command)
10+
libperl <http://www.perl.org/>
11+
(optional, to support filters using perl syntax)
12+
13+
HTSlib:
14+
zlib <http://zlib.net>
15+
libbz2 <http://bzip.org/>
16+
liblzma <http://tukaani.org/xz/>
17+
libcurl <https://curl.haxx.se/>
18+
(optional but strongly recommended, for network access)
19+
libcrypto <https://www.openssl.org/>
20+
(optional, for Amazon S3 support; not needed on macOS)
21+
22+
Building them requires development files to be installed on the build machine;
723
note that some Linux distributions package these separately from the library
8-
itself (see below).
24+
itself. See the "System Specific Details" below for guidance on how to install
25+
these on a variety of systems.
926

1027
The bzip2 and liblzma dependencies can be removed if full CRAM support
1128
is not needed - see HTSlib's INSTALL file for details.
1229

13-
Packages for dpkg-based Linux distributions (Debian / Ubuntu) are:
14-
15-
zlib1g-dev
16-
libbz2-dev
17-
liblzma-dev
18-
19-
Packages for rpm or yum-based Linux distributions (RedHat / Fedora / CentOS)
20-
are:
21-
22-
zlib-devel
23-
bzip2-devel
24-
xz-devel
25-
2630
To build BCFtools, you will need:
2731

2832
GNU make
@@ -85,12 +89,6 @@ sophisticated filtering. This option can be enabled by supplying the
8589

8690
./configure --enable-perl-filters
8791

88-
Note that enabling this option changes the license from MIT to GPL
89-
because bcftools need to be built with
90-
91-
perl -MExtUtils::Embed -e ccopts -e ldopts
92-
93-
9492
Optional Compilation with GSL
9593
=============================
9694

@@ -136,3 +134,42 @@ The bgzip and tabix utilities are provided by HTSlib. If you have not also
136134
installed HTSlib separately, you may wish to install these utilities by hand
137135
by copying bcftools-1.x/htslib-1.x/{bgzip,tabix} to the same bin directory
138136
to which you have installed bcftools et al.
137+
138+
139+
System Specific Details
140+
=======================
141+
142+
Installing the prerequisites is system dependent and there is more
143+
than one correct way of satisfying these, including downloading them
144+
from source, compiling and installing them yourself.
145+
146+
For people with super-user access, we provide an example set of commands
147+
below for installing the dependencies on a variety of operating system
148+
distributions. Note that these are not specific recommendations on distribution,
149+
compiler or SSL implementation. It is assumed you already have the core set
150+
of packages for the given distribution - the lists may be incomplete if
151+
this is not the case.
152+
153+
Debian / Ubuntu
154+
---------------
155+
156+
sudo apt-get update # Ensure the package list is up to date
157+
sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libperl-dev libgsl0-dev
158+
159+
Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
160+
161+
RedHat / CentOS
162+
---------------
163+
164+
sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel gsl-devel perl-ExtUtils-Embed
165+
166+
Alpine Linux
167+
------------
168+
169+
sudo apk update # Ensure the package list is up to date
170+
sudo apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev
171+
172+
OpenSUSE
173+
--------
174+
175+
sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel gsl-devel

Makefile

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ endif
9494

9595
include config.mk
9696

97-
PACKAGE_VERSION = 1.8
97+
PACKAGE_VERSION = 1.9
9898

9999
# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
100100
# description of the working tree: either a release tag with the same value
@@ -176,65 +176,69 @@ endif # PLUGINS_ENABLED
176176
plugins: $(PLUGINS)
177177

178178
bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h)
179-
bin_h = bin.h $(htslib_hts_h)
180179
call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h
181180
variantkey_h = variantkey.h
182181
convert_h = convert.h $(htslib_vcf_h) $(variantkey_h)
183182
tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h)
184183
filter_h = filter.h $(htslib_vcf_h)
184+
gvcf_h = gvcf.h $(bcftools_h)
185+
khash_str2str_h = khash_str2str.h $(htslib_khash_h)
185186
ploidy_h = ploidy.h regidx.h
186187
prob1_h = prob1.h $(htslib_vcf_h) $(call_h)
187-
roh_h = HMM.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(bcftools_h)
188-
cnv_h = HMM.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)
188+
smpl_ilist_h = smpl_ilist.h $(htslib_vcf_h)
189+
vcfbuf_h = vcfbuf.h $(htslib_vcf_h)
189190
bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h)
190191
bam_sample_h = bam_sample.h $(htslib_sam_h)
191192

192193
main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h)
193-
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(bcftools_h) vcmp.h $(filter_h)
194-
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(bcftools_h) vcmp.h $(filter_h)
195-
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h)
194+
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) $(htslib_khash_h)
195+
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h)
196+
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h)
196197
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(bcftools_h)
197-
vcfconvert.o: vcfconvert.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
198+
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
198199
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h
199200
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) hclust.h
200-
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h)
201+
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
201202
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
202203
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
203-
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(bcftools_h) rbuf.h
204-
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
205-
vcfroh.o: vcfroh.c $(roh_h)
206-
vcfcnv.o: vcfcnv.c $(cnv_h)
204+
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h
205+
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
206+
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
207+
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
207208
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h)
208-
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(bcftools_h)
209-
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) $(bin_h)
210-
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
211-
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(bcftools_h)
209+
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) kheap.h $(bcftools_h)
210+
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h
211+
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h)
212+
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(bcftools_h) $(khash_str2str_h)
212213
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
213214
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)
214215
variantkey.o: variantkey.c $(variantkey_h)
215216
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(convert_h)
216217
tsv2vcf.o: tsv2vcf.c $(tsv2vcf_h)
217218
em.o: em.c $(htslib_vcf_h) kmin.h $(call_h)
218-
filter.o: filter.c config.h $(htslib_khash_str2int_h) $(filter_h) $(bcftools_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h)
219+
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) config.h $(filter_h) $(bcftools_h)
219220
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(PERL_CFLAGS) -c -o $@ $<
220-
gvcf.o: gvcf.c gvcf.h $(call_h)
221+
gvcf.o: gvcf.c $(gvcf_h) $(bcftools_h)
221222
kmin.o: kmin.c kmin.h
222223
mcall.o: mcall.c $(htslib_kfunc_h) $(call_h)
223224
prob1.o: prob1.c $(prob1_h)
224-
vcmp.o: vcmp.c $(htslib_hts_h) vcmp.h
225-
ploidy.o: ploidy.c regidx.h $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_hts_h) $(bcftools_h) $(ploidy_h)
225+
vcmp.o: vcmp.c $(htslib_hts_h) $(htslib_vcf_h) vcmp.h
226+
ploidy.o: ploidy.c $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_hts_h) $(bcftools_h) $(ploidy_h)
226227
polysomy.o: polysomy.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(bcftools_h) peakfit.h
227228
peakfit.o: peakfit.c peakfit.h $(htslib_hts_h) $(htslib_kstring_h)
228-
bin.o: bin.c $(bin_h)
229+
bin.o: bin.c $(bcftools_h) bin.h
229230
regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h
230-
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) rbuf.h $(bcftools_h) regidx.h
231-
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(call_h) $(bam2bcf_h) $(bam_sample_h)
232-
bam_sample.o: $(bam_sample_h) $(htslib_hts_h) $(htslib_khash_str2int_h)
231+
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h)
232+
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
233+
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
234+
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h)
235+
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
233236
version.o: version.h version.c
234-
hclust.o: hclust.c hclust.h
235-
vcfbuf.o: vcfbuf.c vcfbuf.h rbuf.h
236-
smpl_ilist.o: smpl_ilist.c smpl_ilist.h
237-
csq.o: csq.c smpl_ilist.h regidx.h filter.h kheap.h rbuf.h
237+
hclust.o: hclust.c $(htslib_hts_h) $(htslib_kstring_h) $(bcftools_h) hclust.h
238+
HMM.o: HMM.c $(htslib_hts_h) HMM.h
239+
vcfbuf.o: vcfbuf.c $(htslib_vcf_h) $(htslib_vcfutils_h) $(bcftools_h) $(vcfbuf_h) rbuf.h
240+
smpl_ilist.o: smpl_ilist.c $(bcftools_h) $(smpl_ilist_h)
241+
csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h
238242

239243
# test programs
240244

@@ -256,7 +260,7 @@ test/test-rbuf.o: test/test-rbuf.c rbuf.h
256260
test/test-rbuf: test/test-rbuf.o
257261
$(CC) $(LDFLAGS) -o $@ $^ $(ALL_LIBS)
258262

259-
test/test-regidx.o: test/test-regidx.c regidx.h
263+
test/test-regidx.o: test/test-regidx.c $(htslib_kstring_h) regidx.h
260264

261265
test/test-regidx: test/test-regidx.o regidx.o $(HTSLIB)
262266
$(CC) $(ALL_LDFLAGS) -o $@ $^ $(HTSLIB) -lpthread $(HTSLIB_LIB) $(ALL_LIBS)

NEWS

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,29 @@
1-
## Release 1.9
1+
## Release a.b
22

3-
* `annotate`: also the REF and ALT columns can be now transferred from the annotation file
3+
* `csq`
4+
5+
- significant reduction of memory usage in the local -l mode for VCFs
6+
with thousands of samples and 20% reduction in the non-local
7+
haplotype-aware mode.
8+
9+
- fixes a small memory leak and formatting issue in FORMAT/BCSQ at
10+
sites with many consequences
11+
12+
* `+dosage`: fix some serious bugs
13+
14+
* `reheader`
15+
16+
- new -f, --fai option for updating contig lines in the VCF header
17+
18+
* VariantKey support
19+
20+
## Release 1.9 (18th July 2018)
21+
22+
* `annotate`
23+
24+
- REF and ALT columns can now be transferred from the annotation file.
25+
26+
- fixed bug when setting vector_end values.
427

528
* `consensus`
629

@@ -22,8 +45,13 @@
2245
records will be expanded. In order to drop records completely, one can stream
2346
through "bcftools view" first.
2447

25-
* `csq`: since the real consequence of start/splice events are not known, the aminoacid
26-
positions at subsequent variants should stay unchanged
48+
* `csq`
49+
50+
- since the real consequences of start/splice events are not known, the amino acid
51+
positions at subsequent variants should stay unchanged
52+
53+
- add `--force` option to skip malformed transcripts in GFFs with out-of-phase
54+
CDS exons.
2755

2856
* `+dosage`: output all alleles and all their dosages at multiallelic sites
2957

@@ -43,6 +71,9 @@
4371

4472
- fix a parsing problem, '@' was not removed from '@filename' expressions
4573

74+
* `mpileup`: fixed bug where, if samples were renamed using the `-G` (`--read-groups`)
75+
option, some samples could be omitted from the output file.
76+
4677
* `norm`: update INFO/END when normalizing indels
4778

4879
* `+split`: new -S option to subset samples and to use custom file names instead of the defaults

bam_sample.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* bam_sample.c -- group data by sample.
22
33
Copyright (C) 2010, 2011 Broad Institute.
4-
Copyright (C) 2013, 2016 Genome Research Ltd.
4+
Copyright (C) 2013, 2016-2018 Genome Research Ltd.
55
66
Author: Heng Li <[email protected]>, Petr Danecek <[email protected]>
77
@@ -167,10 +167,14 @@ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
167167
void *bam_smpls = khash_str2int_init();
168168
int first_smpl = -1, nskipped = 0;
169169
const char *p = bam_hdr, *q, *r;
170-
while ((q = strstr(p, "@RG")) != 0)
170+
while (p != NULL && (q = strstr(p, "@RG")) != 0)
171171
{
172+
char *eol = strchr(q + 3, '\n');
173+
if (q > bam_hdr && *(q - 1) != '\n') { // @RG must be at start of line
174+
p = eol;
175+
continue;
176+
}
172177
p = q + 3;
173-
r = q = 0;
174178
if ((q = strstr(p, "\tID:")) != 0) q += 4;
175179
if ((r = strstr(p, "\tSM:")) != 0) r += 4;
176180
if (r && q)
@@ -220,7 +224,7 @@ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
220224
}
221225
else
222226
break;
223-
p = q > r ? q : r;
227+
p = eol;
224228
}
225229
int nsmpls = khash_str2int_size(bam_smpls);
226230
khash_str2int_destroy_free(bam_smpls);
@@ -234,6 +238,7 @@ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
234238
{
235239
// no suitable read group is available in this bam: ignore the whole file.
236240
free(file->fname);
241+
if ( file->rg2idx ) khash_str2int_destroy_free(file->rg2idx);
237242
bsmpl->nfiles--;
238243
return -1;
239244
}

bin.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ bin_t *bin_init(const char *list_def, float min, float max)
4848
{
4949
char *tmp;
5050
bin->bins[i] = strtod(list[i],&tmp);
51-
if ( !tmp ) error("Could not parse %s: %s\n", list_def, list[i]);
51+
if ( *tmp ) error("Could not parse %s: %s\n", list_def, list[i]);
5252
if ( min!=max && (bin->bins[i]<min || bin->bins[i]>max) )
5353
error("Expected values from the interval [%f,%f], found %s\n", min, max, list[i]);
5454
free(list[i]);

0 commit comments

Comments
 (0)