Skip to content

Commit

Permalink
Merge pull request #2 from samtools/develop
Browse files Browse the repository at this point in the history
Merge upstream changes
  • Loading branch information
nicolaasuni authored Sep 6, 2018
2 parents c71d3d6 + 44192ea commit 872d506
Show file tree
Hide file tree
Showing 100 changed files with 2,310 additions and 425 deletions.
83 changes: 60 additions & 23 deletions INSTALL
Original file line number Diff line number Diff line change
@@ -1,28 +1,32 @@
System Requirements
===================

BCFtools and HTSlib depend on the zlib library <http://zlib.net>, the bzip2
library <http://bzip.org/> and liblzma <http://tukaani.org/xz/>. Building
them requires development files to be installed on the build machine;
BCFtools and HTSlib depend on the following libraries:

BCFtools:
zlib <http://zlib.net>
gsl <https://www.gnu.org/software/gsl/>
(optional, for the 'polysomy' command)
libperl <http://www.perl.org/>
(optional, to support filters using perl syntax)

HTSlib:
zlib <http://zlib.net>
libbz2 <http://bzip.org/>
liblzma <http://tukaani.org/xz/>
libcurl <https://curl.haxx.se/>
(optional but strongly recommended, for network access)
libcrypto <https://www.openssl.org/>
(optional, for Amazon S3 support; not needed on MacOS)

Building them requires development files to be installed on the build machine;
note that some Linux distributions package these separately from the library
itself (see below).
itself. See the "System Specific Details" below for guidance on how to install
these on a variety of systems.

The bzip2 and liblzma dependencies can be removed if full CRAM support
is not needed - see HTSlib's INSTALL file for details.

Packages for dpkg-based Linux distributions (Debian / Ubuntu) are:

zlib1g-dev
libbz2-dev
liblzma-dev

Packages for rpm or yum-based Linux distributions (RedHat / Fedora / CentOS)
are:

zlib-devel
bzip2-devel
xz-devel

To build BCFtools, you will need:

GNU make
Expand Down Expand Up @@ -85,12 +89,6 @@ sophisticated filtering. This option can be enabled by supplying the

./configure --enable-perl-filters

Note that enabling this option changes the license from MIT to GPL
because bcftools need to be built with

perl -MExtUtils::Embed -e ccopts -e ldopts


Optional Compilation with GSL
=============================

Expand Down Expand Up @@ -136,3 +134,42 @@ The bgzip and tabix utilities are provided by HTSlib. If you have not also
installed HTSlib separately, you may wish to install these utilities by hand
by copying bcftools-1.x/htslib-1.x/{bgzip,tabix} to the same bin directory
to which you have installed bcftools et al.


System Specific Details
=======================

Installing the prerequisites is system dependent and there is more
than one correct way of satisfying these, including downloading them
from source, compiling and installing them yourself.

For people with super-user access, we provide an example set of commands
below for installing the dependencies on a variety of operating system
distributions. Note these are not specific recommendations on distribution,
compiler or SSL implementation. It is assumed you already have the core set
of packages for the given distribution - the lists may be incomplete if
this is not the case.

Debian / Ubuntu
---------------

sudo apt-get update # Ensure the package list is up to date
sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libperl-dev libgsl0-dev

Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.

RedHat / CentOS
---------------

sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel gsl-devel perl-ExtUtils-Embed

Alpine Linux
------------

sudo apk update # Ensure the package list is up to date
sudo apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev

OpenSUSE
--------

sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel gsl-devel
64 changes: 34 additions & 30 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.8
PACKAGE_VERSION = 1.9

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down Expand Up @@ -176,65 +176,69 @@ endif # PLUGINS_ENABLED
plugins: $(PLUGINS)

bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h)
bin_h = bin.h $(htslib_hts_h)
call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h
variantkey_h = variantkey.h
convert_h = convert.h $(htslib_vcf_h) $(variantkey_h)
tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h)
filter_h = filter.h $(htslib_vcf_h)
gvcf_h = gvcf.h $(bcftools_h)
khash_str2str_h = khash_str2str.h $(htslib_khash_h)
ploidy_h = ploidy.h regidx.h
prob1_h = prob1.h $(htslib_vcf_h) $(call_h)
roh_h = HMM.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(bcftools_h)
cnv_h = HMM.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)
smpl_ilist_h = smpl_ilist.h $(htslib_vcf_h)
vcfbuf_h = vcfbuf.h $(htslib_vcf_h)
bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h)
bam_sample_h = bam_sample.h $(htslib_sam_h)

main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(bcftools_h) vcmp.h $(filter_h)
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(bcftools_h) vcmp.h $(filter_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) $(htslib_khash_h)
vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(bcftools_h)
vcfconvert.o: vcfconvert.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) hclust.h
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h)
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(bcftools_h) rbuf.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
vcfroh.o: vcfroh.c $(roh_h)
vcfcnv.o: vcfcnv.c $(cnv_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h)
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(bcftools_h)
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) $(bin_h)
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(bcftools_h)
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) kheap.h $(bcftools_h)
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h)
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(bcftools_h) $(khash_str2str_h)
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)
ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h)
variantkey.o: variantkey.c $(variantkey_h)
convert.o: convert.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(convert_h)
tsv2vcf.o: tsv2vcf.c $(tsv2vcf_h)
em.o: em.c $(htslib_vcf_h) kmin.h $(call_h)
filter.o: filter.c config.h $(htslib_khash_str2int_h) $(filter_h) $(bcftools_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h)
filter.o: filter.c $(htslib_khash_str2int_h) $(htslib_hts_defs_h) $(htslib_vcfutils_h) $(htslib_kfunc_h) config.h $(filter_h) $(bcftools_h)
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(PERL_CFLAGS) -c -o $@ $<
gvcf.o: gvcf.c gvcf.h $(call_h)
gvcf.o: gvcf.c $(gvcf_h) $(bcftools_h)
kmin.o: kmin.c kmin.h
mcall.o: mcall.c $(htslib_kfunc_h) $(call_h)
prob1.o: prob1.c $(prob1_h)
vcmp.o: vcmp.c $(htslib_hts_h) vcmp.h
ploidy.o: ploidy.c regidx.h $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_hts_h) $(bcftools_h) $(ploidy_h)
vcmp.o: vcmp.c $(htslib_hts_h) $(htslib_vcf_h) vcmp.h
ploidy.o: ploidy.c $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_hts_h) $(bcftools_h) $(ploidy_h)
polysomy.o: polysomy.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(bcftools_h) peakfit.h
peakfit.o: peakfit.c peakfit.h $(htslib_hts_h) $(htslib_kstring_h)
bin.o: bin.c $(bin_h)
bin.o: bin.c $(bcftools_h) bin.h
regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) rbuf.h $(bcftools_h) regidx.h
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(call_h) $(bam2bcf_h) $(bam_sample_h)
bam_sample.o: $(bam_sample_h) $(htslib_hts_h) $(htslib_khash_str2int_h)
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h)
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
version.o: version.h version.c
hclust.o: hclust.c hclust.h
vcfbuf.o: vcfbuf.c vcfbuf.h rbuf.h
smpl_ilist.o: smpl_ilist.c smpl_ilist.h
csq.o: csq.c smpl_ilist.h regidx.h filter.h kheap.h rbuf.h
hclust.o: hclust.c $(htslib_hts_h) $(htslib_kstring_h) $(bcftools_h) hclust.h
HMM.o: HMM.c $(htslib_hts_h) HMM.h
vcfbuf.o: vcfbuf.c $(htslib_vcf_h) $(htslib_vcfutils_h) $(bcftools_h) $(vcfbuf_h) rbuf.h
smpl_ilist.o: smpl_ilist.c $(bcftools_h) $(smpl_ilist_h)
csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h

# test programs

Expand All @@ -256,7 +260,7 @@ test/test-rbuf.o: test/test-rbuf.c rbuf.h
test/test-rbuf: test/test-rbuf.o
$(CC) $(LDFLAGS) -o $@ $^ $(ALL_LIBS)

test/test-regidx.o: test/test-regidx.c regidx.h
test/test-regidx.o: test/test-regidx.c $(htslib_kstring_h) regidx.h

test/test-regidx: test/test-regidx.o regidx.o $(HTSLIB)
$(CC) $(ALL_LDFLAGS) -o $@ $^ $(HTSLIB) -lpthread $(HTSLIB_LIB) $(ALL_LIBS)
Expand Down
39 changes: 35 additions & 4 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
## Release 1.9
## Release a.b

* `annotate`: also the REF and ALT columns can be now transferred from the annotation file
* `csq`

- significant reduction of memory usage in the local -l mode for VCFs
with thousands of samples and 20% reduction in the non-local
haplotype-aware mode.

- fixes a small memory leak and formatting issue in FORMAT/BCSQ at
sites with many consequences

* `+dosage`: fix some serious bugs

* `reheader`

- new -f, --fai option for updating contig lines in the VCF header

* VariantKey support

## Release 1.9 (18th July 2018)

* `annotate`

- REF and ALT columns can be now transferred from the annotation file.

- fixed bug when setting vector_end values.

* `consensus`

Expand All @@ -22,8 +45,13 @@
records will be expanded. In order to drop records completely, one can stream
through "bcftools view" first.

* `csq`: since the real consequence of start/splice events are not known, the aminoacid
positions at subsequent variants should stay unchanged
* `csq`

- since the real consequence of start/splice events are not known, the aminoacid
positions at subsequent variants should stay unchanged

- add `--force` option to skip malformatted transcripts in GFFs with out-of-phase
CDS exons.

* `+dosage`: output all alleles and all their dosages at multiallelic sites

Expand All @@ -43,6 +71,9 @@

- fix a parsing problem, '@' was not removed from '@filename' expressions

* `mpileup`: fixed bug where, if samples were renamed using the `-G` (`--read-groups`)
option, some samples could be omitted from the output file.

* `norm`: update INFO/END when normalizing indels

* `+split`: new -S option to subset samples and to use custom file names instead of the defaults
Expand Down
13 changes: 9 additions & 4 deletions bam_sample.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* bam_sample.c -- group data by sample.
Copyright (C) 2010, 2011 Broad Institute.
Copyright (C) 2013, 2016 Genome Research Ltd.
Copyright (C) 2013, 2016-2018 Genome Research Ltd.
Author: Heng Li <[email protected]>, Petr Danecek <[email protected]>
Expand Down Expand Up @@ -167,10 +167,14 @@ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
void *bam_smpls = khash_str2int_init();
int first_smpl = -1, nskipped = 0;
const char *p = bam_hdr, *q, *r;
while ((q = strstr(p, "@RG")) != 0)
while (p != NULL && (q = strstr(p, "@RG")) != 0)
{
char *eol = strchr(q + 3, '\n');
if (q > bam_hdr && *(q - 1) != '\n') { // @RG must be at start of line
p = eol;
continue;
}
p = q + 3;
r = q = 0;
if ((q = strstr(p, "\tID:")) != 0) q += 4;
if ((r = strstr(p, "\tSM:")) != 0) r += 4;
if (r && q)
Expand Down Expand Up @@ -220,7 +224,7 @@ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
}
else
break;
p = q > r ? q : r;
p = eol;
}
int nsmpls = khash_str2int_size(bam_smpls);
khash_str2int_destroy_free(bam_smpls);
Expand All @@ -234,6 +238,7 @@ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
{
// no suitable read group is available in this bam: ignore the whole file.
free(file->fname);
if ( file->rg2idx ) khash_str2int_destroy_free(file->rg2idx);
bsmpl->nfiles--;
return -1;
}
Expand Down
2 changes: 1 addition & 1 deletion bin.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ bin_t *bin_init(const char *list_def, float min, float max)
{
char *tmp;
bin->bins[i] = strtod(list[i],&tmp);
if ( !tmp ) error("Could not parse %s: %s\n", list_def, list[i]);
if ( *tmp ) error("Could not parse %s: %s\n", list_def, list[i]);
if ( min!=max && (bin->bins[i]<min || bin->bins[i]>max) )
error("Expected values from the interval [%f,%f], found %s\n", min, max, list[i]);
free(list[i]);
Expand Down
Loading

0 comments on commit 872d506

Please sign in to comment.