diff --git a/Changes b/Changes index 2e6cf74..69a0e01 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,11 @@ +1.0.1 + Upgrade install to pull biobambam 0.0.135 + - fastqtobam supports Casava v1.8 + - bamsort supports NM/MD correction during sam->bam/merge process + Minor enhancement to BAS reader module + Sample name from command line passed through to SM of RG header in bwa_mem.pl + SRA.pm - check that rg id is unique within run of code (thanks to Junjun Zhang) + Threads.pm - join interval is now configurable. 1.0.0 bam_stats.pl actually installed now. Basic *.bas perl access module. diff --git a/MYMETA.json b/MYMETA.json index d70d5dc..62c3f4d 100644 --- a/MYMETA.json +++ b/MYMETA.json @@ -53,5 +53,5 @@ } }, "release_status" : "stable", - "version" : "v1.0.0" + "version" : "v1.0.1" } diff --git a/MYMETA.yml b/MYMETA.yml index d132837..645eed5 100644 --- a/MYMETA.yml +++ b/MYMETA.yml @@ -35,4 +35,4 @@ requires: Term::UI: 0.42 Test::Fatal: 0.013 Try::Tiny: 0.19 -version: v1.0.0 +version: v1.0.1 diff --git a/docs.tar.gz b/docs.tar.gz index c6ef546..5af691e 100644 Binary files a/docs.tar.gz and b/docs.tar.gz differ diff --git a/lib/PCAP.pm b/lib/PCAP.pm index 9f4eab2..cb06d6a 100644 --- a/lib/PCAP.pm +++ b/lib/PCAP.pm @@ -23,7 +23,7 @@ package PCAP; use strict; use Const::Fast qw(const); -our $VERSION = '1.0.0'; +our $VERSION = '1.0.1'; const my $LICENSE => "################# @@ -39,7 +39,8 @@ const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,samtools,bwa', '0.2.0' => 'biobambam', '0.2.99' => 'biobambam', '0.3.0' => 'biobambam', - '1.0.0' => '', + '1.0.0' => 'biobambam', + '1.0.1' => '', ); sub license { diff --git a/lib/PCAP/Bam.pm b/lib/PCAP/Bam.pm index 877198d..3afe7ea 100644 --- a/lib/PCAP/Bam.pm +++ b/lib/PCAP/Bam.pm @@ -52,7 +52,7 @@ sub new { } sub rg_line_for_output { - my ($bam, $uniq_id) = @_; + my ($bam, $sample, $uniq_id) = @_; my $sam = sam_ob($bam); my $header = $sam->header->text; my $rg_line; @@ -64,6 +64,9 @@ sub rg_line_for_output { my $uuid = lc Data::UUID->new->create_str; $rg_line =~ s/\tID:[^\t]+/\tID:$uuid/; } + if(defined $sample) { + $rg_line =~ s/\tSM:[^\t]+/\tSM:$sample/; + } $rg_line =~ s/\t/\\t/g; } return ($rg_line, $sam); # also return the SAM object diff --git a/lib/PCAP/Bam/Bas.pm b/lib/PCAP/Bam/Bas.pm index c96b88f..ef9c663 100644 --- a/lib/PCAP/Bam/Bas.pm +++ b/lib/PCAP/Bam/Bas.pm @@ -80,6 +80,10 @@ sub bas_keys { return $self->{'keys'}; } +sub read_groups { + return (sort keys shift->{'_data'}); +} + sub get { my ($self, $rg, $key) = @_; die qq{Readgroup '$rg' does not exist\n} unless(exists $self->{'_data'}->{$rg}); @@ -108,6 +112,10 @@ Construct an access object for BAM statistics file. Returns the list of available keys for this BAS file. +=item read_groups + +Returns sorted list of read-groups found in this BAS file. + =item get Retrieve a value by its readgroup and key: diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index 61014b6..410d36c 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -72,7 +72,7 @@ sub bwa_mem { $rg_line = q{'}.$input->rg_header(q{\t}).q{'}; } else { - ($rg_line, undef) = PCAP::Bam::rg_line_for_output($input->in, 1); + ($rg_line, undef) = PCAP::Bam::rg_line_for_output($input->in, $options->{'sample'}, 1); } my $bwa = which('bwa') || die "Unable to find 'bwa' in path"; diff --git a/lib/PCAP/SRA.pm b/lib/PCAP/SRA.pm index 408f71e..ee2f5ea 100644 --- a/lib/PCAP/SRA.pm +++ b/lib/PCAP/SRA.pm @@ -272,10 +272,12 @@ sub _check_seq_type { sub group_bams { my ($self, $in_seq_type) = @_; my %grouped; + my %rg_seen; for my $bam_ob(@{$self->{'bam_obs'}}) { + die "The same readgroup ID has been used in more than one BAM file, readgroup ID: ".$bam_ob->{'ID'}."\n" if $rg_seen{$bam_ob->{'ID'}}++; my $sm = $bam_ob->{'SM'}; my $lb = $bam_ob->{'LB'}; - my ($run) = $bam_ob->{'PU'} =~ m/^[[:alpha:]]+:([^_]+)_[^#]+/; + my ($run) = $bam_ob->{'PU'} =~ m/^.+:(.+)_\d+(#.+)?/; $bam_ob->{'run'} = sprintf '%s:%s', $bam_ob->{'CN'}, $run; my ($lib_id) = $lb =~ m/^[[:alpha:]]+:[[:alpha:]]+:(.*)$/; $bam_ob->{'exp'} = sprintf '%s:%s', $bam_ob->{'run'}, $lib_id; diff --git a/lib/PCAP/Threaded.pm b/lib/PCAP/Threaded.pm index 23ae657..90417c7 100644 --- a/lib/PCAP/Threaded.pm +++ b/lib/PCAP/Threaded.pm @@ -50,7 +50,8 @@ sub new { $max_threads = 1; } croak "Number of threads was NAN: $max_threads" if($max_threads !~ m/^[[:digit:]]+$/xms); - my $self = { 'threads' => $max_threads }; + my $self = {'threads' => $max_threads, + 'join_interval' => 1, }; bless $self, $class; return $self; } @@ -68,6 +69,16 @@ sub add_function { return 1; } +sub thread_join_interval { + my ($self, $sec) = @_; + if(defined $sec) { + croak 'join_interval must be an integer' if($sec !~ m/^[[:digit:]]+$/); + croak 'join_interval must be 1 or more' if($sec < 1); + $self->{'join_interval'} = $sec; + } + $self->{'join_interval'}; +} + sub run { my ($self, $iterations, $function_name, @params) = @_; croak 'Iterations must be defined' unless(defined $iterations); @@ -89,14 +100,14 @@ sub run { threads->create($function_ref, $index++, @params); last if($index > $iterations); } - sleep 10 while(threads->list(threads::joinable) == 0); + sleep $self->thread_join_interval while(threads->list(threads::joinable) == 0); for my $thr(threads->list(threads::joinable)) { $thr->join; if(my $err = $thr->error) { die "Thread error: $err\n"; } } } # last gasp for any remaining threads - sleep 2 while(threads->list(threads::running) > 0); + sleep $self->thread_join_interval while(threads->list(threads::running) > 0); for my $thr(threads->list(threads::joinable)) { $thr->join; if(my $err = $thr->error) { die "Thread error: $err\n"; } diff --git a/setup.sh b/setup.sh index 4464475..398df29 100755 --- a/setup.sh +++ b/setup.sh @@ -3,8 +3,8 @@ SOURCE_BWA="https://github.com/lh3/bwa/archive/0.7.7.tar.gz" SOURCE_SNAPPY="https://snappy.googlecode.com/files/snappy-1.1.1.tar.gz" SOURCE_IOLIB="http://downloads.sourceforge.net/project/staden/io_lib/1.13.4/io_lib-1.13.4.tar.gz" -SOURCE_LIBMAUS="https://github.com/gt1/libmaus/archive/0.0.112-release-20140411095503.tar.gz" -SOURCE_BIOBAMBAM="https://github.com/gt1/biobambam/archive/0.0.131-release-20140411101450.tar.gz" +SOURCE_LIBMAUS="https://github.com/gt1/libmaus/archive/0.0.115-release-20140423163910.tar.gz" +SOURCE_BIOBAMBAM="https://github.com/gt1/biobambam/archive/0.0.135-release-20140423164503.tar.gz" SOURCE_SAMTOOLS="https://github.com/samtools/samtools/archive/0.1.19.tar.gz" done_message () { diff --git a/t/3_external_progs.t b/t/3_external_progs.t index 91290d3..9e752bb 100644 --- a/t/3_external_progs.t +++ b/t/3_external_progs.t @@ -17,15 +17,15 @@ my %EXPECTED_VERSION = ( 'bamcollate2' => { 'get' => q{ -h}, 'match' => qr/This is biobambam version ([[:digit:]\.]+)\./, - 'version' => ['0.0.131']}, + 'version' => ['0.0.135']}, 'bammarkduplicates' => { 'get' => q{ -h}, 'match' => qr/This is biobambam version ([[:digit:]\.]+)\./, - 'version' => ['0.0.131']}, + 'version' => ['0.0.135']}, 'bamsort' => { 'get' => q{ -h}, 'match' => qr/This is biobambam version ([[:digit:]\.]+)\./, - 'version' => ['0.0.131']}, + 'version' => ['0.0.135']}, 'bwa' => { 'get' => q{}, 'match' => qr/Version: ([[:digit:]\.]+[[:alpha:]]?)/, # we don't care about the revision number diff --git a/t/pcapBamBas.t b/t/pcapBamBas.t index c93efd1..7b9187b 100644 --- a/t/pcapBamBas.t +++ b/t/pcapBamBas.t @@ -8,6 +8,7 @@ use Const::Fast qw(const); const my $MODULE => 'PCAP::Bam::Bas'; const my $RG_1 => 1; const my $EXP_MEDIAN => '462.000'; +const my $RG_ORDER => [qw(1 2 3 4 5 6)]; use FindBin qw($Bin); my $test_data = "$Bin/../testData"; @@ -29,6 +30,8 @@ subtest 'Access checks' => sub { my $obj = new_ok($MODULE => [$bas]); is($obj->get($RG_1, 'median_insert_size'), $EXP_MEDIAN, 'Get expected value with correct key'); is($obj->get($RG_1, 'wibble'), undef, 'Get undef with unknown key'); + my @rgs = $obj->read_groups; + is_deeply(\@rgs, $RG_ORDER, 'Readgroups returned sorted'); like(exception { $obj->get(99, 'wibble'); }, qr/Readgroup '.*' does not exist/, 'Expected error, unkown RG'); };