Skip to content

Commit 8d0e6ee

Browse files
committed
fixed #8 and added code to generate a bash script to handle automation of GNOS upload with resume capabilities.
1 parent ed5b271 commit 8d0e6ee

File tree

1 file changed

+107
-16
lines changed

1 file changed

+107
-16
lines changed

lib/PCAP/SRA.pm

Lines changed: 107 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use File::Path qw(make_path);
3434
use File::Basename;
3535
use Cwd 'abs_path';
3636
use Data::UUID;
37+
use Fcntl qw( :mode );
3738

3839
use File::ShareDir qw(module_dir);
3940

@@ -87,7 +88,7 @@ const my %CV_MAPPINGS => ('analyte_code' => { 'file' => 'cv_tables/TCGA/port
8788
sub generate_sample_SRA {
8889
my ($grouped, $options) = @_;
8990
my $cv_lookups = create_cv_lookups();
90-
my (@cgsubmit_validate, @cgsubmit, @gtupload);
91+
my @analysis_ids;
9192
my $base_path = $options->{'outdir'};
9293
for my $seq_type(keys %{$grouped}) {
9394
for my $sample(keys %{$grouped->{$seq_type}}) {
@@ -110,7 +111,7 @@ sub generate_sample_SRA {
110111
push @{$runs{$bam_ob->{'run'}}}, $bam_ob;
111112

112113
my $run_xmls = run($bam_ob->{'CN'}, \%runs);
113-
my $exp_xml = experiment_sets($options->{'study'}, $sample, \%exps);
114+
my $exp_xml = experiment_sets($options->{'study'}, \%exps);
114115

115116
my $analysis_xml = analysis_xml($bam_ob, $options->{'study'}, $sample);
116117
open my $XML, '>', "$submission_path/analysis.xml";
@@ -127,21 +128,22 @@ sub generate_sample_SRA {
127128
my ($cleaned_filename, $directories, $suffix) = fileparse($bam_ob->{'file'}, '.bam');
128129
$cleaned_filename .= '.bam';
129130
symlink abs_path($bam_ob->{'file'}), "$submission_path/$cleaned_filename";
130-
push @cgsubmit_validate, (sprintf 'cgsubmit -s https://gtrepo-ebi.annailabs.com -o %s.log -u %s --validate-only', $submission_uuid , $submission_uuid );
131-
push @cgsubmit, (sprintf 'cgsubmit -s https://gtrepo-ebi.annailabs.com -o %s.log -u %s -c $GNOS_PERM', $submission_uuid , $submission_uuid );
132-
push @gtupload, (sprintf 'gtupload -v -c $GNOS_PERM -u %s/manifest.xml >& %s.upload.log&', $submission_uuid, $submission_uuid);
131+
push @analysis_ids, $submission_uuid;
133132
}
134133
}
135134
}
136135
}
137136
print "## Executing the following will complete the submission/upload process:\n";
138-
print "cd $base_path\n";
139-
print join "\n", @cgsubmit_validate;
140-
print "\n## if successful\n";
141-
print join "\n", @cgsubmit;
142-
print "\n## if successful\n";
143-
print join "\n", @gtupload;
144-
print "\n";
137+
my $full_path = abs_path($base_path);
138+
my $sra_sh_script = "$full_path/auto_upload.sh";
139+
open my $SH, '>', $sra_sh_script;
140+
print $SH bash_script($full_path, \@analysis_ids);
141+
close $SH;
142+
chmod S_IRUSR|S_IXUSR, $sra_sh_script;
143+
my $log = $sra_sh_script;
144+
$log .= '.log';
145+
print "$sra_sh_script >& $log &\n";
146+
print "tail -f $log\n";
145147
}
146148

147149
sub create_cv_lookups {
@@ -152,7 +154,7 @@ sub create_cv_lookups {
152154
# so try installed area
153155
unless(defined $data_path && -e $data_path) {
154156
$data_path = dirname(abs_path($0)).'/../share';
155-
$data_path = module_dir('PCAP::SRA') unless(-e $data_path);
157+
$data_path = module_dir('PCAP::SRA') unless(-e "$data_path/cv_tables");
156158
}
157159
for my $cv_field(keys %CV_MAPPINGS) {
158160
my $cv_file = "$data_path/$CV_MAPPINGS{$cv_field}{file}";
@@ -272,6 +274,7 @@ sub get_md5_from_file {
272274
my $md5 = <$IN>;
273275
close $IN;
274276
chomp $md5;
277+
$md5 =~ s/\s+.*//;
275278
return $md5;
276279
}
277280

@@ -363,7 +366,7 @@ ATTRXML
363366
}
364367

365368
sub experiment_sets {
366-
my ($study, $sample, $exp_set) = @_;
369+
my ($study, $exp_set) = @_;
367370
my $experiment_xml = <<EXP_XML;
368371
<EXPERIMENT_SET xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.ncbi.nlm.nih.gov/viewvc/v1/trunk/sra/doc/SRA_1-5/SRA.experiment.xsd?view=co">
369372
%s
@@ -372,13 +375,13 @@ EXP_XML
372375

373376
my @experiments;
374377
for my $exp(keys %{$exp_set}) {
375-
push @experiments, experiment($study, $sample, $exp_set->{$exp});
378+
push @experiments, experiment($study, $exp_set->{$exp});
376379
}
377380
return sprintf $experiment_xml, (join '', @experiments);
378381
}
379382

380383
sub experiment {
381-
my ($study, $sample, $bam_ob) = @_;
384+
my ($study, $bam_ob) = @_;
382385
my $exp_xml = <<EXPXML;
383386
<EXPERIMENT center_name="%s" alias="%s">
384387
<STUDY_REF refcenter="OICR" refname="%s"/>
@@ -513,6 +516,88 @@ RUNXML
513516
return $xml;
514517
}
515518

519+
sub bash_script {
  # Builds the text of a resumable bash script that runs cgsubmit and then
  # gtupload for each analysis UUID directory found under the working area.
  #
  #   $path  - directory the script changes into before doing any work
  #   $uuids - array ref of analysis/submission UUIDs (sub-directories of $path)
  #
  # Returns the complete script as a single string; nothing is written to disk.
  my ($path, $uuids) = @_;
  my @ids = defined $uuids ? @{$uuids} : ();

  # Single-quote a value for bash so that spaces, '$', backticks and double
  # quotes in a path cannot be interpreted by the shell.
  my $sh_quote = sub {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
  };

  # Non-interpolating here-doc: every '$' below belongs to bash, not Perl.
  # Values are injected through @@TOKEN@@ markers rather than sprintf so a
  # literal '%' in the shell code can never be mistaken for a format directive.
  my $script = <<'BASHSCRIPT';
#!/bin/bash
set -e
set -u
set -o pipefail

gnos_server="https://gtrepo-ebi.annailabs.com"
submitexp="OK"
queryext="All matching objects are in a downloadable state"

# Exit status 0 (true) when cgsubmit still has to run for the given log file.
submit_needed () {
  local -a words=()
  if [ ! -e "$1" ]; then
    return 0
  fi
  # Split the log on whitespace. read returns non-zero at EOF, which is the
  # normal case here, hence the '|| true'.
  read -r -d '' -a words < "$1" || true
  # Only a log consisting of exactly the expected token counts as success;
  # an empty or multi-word log means the submission has to be repeated.
  if [ "${#words[@]}" -eq 1 ] && [ "${words[0]}" = "$submitexp" ]; then
    return 1
  fi
  return 0
}

# Exit status 0 (true) when gtupload still has to run for the given UUID.
upload_needed () {
  local uploadlog="$1/gtupload.log"
  local tmpfile
  if [ ! -e "$uploadlog" ]; then
    # no log file so upload needed
    return 0
  fi
  # An upload was started earlier: check against cgquery.
  # errexit is deliberately NOT switched off here, a 'set +e' would leak out
  # of the function and disable error checking for the rest of the script.
  tmpfile="$(mktemp)"
  cgquery -s "$gnos_server" "analysis_id=$1" > "$tmpfile" 2>&1 || true
  if grep -q "$queryext" "$tmpfile"; then
    rm -f "$tmpfile"
    return 1
  fi
  rm -f "$tmpfile"
  return 0
}

# Takes the NAME of an array variable holding the UUIDs to process.
process_uuids () {
  local name="$1[@]"
  local i submitlog
  uuids=("${!name}")

  for i in "${uuids[@]}"; do
    submitlog="$i/cgsubmit.log"
    if submit_needed "$submitlog"; then
      set -x
      cgsubmit -s "$gnos_server" -o "$submitlog" -u "$i" -c "$GNOS_PERM" > "$submitlog.out"
      set +x
    else
      echo "RESUME MESSAGE: cgsubmit previously successful for $i"
    fi
    if upload_needed "$i"; then
      set -x
      gtupload -v -c "$GNOS_PERM" -u "$i/manifest.xml" >> "$i/gtupload.log" 2>&1
      set +x
    else
      echo "RESUME MESSAGE: gtupload previously successful for $i"
    fi
  done
}

# change into working dir
workarea=@@WORKAREA@@
echo "Working directory: $workarea"
cd "$workarea"
ids=( @@IDS@@ )

# An empty list is skipped: expanding an empty array trips 'set -u' on older
# bash releases and there is nothing to upload anyway.
if [ "${#ids[@]}" -gt 0 ]; then
  process_uuids ids
fi

echo SUCCESSFULLY COMPLETED

BASHSCRIPT

  my %value_for = (
    WORKAREA => $sh_quote->($path),
    IDS      => join(q{ }, map { $sh_quote->($_) } @ids),
  );
  # Single pass, so a substituted value that happens to contain a marker is
  # never expanded a second time.
  $script =~ s/\@\@(WORKAREA|IDS)\@\@/$value_for{$1}/g;
  return $script;
}
600+
516601
1;
517602

518603
__END__
@@ -606,4 +691,10 @@ Takes list of values in this order
606691
study_name
607692
aliquot_id from BAM RG header SM tag
608693
694+
=item bash_script
695+
696+
Takes output path and list of submission UUIDs.
697+
698+
Generates a bash script that can be run to complete GNOS upload with resume capabilities.
699+
609700
=back

0 commit comments

Comments
 (0)