@@ -34,6 +34,7 @@ use File::Path qw(make_path);
34
34
use File::Basename;
35
35
use Cwd ' abs_path' ;
36
36
use Data::UUID;
37
+ use Fcntl qw( :mode ) ;
37
38
38
39
use File::ShareDir qw( module_dir) ;
39
40
@@ -87,7 +88,7 @@ const my %CV_MAPPINGS => ('analyte_code' => { 'file' => 'cv_tables/TCGA/port
87
88
sub generate_sample_SRA {
88
89
my ($grouped , $options ) = @_ ;
89
90
my $cv_lookups = create_cv_lookups();
90
- my ( @cgsubmit_validate , @cgsubmit , @gtupload ) ;
91
+ my @analysis_ids ;
91
92
my $base_path = $options -> {' outdir' };
92
93
for my $seq_type (keys %{$grouped }) {
93
94
for my $sample (keys %{$grouped -> {$seq_type }}) {
@@ -110,7 +111,7 @@ sub generate_sample_SRA {
110
111
push @{$runs {$bam_ob -> {' run' }}}, $bam_ob ;
111
112
112
113
my $run_xmls = run($bam_ob -> {' CN' }, \%runs );
113
- my $exp_xml = experiment_sets($options -> {' study' }, $sample , \%exps );
114
+ my $exp_xml = experiment_sets($options -> {' study' }, \%exps );
114
115
115
116
my $analysis_xml = analysis_xml($bam_ob , $options -> {' study' }, $sample );
116
117
open my $XML , ' >' , " $submission_path /analysis.xml" ;
@@ -127,21 +128,22 @@ sub generate_sample_SRA {
127
128
my ($cleaned_filename , $directories , $suffix ) = fileparse($bam_ob -> {' file' }, ' .bam' );
128
129
$cleaned_filename .= ' .bam' ;
129
130
symlink abs_path($bam_ob -> {' file' }), " $submission_path /$cleaned_filename " ;
130
- push @cgsubmit_validate , (sprintf ' cgsubmit -s https://gtrepo-ebi.annailabs.com -o %s.log -u %s --validate-only' , $submission_uuid , $submission_uuid );
131
- push @cgsubmit , (sprintf ' cgsubmit -s https://gtrepo-ebi.annailabs.com -o %s.log -u %s -c $GNOS_PERM' , $submission_uuid , $submission_uuid );
132
- push @gtupload , (sprintf ' gtupload -v -c $GNOS_PERM -u %s/manifest.xml >& %s.upload.log&' , $submission_uuid , $submission_uuid );
131
+ push @analysis_ids , $submission_uuid ;
133
132
}
134
133
}
135
134
}
136
135
}
137
136
print " ## Executing the following will complete the submission/upload process:\n " ;
138
- print " cd $base_path \n " ;
139
- print join " \n " , @cgsubmit_validate ;
140
- print " \n ## if successful\n " ;
141
- print join " \n " , @cgsubmit ;
142
- print " \n ## if successful\n " ;
143
- print join " \n " , @gtupload ;
144
- print " \n " ;
137
+ my $full_path = abs_path($base_path );
138
+ my $sra_sh_script = " $full_path /auto_upload.sh" ;
139
+ open my $SH , ' >' , $sra_sh_script ;
140
+ print $SH bash_script($full_path , \@analysis_ids );
141
+ close $SH ;
142
+ chmod S_IRUSR|S_IXUSR, $sra_sh_script ;
143
+ my $log = $sra_sh_script ;
144
+ $log .= ' .log' ;
145
+ print " $sra_sh_script >& $log &\n " ;
146
+ print " tail -f $log \n " ;
145
147
}
146
148
147
149
sub create_cv_lookups {
@@ -152,7 +154,7 @@ sub create_cv_lookups {
152
154
# so try installed area
153
155
unless (defined $data_path && -e $data_path ) {
154
156
$data_path = dirname(abs_path($0 )).' /../share' ;
155
- $data_path = module_dir(' PCAP::SRA' ) unless (-e $data_path );
157
+ $data_path = module_dir(' PCAP::SRA' ) unless (-e " $data_path /cv_tables " );
156
158
}
157
159
for my $cv_field (keys %CV_MAPPINGS ) {
158
160
my $cv_file = " $data_path /$CV_MAPPINGS {$cv_field }{file}" ;
@@ -272,6 +274,7 @@ sub get_md5_from_file {
272
274
my $md5 = <$IN >;
273
275
close $IN ;
274
276
chomp $md5 ;
277
+ $md5 =~ s /\s +.*// ;
275
278
return $md5 ;
276
279
}
277
280
@@ -363,7 +366,7 @@ ATTRXML
363
366
}
364
367
365
368
sub experiment_sets {
366
- my ($study , $sample , $ exp_set ) = @_ ;
369
+ my ($study , $exp_set ) = @_ ;
367
370
my $experiment_xml = <<EXP_XML ;
368
371
<EXPERIMENT_SET xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.ncbi.nlm.nih.gov/viewvc/v1/trunk/sra/doc/SRA_1-5/SRA.experiment.xsd?view=co">
369
372
%s
@@ -372,13 +375,13 @@ EXP_XML
372
375
373
376
my @experiments ;
374
377
for my $exp (keys %{$exp_set }) {
375
- push @experiments , experiment($study , $sample , $ exp_set-> {$exp });
378
+ push @experiments , experiment($study , $exp_set -> {$exp });
376
379
}
377
380
return sprintf $experiment_xml , (join ' ' , @experiments );
378
381
}
379
382
380
383
sub experiment {
381
- my ($study , $sample , $ bam_ob ) = @_ ;
384
+ my ($study , $bam_ob ) = @_ ;
382
385
my $exp_xml = <<EXPXML ;
383
386
<EXPERIMENT center_name="%s " alias="%s ">
384
387
<STUDY_REF refcenter="OICR" refname="%s "/>
@@ -513,6 +516,88 @@ RUNXML
513
516
return $xml ;
514
517
}
515
518
519
# bash_script($path, $uuids)
#
# Builds the text of a resumable bash script that completes the GNOS
# submission (cgsubmit) and upload (gtupload) for each analysis UUID.
#
#   $path  - working directory the generated script changes into
#   $uuids - array ref of submission UUIDs (one sub-directory each);
#            an undef or empty list yields a script that does no work
#
# Returns the script as a single string; nothing is written to disk here.
#
# NOTE: the template is passed through sprintf, so the only '%' characters
# allowed in the heredoc are the two '%s' placeholders.
sub bash_script {
  my ($path, $uuids) = @_;

  # Single-quote a value for bash so spaces, '$', '`' and '"' stay literal.
  my $shell_quote = sub {
    my ($value) = @_;
    $value =~ s/'/'\\''/g;
    return qq{'$value'};
  };

  # Quote each UUID on its own; an empty list renders as an empty bash array
  # instead of an array holding one empty string.
  my $uuid_str = join q{ }, map { $shell_quote->($_) } @{ $uuids || [] };

  my $script = <<'BASHSCRIPT';
#!/bin/bash
set -e
set -u
set -o pipefail

submitexp="OK"
queryext="All matching objects are in a downloadable state"

# Succeeds (0) when cgsubmit still has to be run for the given log file.
submit_needed () {
  if [ -e "$1" ]; then
    catres="$(cat "$1")"
    # quoted so an empty or multi-word log is a mismatch, not a test error
    if [ "$catres" != "$submitexp" ]; then
      return 0
    fi
  else
    return 0
  fi
  return 1
}

# Succeeds (0) when gtupload still has to be run for the given analysis id.
upload_needed () {
  uploadlog="$1/gtupload.log"
  if [ -e "$uploadlog" ]; then
    # check against cgquery; only the report text matters, so its exit
    # status is ignored here without switching errexit off globally
    queryres="$(cgquery -s https://gtrepo-ebi.annailabs.com "analysis_id=$1" 2>&1 || true)"
    if grep -q "$queryext" <<< "$queryres"; then
      return 1
    fi
    return 0
  fi
  # no log file so upload needed
  return 0
}

process_uuids () {
  for i in "$@"; do
    submitlog="$i/cgsubmit.log"
    if submit_needed "$submitlog"; then
      set -x
      cgsubmit -s https://gtrepo-ebi.annailabs.com -o "$submitlog" -u "$i" -c "$GNOS_PERM" > "$submitlog.out"
      set +x
    else
      echo "RESUME MESSAGE: cgsubmit previously successful for $i"
    fi
    if upload_needed "$i"; then
      set -x
      gtupload -v -c "$GNOS_PERM" -u "$i/manifest.xml" >> "$i/gtupload.log" 2>&1
      set +x
    else
      echo "RESUME MESSAGE: gtupload previously successful for $i"
    fi
  done
}

# change into working dir
workarea=%s
echo "Working directory: $workarea"
cd "$workarea"
ids=( %s )

# the +expansion keeps an empty id list safe under 'set -u' on older bash
process_uuids ${ids[@]+"${ids[@]}"}

echo SUCCESSFULLY COMPLETED

BASHSCRIPT
  return sprintf $script, $shell_quote->($path), $uuid_str;
}
600
+
516
601
1;
517
602
518
603
__END__
@@ -606,4 +691,10 @@ Takes list of values in this order
606
691
study_name
607
692
aliquot_id from BAM RG header SM tag
608
693
694
+ =item bash_script
695
+
696
+ Takes output path and list of submission UUIDs.
697
+
698
+ Generates a bash script that can be run to complete GNOS upload with resume capabilities.
699
+
609
700
=back
0 commit comments