forked from CRG-Beato/utils_beatolab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastq_dump_from_sra.sh
executable file
·91 lines (74 loc) · 2.49 KB
/
fastq_dump_from_sra.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/bash
#==================================================================================================
# Created on: 2016-02-02
# Usage: ./fastq_dump_from_sra.sh
# Author: Javier Quilez (GitHub: jaquol)
# Goal: downloads FASTQ files from SRA (using the SRR code) and renames them after the desired sample id
#==================================================================================================
#==================================================================================================
# CONFIGURATION VARIABLES AND PATHS
#==================================================================================================
# variables
# Name of the analysis directory under $PROJECT/analysis (holds the input table and the job files)
analysis=2017-10-09_run_hic-16.05_external_samples_evidal
# Date used to name the directory where the FASTQ files are written
download_date=2017-10-09
# Prefix of the name of every submitted job
process=fastq_dump_from_sra
project='4DGenome'
# Only used to build the output path of projects other than 4DGenome
data_type='hic'
# Table read line by line: column 1 = sample id, column 2 = SRR accession
table_name=sample_id_to_srr.txt

# paths
if [[ "$project" == '4DGenome' ]]; then
  PROJECT=/users/project/4DGenome
  ODIR="$PROJECT/sequencing/$download_date"
else
  PROJECT="/users/mbeato/projects/projects/$project"
  ODIR="/users/mbeato/projects/data/$data_type/raw/$download_date"
fi
ANALYSIS="$PROJECT/analysis/$analysis"
JOB_CMD="$ANALYSIS/job_cmd"
JOB_OUT="$ANALYSIS/job_out"

# Stop here if a directory cannot be created: job scripts and job logs are written to them
if ! mkdir -p -- "$ODIR" "$JOB_CMD" "$JOB_OUT"; then
  printf 'ERROR: cannot create output/job directories\n' >&2
  exit 1
fi
itab="$ANALYSIS/tables/$table_name"

# The resolved path is written verbatim into every job script, so an empty value
# would make each job try to execute the SRR accession as a command
if ! fastq_dump=$(command -v fastq-dump); then
  printf 'ERROR: fastq-dump not found in PATH\n' >&2
  exit 1
fi

# Cluster parameters
queue=long-sl65      # passed to qsub as -q
memory=2G            # passed to qsub as -l virtual_free
max_time=48:00:00    # passed to qsub as -l h_rt
slots=1              # passed to qsub as -pe smp
#==================================================================================================
# COMMANDS
#==================================================================================================
#######################################
# Print the SGE job script that downloads one SRR run and renames its FASTQ files.
# Globals:   process, queue, memory, max_time, slots, JOB_OUT, ODIR, fastq_dump (read)
# Arguments: $1 - sample id used in the final file names
#            $2 - SRR accession passed to fastq-dump
# Outputs:   the job script on stdout
# NOTE(review): the address on the '-M' line looks redacted in this copy of the
# script; it is reproduced unchanged -- confirm the real address before use.
#######################################
render_job_script() {
  local sample_name=$1
  local srr=$2
  local job_name="${process}_${sample_name}_${srr}"
  local q_dump q_odir q_srr q_sample

  # Shell-quote every value embedded in a command line of the generated script
  printf -v q_dump '%q' "$fastq_dump"
  printf -v q_odir '%q' "$ODIR"
  printf -v q_srr '%q' "$srr"
  printf -v q_sample '%q' "$sample_name"

  # 'set -e' makes the job stop after a failed download, so that a partial
  # FASTQ file is never renamed to the final sample name
  cat <<EOF
#!/bin/bash
#$ -N $job_name
#$ -q $queue
#$ -l virtual_free=$memory
#$ -l h_rt=$max_time
#$ -o $JOB_OUT/${job_name}_\$JOB_ID.out
#$ -e $JOB_OUT/${job_name}_\$JOB_ID.err
#$ -j y
#$ -M [email protected]
#$ -m abe
#$ -pe smp $slots
set -e
$q_dump $q_srr --split-files -O $q_odir -DQ '+' --gzip
mv $q_odir/${q_srr}_1.fastq.gz $q_odir/${q_sample}_read1.fastq.gz
mv $q_odir/${q_srr}_2.fastq.gz $q_odir/${q_sample}_read2.fastq.gz
EOF
}

# One job per table line: column 1 = sample id, column 2 = SRR accession.
# The '|| [[ -n ... ]]' part keeps a last line that has no trailing newline.
while read -r sample_name srr _ || [[ -n "$sample_name" ]]; do
  # Blank line: nothing to submit
  if [[ -z "$sample_name" ]]; then
    continue
  fi
  if [[ -z "$srr" ]]; then
    printf 'WARNING: no SRR accession for sample %s, line skipped\n' "$sample_name" >&2
    continue
  fi

  # Build job
  job_name="${process}_${sample_name}_${srr}"
  job_file="$JOB_CMD/$job_name.sh"
  render_job_script "$sample_name" "$srr" > "$job_file"

  # Submit job
  chmod a+x "$job_file"
  if ! qsub < "$job_file"; then
    printf 'WARNING: qsub failed for %s\n' "$job_file" >&2
  fi
  # Pause between consecutive submissions
  sleep 10
  #cat "$job_file"
done < "$itab"