-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathgsnap_pe_noclip_final.sh
executable file
·55 lines (49 loc) · 1.25 KB
/
gsnap_pe_noclip_final.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
# Paired-end GSNAP alignment with soft-clipping disabled.
# Sized for 32 processors: the reads are split into 4 parts that run
# concurrently, and each part is aligned with 8 threads.

## MODULES
# Register the site-specific module tree, then load the required tools.
module use /data004/software/GIF/modules
for tool in parallel gmap; do
  module load "${tool}"
done

## PATHS
# Location of the GMAP/GSNAP genome databases; exported so that child
# processes can see it.
GMAPDB=/home/arnstrm/arnstrm/GMAPDB
export GMAPDB
# Name of the genome database handed to gsnap through --db.
DB_NAME='GRCm38.78_musmus'
# Print the R2 mate path for an R1 path by replacing every "_R1_" in it
# with "_R2_".
# Arguments: $1 - path to the R1 read file
# Outputs:   the derived R2 path on stdout
mate_file() {
  printf '%s\n' "${1//_R1_/_R2_}"
}

# Print the output prefix for a read file: its base name up to the first dot.
# The directory part is removed BEFORE the extension is stripped, so dots in
# the directory (e.g. "./reads/x.fq.gz" or "/data/run.1/x.fq.gz") can no
# longer truncate or empty the prefix.
# Arguments: $1 - path to a read file
# Outputs:   the prefix on stdout
output_prefix() {
  local name
  name=$(basename -- "$1")
  printf '%s\n' "${name%%.*}"
}

# First argument is the R1 file; the R2 file and output prefix derive from it.
FILE1="$1"
FILE2=$(mate_file "${FILE1}")
OUTFILE=$(output_prefix "${FILE1}")
## COMMAND
# important options to consider
#==============================
# if using RNA-seq, use: --novelsplicing=1
#
# if mate pairs use: --orientation=RF
# and specify the insert size using --pairlength=2000 (for 2kb insert)
# and --pairmax=5000 (for max insert size)
#
# to allow soft-clipping of alignments, exclude all 3 options below:
# --terminal-threshold=100
# --indel-penalty=1
# --trim-mismatch-score=0
#
# if fastq is gzipped use:
# --gunzip
# Align the read pair with gsnap, one GNU parallel job per input part.
# Reads DB_NAME, OUTFILE, FILE1 and FILE2, which are set earlier in this script.
nparts=4     # number of input parts == number of concurrent gsnap jobs
nthreads=8   # threads per gsnap job (nparts * nthreads = 32 in total)

# Fail early instead of launching jobs on unusable input.
if [[ -z "${FILE1}" ]]; then
  printf 'usage: %s <reads_R1_file>\n' "${0##*/}" >&2
  exit 1
fi
if [[ "${FILE1}" == "${FILE2}" ]]; then
  # Without "_R1_" in the name the derived R2 path equals the R1 path, and
  # the file would be aligned against itself.
  printf 'error: "%s" has no "_R1_" in its name; cannot derive the R2 file\n' \
    "${FILE1}" >&2
  exit 1
fi
for reads in "${FILE1}" "${FILE2}"; do
  if [[ ! -r "${reads}" ]]; then
    printf 'error: cannot read input file: %s\n' "${reads}" >&2
    exit 1
  fi
done

# The command is handed to parallel as a single string that a shell parses
# again, so shell-quote every value that comes from a variable. The {}
# placeholders are deliberately left unquoted for parallel to substitute.
printf -v q_db '%q' "${DB_NAME}"
printf -v q_prefix '%q' "${DB_NAME}_AP_${OUTFILE}"
printf -v q_reads1 '%q' "${FILE1}"
printf -v q_reads2 '%q' "${FILE2}"

# Part ids 0 .. nparts-1; parallel substitutes each one for {}.
part_ids=()
for ((part = 0; part < nparts; part++)); do
  part_ids+=("${part}")
done

parallel --jobs "${nparts}" \
  "gsnap \
    --db=${q_db} \
    --part={}/${nparts} \
    --batch=4 \
    --nthreads=${nthreads} \
    --novelsplicing=1 \
    --gunzip \
    --terminal-threshold=100 \
    --indel-penalty=1 \
    --trim-mismatch-score=0 \
    --expand-offsets=1 \
    --max-mismatches=5.0 \
    --input-buffer-size=1000000 \
    --output-buffer-size=1000000 \
    --format=sam \
    --split-output=${q_prefix}.{} \
    --failed-input=${q_prefix}.not_mapped.{} \
    ${q_reads1} \
    ${q_reads2}" \
  ::: "${part_ids[@]}"