-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnanoporeQC
executable file
·100 lines (88 loc) · 3.13 KB
/
nanoporeQC
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#! /bin/bash
# wrapper for nanoporeMatchTable.py
# make sure we can find the python script. Assume it's in the same directory.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PATH=$DIR:$PATH
if [[ ! $(which nanoporeMatchTable.py) ]] || [[ ! $(which blat) ]] || [[ ! $(which faCount) ]] ; then
echo
echo "ERROR! Please add the programs faCount, blat, and nanoporeMatchTable.py to your PATH or to this script"
echo
exit
fi
readfasta=$1
adapters=$2
alignments=$3
txfasta=$4
if [ ! -e "$adapters" ]; then
echo
echo "This program aligns adapters to nanopore reads and outputs a table with match positions"
echo
echo "Inputs are a reads file (fasta or fastq), and an adapter file (fasta with two sequences)"
echo
echo "When ALSO given a reads-vs-transcripts alignment file (in sam or psl format), "
echo "it adds the transcript matches to the output table"
echo
echo "IF you add a sam format input, you MUST ALSO add a fasta file with the transcripts"
echo "that were used in the alignments"
echo
echo "The inputs must be in order, like so:"
echo
echo "$0 reads.fa adapters.fa > output.tsv"
echo "$0 reads.fq adapters.fa alignments.psl > output.tsv"
echo "$0 reads.fa adapters.fa alignments.sam transcripts.fa > output.tsv"
echo
exit
fi
graceful_death() {
>&2 echo "ERROR: Cannot finish $0 because $1";
exit 1
}
if [ ! -e "$readfasta" ]; then
graceful_death "cannot find nanopore fasta file $readfasta"
fi
if [ ! -e "$adapters" ]; then
graceful_death "cannot find adapter fasta file $adapters"
fi
if [ ! -z "$alignments" ]; then
if [ ! -e "$alignments" ]; then
graceful_death "cannot find alignment file $alignments"
fi
if [[ "$alignments" == *sam ]]; then
atype='sam'
elif [[ "$alignments" == *psl ]]; then
atype=''
else
graceful_death "don't understand file type of $alignments"
fi
if [ "$atype" == 'sam' ] && [ ! -e "$txfasta" ]; then
graceful_death "cannot find transcript fasta file $txfasta"
fi
fi
# Format input
workdir=/tmp/npQC.$RANDOM
mkdir $workdir
base=$(echo $readfasta | sed 's/.*\///' | sed 's/\.*//')
# convert read file if necessary
if [[ "$readfasta" == *fastq ]] || [[ "$readfastq" == *fq ]]; then
awk 'NR%4==1 || NR%4==2' $readfasta | sed 's/@/>/' > $workdir/input.fa
readfasta="$workdir/input.fa"
fi
# Get read sizes
faCount $readfasta > $workdir/$base.rSizes
# align adapters to SRSF1
blat -noHead -minIdentity=0 -minMatch=0 -minScore=0 $readfasta $adapters $workdir/$base.psl >/dev/null
# create match table
# if we have transcript input
if [ ! -z "$alignments" ]; then
if [ "$atype" == 'sam' ]; then
faCount $txfasta > $workdir/$base.txSizes
# output to STDOUT
nanoporeMatchTable.py --adaptalign $workdir/$base.psl --readsizes $workdir/$base.rSizes --txalign $alignments --txsizes $workdir/$base.txSizes
else
nanoporeMatchTable.py --adaptalign $workdir/$base.psl --readsizes $workdir/$base.rSizes --txalign $alignments
fi
else
nanoporeMatchTable.py --adaptalign $workdir/$base.psl --readsizes $workdir/$base.rSizes
fi
rm $workdir/*
rmdir $workdir