-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRUargpore.sh
executable file
·173 lines (127 loc) · 4.2 KB
/
RUargpore.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/bin/bash
## RUARGpore identifies ARGs and their hosts in real time during nanopore
## selective sequencing.
## Author: Yu XIA & Yuhong SUN 2022-07-26
## Email: [email protected]
## version 1.0
##
## Interactive set-up: asks for the sequencing time, the GridION address, the
## GridION password and the thread count, then creates a fresh time-stamped
## output directory and changes into it.
set -e

# Round counter used by the polling loop further down.
n=1
SCRIPT=$(realpath "$0")
DIR=$(dirname "$SCRIPT")
nowt=$(date +%Y-%m-%d.%H:%M:%S)
out=out_RUARGpore_$nowt
threads=20

echo "Enter your sequencing time(h): "
read -r time
if ! [[ $time =~ ^[0-9]+$ ]]; then
  echo "ERROR: sequencing time must be a whole number of hours" >&2
  exit 1
fi
# Hours -> minutes; 10# stops inputs such as "08" from being parsed as octal.
time=$((10#$time * 60))

# No '$' in the prompt text: inside double quotes it would be expanded as
# (unset) variables and vanish from the message.
echo "Enter GridION IP and the PATH to your fastq_pass directory :
e.g. user@<GridION_IP>:/data/usr/20220616_0415_X2_FAT17403_aacfbc92/fastq_pass"
read -r address

echo "Enter your GridION password:"
# -s keeps the password off the terminal; the bare echo restores the newline
# that the silenced Enter key did not print.
read -rs password
echo

echo "Enter No. of threads you intended to use for ARG identification (default 20):"
read -r threads_answer
# An empty answer keeps the default instead of clearing it.
threads=${threads_answer:-$threads}
if ! [[ $threads =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: number of threads must be a positive integer" >&2
  exit 1
fi

if [ -d "$out" ]; then
  echo "Warning: $out already exists. previous results are overwrited"
  # ${out:?} aborts rather than running 'rm -rf' on an empty path.
  rm -rf -- "${out:?}"
fi
mkdir -p -- "$out"
echo "waiting for sequencing ..."
cd "$out"
# ---------------------------------------------------------------------------
# Polling loop: every round copies the fastq.gz files that appeared on the
# GridION since the previous round into fastq_pass_<n>, converts them to one
# FASTA per round, runs argpore.sh on it and concatenates all per-round
# result tables into <n>_argpore.* files in the output directory.
# Reads: time (remaining minutes), n (round counter), address, password,
#        threads, DIR.
# ---------------------------------------------------------------------------

# Hand the password to sshpass through the environment (sshpass -e) so that
# it does not show up in the process list as a command-line argument.
export SSHPASS=$password
threads=${threads:-20}
# Minutes of sequencing time accounted for per round.
round_min=30

case $address in
  *:*) ;;
  *)
    echo "ERROR: address must look like user@host:/path/to/fastq_pass" >&2
    exit 1
    ;;
esac
# Loop-invariant pieces of the address: login part and remote directory.
ip=${address%%:*}
dir=${address#*:}
dir=${dir%/}
remote_base=${dir##*/}
# Quote for the remote shell so that the remote side does not expand the
# '*.fastq.gz' pattern itself.
remote_find=$(printf 'find %q -name %q' "$dir" '*.fastq.gz')

# Print the path of the file in directory $1 whose name ends in the highest
# "_<number>.fastq.gz" index; prints nothing when there is no such file.
newest_fastq() {
  find "$1" -maxdepth 1 -type f -name '*_*.fastq.gz' \
    | sed -En 's/^(.*_([0-9]+)\.fastq\.gz)$/\2\t\1/p' \
    | sort -n -k1,1 \
    | tail -n 1 \
    | cut -f 2-
}

# Remove the newest fastq.gz of directory $1 (it may still be being written)
# and delete its entry from list file $2, so that the next round sees it as
# new and downloads it again.
drop_newest_fastq() {
  local folder=$1 list=$2 newest suffix
  newest=$(newest_fastq "$folder")
  [ -n "$newest" ] || return 0
  rm -f -- "$newest"
  if [ -f "$list" ]; then
    suffix="/${newest##*/}"
    awk -v s="$suffix" \
      'length($0) < length(s) || substr($0, length($0) - length(s) + 1) != s' \
      "$list" > "$list.tmp"
    mv -- "$list.tmp" "$list"
  fi
}

# '-gt' is a numeric comparison; '>' inside [ ] would be a redirection.
while [ "$time" -gt 0 ]; do
  # NOTE(review): the messages and the time bookkeeping speak of 30-minute
  # rounds while the pause is 2 minutes - confirm which one is intended.
  sleep 2m
  echo "copying the $n 30min results from GridION to local server"
  if [ "$n" -eq 1 ]; then
    # First round: record the remote file list and copy the whole directory.
    sshpass -e ssh "$ip" "$remote_find" > "$n.all.list"
    sshpass -e scp -r "$address" ./
    if [ "$remote_base" != fastq_pass ]; then
      mv -- "$remote_base" fastq_pass
    fi
    mv -- "$n.all.list" fastq_pass
  else
    mkdir -p fastq_pass
    cd fastq_pass
    sshpass -e ssh "$ip" "$remote_find" > "$n.all.list"
    m=$((n - 1))
    # Previous round's list, used to work out what is new.
    cp -- "../fastq_pass_$m/$m.all.list" .
    # Lines present now but not in the previous list. grep exits 1 when
    # nothing is new, which must not abort the script under 'set -e'.
    grep -Fxv -f "$m.all.list" "$n.all.list" > "$n.todownload.list" \
      || [ $? -eq 1 ]
    # Download each new file straight into the folder named after its remote
    # parent directory; files sitting directly in the remote root stay here.
    while IFS= read -r remote_file; do
      parent=${remote_file%/*}
      parent=${parent##*/}
      if [ "$parent" = "$remote_base" ]; then
        folder=.
      else
        folder=$parent
      fi
      mkdir -p -- "$folder"
      # stdin from /dev/null so the transfer cannot swallow the file list.
      sshpass -e scp "${ip}:${remote_file}" "$folder/" < /dev/null
    done < "$n.todownload.list"
    cd ..
  fi
  mv -- fastq_pass "fastq_pass_$n"
  echo "finish copying"
  cd "fastq_pass_$n"

  have_reads=0
  barcode_dirs=(barcode*/)
  if [ ! -d "${barcode_dirs[0]}" ]; then
    echo "no barcode was found"
    # Remove the newest generated fastq and combine the rest into one FASTA.
    drop_newest_fastq . "$n.all.list"
    reads=(*.fastq.gz)
    if [ -e "${reads[0]}" ]; then
      cat -- "${reads[@]}" > "$n.fastq.gz"
      "${DIR}/bin/seqkit" fq2fa "$n.fastq.gz" -o "$n.fa"
      "${DIR}/bin/fastaNameLengh.pl" "$n.fa" > "$n.fa.barcode"
      sed -i "s/^/nobarcode\t/g" "$n.fa.barcode"
      rm -f -- "$n.fastq.gz"
      have_reads=1
    else
      echo "data ERROR: NO fastq.gz was found"
    fi
  else
    for barcode_dir in "${barcode_dirs[@]}"; do
      bc=${barcode_dir%/}
      drop_newest_fastq "$bc" "$n.all.list"
      reads=("$bc"/*.fastq.gz)
      # Nothing complete left for this barcode in this round.
      [ -e "${reads[0]}" ] || continue
      cat -- "${reads[@]}" > "$bc.fastq.gz"
      "${DIR}/bin/seqkit" fq2fa "$bc.fastq.gz" -o "$bc.fa"
      "${DIR}/bin/fastaNameLengh.pl" "$bc.fa" > "$bc.fa.barcode.tab"
      sed -i "s/^/$bc\t/g" "$bc.fa.barcode.tab"
      have_reads=1
    done
    if [ "$have_reads" -eq 1 ]; then
      cat barcode*.fa > "$n.fa"
      cat barcode*.fa.barcode.tab > "$n.fa.barcode"
      rm -f -- barcode*.fa barcode*.fa.barcode.tab barcode*.fastq.gz
    fi
  fi

  if [ "$have_reads" -eq 1 ]; then
    echo "Start ARG identification for $n.fa"
    bash "${DIR}/argpore.sh" -f "$n.fa" -t "${threads}"
    echo "Finish ARG identification for $n.fa"
  else
    echo "no complete fastq.gz to analyse in round $n; skipping ARG identification"
  fi
  cd ../

  # Combine the results of every round so far into accumulative tables.
  find . -name "*_taxa.tab" -exec cat '{}' + > "${n}_argpore.taxa.tab"
  find . -name "*_arg.w.taxa.tab" -exec cat '{}' + > "${n}_argpore.arg.w.taxa.tab"
  find . -name "*_arg.tab" -exec cat '{}' + > "${n}_argpore.arg.tab"
  # TODO: optionally e-mail ${n}_argpore.arg.w.taxa.tab after each round (the
  # earlier sendEmail/mail attempts were marked as not working). Never
  # hard-code SMTP credentials in this script.

  # One fixed-length round has elapsed.
  time=$((time - round_min))
  n=$((n + 1))
done
echo "Finish RUARGpore!"