-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathrun.011.write_mol2_fp_updated.csh
executable file
·182 lines (142 loc) · 7.95 KB
/
run.011.write_mol2_fp_updated.csh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/bin/tcsh -fe
#
# This script takes the final clustered csv files and converts things into mol2 files. For each
# scoring method, it will write a mol2 of the top 'max_size' molecules from the clusterheads
# csv and from the families csv, to separate files. All header information is taken from one of
# the csv files (they are equivalent), and it is written over the header information in the
# original mol2 file. In addition, it will create footprint plots for all molecules that appear
# in any of the output mol2 files. Finally, it will also copy a few extra files into the root
# directory for visualization and analysis purposes.
#
# You will want to tune max_size so that you don't have multi-mol2 files which are exceedingly
# large. The current version of Chimera (circa March 2013) has trouble opening more than about
# 13,000 - 14,000 mol2 objects in ViewDock at one time, so plan accordingly.
#
### Tunable parameters (edit by hand)
# max_size : number of molecules taken from the top of each sorted csv; also part of output names
# max_num  : appears in the name of the sorted *_dock.mol2 input read from 010.moe-postprocess
# cutoff   : passed as the 2nd argument to plot_footprint_updated.py
# max_res  : passed as the 3rd argument to plot_footprint_updated.py
set max_size = 1000
set max_num  = 100000
set cutoff   = 0.2
set max_res  = 50

### Locations, all derived from the caller's environment.
### tcsh stops with "Undefined variable" if any of these environment variables is unset.
# Tool directories (not referenced again in the part of the script shown here).
set dockdir  = "$DOCKHOMEWORK/bin"
set amberdir = "$AMBERHOMEWORK/bin"
set moedir   = "$MOEHOMEWORK/bin"

# Root of the virtual-screening tree and its zzz.* support directories.
set rootdir   = "$VS_ROOTDIR"
set masterdir = "$rootdir/zzz.master"
set paramdir  = "$rootdir/zzz.parameters"
set scriptdir = "$rootdir/zzz.scripts"
set zincdir   = "$rootdir/zzz.zinclibs"

# System and vendor names used to build every input and output path below.
set system = "$VS_SYSTEM"
set vendor = "$VS_VENDOR"
### Prepare the 011.final-results tree for this system. Existing directories are left alone, so
### results already written for other vendors are not removed.
set results_top = "${rootdir}/${system}/011.final-results"
set sysfiles_dir = "${results_top}/system-files/"

if (! -e ${results_top}/) mkdir -p ${results_top}/
if (! -e ${results_top}/${vendor}) mkdir -p ${results_top}/${vendor}

### The shared system-files directory is filled only on the run that creates it.
if (! -e ${sysfiles_dir}) then
    echo "Creating the system-files directory and copying the corresponding files\n"
    mkdir -p ${sysfiles_dir}
    cd ${sysfiles_dir}

    # Every file comes from an earlier numbered step underneath the system directory.
    set sys_src = "${rootdir}/${system}"

    # Ligand files: from 001.lig-prep, and from 007.cartesian-min of the current vendor.
    cp ${sys_src}/001.lig-prep/${system}.lig.am1bcc.mol2 ./
    cp ${sys_src}/007.cartesian-min/${vendor}/${system}.lig.python.min.mol2 ./

    # Receptor files from 002.rec-prep; pro.noH.pdb is renamed to carry the system name.
    cp ${sys_src}/002.rec-prep/${system}.rec.clean.mol2 ./
    cp ${sys_src}/002.rec-prep/${system}.rec.clean.pdb ./
    cp ${sys_src}/002.rec-prep/pro.noH.pdb ./${system}.rec.noH.pdb

    # Sphere file from 003.spheres, and box.pdb from 004.grid renamed to carry the system name.
    cp ${sys_src}/003.spheres/${system}.rec.clust.close.sph ./
    cp ${sys_src}/004.grid/box.pdb ./${system}.box.pdb
endif
### For every primary ranking: rebuild its results directory, then write the ranked multi-mol2
### files, the per-list footprint text files, and the merged footprint plot pdfs.

# Scoring metrics. The same list serves as the primary ranking and as the secondary sort key.
set score_list = (dce_sum fps_es fps_sum fps_vdw totalScore fms_score vo_score hms_score descriptor_score)
set group_list = (clusterheads families)

# The first line of each sorted csv is dropped below (sed '1d'; presumably a header row), so
# max_size + 1 lines must be read to keep max_size molecules.
@ head_size = ${max_size} + 1

foreach primary_score (${score_list})
    echo ${primary_score}
    #qsub here

    # Input directory written by 010.moe-postprocess, the file-name prefix shared by every file
    # of this ranking, the sorted docked mol2 input, and the output directory.
    set in_dir = "${rootdir}/${system}/010.moe-postprocess/${vendor}/${primary_score}_rank"
    set run_tag = "${system}.${vendor}.${primary_score}_rank"
    set dock_mol2 = "${run_tag}.sorted_${primary_score}_${max_num}_dock.mol2"
    set out_dir = "${rootdir}/${system}/011.final-results/${vendor}/${primary_score}_rank"

    # Always start from an empty output directory for this ranking.
    rm -rf ${out_dir}
    mkdir -p ${out_dir}
    cd ${out_dir}

    ### Write out mol2 files for each different sorting method and for each group - families and
    ### clusterheads. Each resulting mol2 file will contain $max_size molecules.
    if (! -e ${in_dir}/${dock_mol2}) then
        echo " The file ${in_dir}/${dock_mol2} does not exist"
        # A missing input is fatal for the whole run; report it with a non-zero status.
        exit 1
    endif

    echo " Writing the families and clusterhead mol2 files for each scoring metric\n"
    mkdir temp/
    cd temp/
    cp ${in_dir}/${dock_mol2} ./
    python ${scriptdir}/break_into_mol.py ${dock_mol2}
    # Header information is always taken from the dce_sum families csv (the file header above
    # states that the csv files are equivalent in this respect). The per-molecule
    # temp/<id>_new.mol2 files read below are presumably produced by these two helper scripts.
    perl ${scriptdir}/concatenate_mol2_new_headers_descriptor.pl ${in_dir}/${run_tag}.final_sorted_dce_sum_families.csv
    cd ../

    foreach score (${score_list})
        foreach group (${group_list})
            set codes_file = ${score}_${group}_zinc_codes.txt
            set out_mol2 = ${run_tag}.final_sorted_${score}_${group}_${max_size}.mol2

            # First csv column of the top max_size rows: the molecule identifiers.
            head -n ${head_size} ${in_dir}/${run_tag}.final_sorted_${score}_${group}.csv | awk -F "," '{print $1}' | sed '1d' > ${codes_file}

            set listrank = 1 #reset rank to 1 for a new clusterhead+scoring metric
            foreach mol2 (`cat ${codes_file}`)
                # Stop before writing a header-only record if the molecule file is missing.
                if (! -e temp/${mol2}_new.mol2) then
                    echo " The file temp/${mol2}_new.mol2 does not exist"
                    exit 1
                endif

                # Prepend the provenance header lines, then the molecule itself. The primary-list
                # line used to be overwritten by the secondary-list line and never reached the
                # output; both are written now.
                echo "########## From_Secondary_List: ${score}" >> ${out_mol2}
                echo "########## From_Primary_List: ${primary_score}" >> ${out_mol2}
                # Only clusterheads carry their position within the list.
                if (${group} == "clusterheads") then
                    echo "########## List_Rank: ${listrank}" >> ${out_mol2}
                    @ listrank++ # increase rank
                endif
                cat temp/${mol2}_new.mol2 >> ${out_mol2}
            end

            cat ${codes_file} >> used_zinc_codes.txt
        end
    end
    rm -rf temp/

    # Unique identifiers that appear in any of the lists written above.
    sort used_zinc_codes.txt | uniq > zinc_codes.txt
    rm -f used_zinc_codes.txt

    ### Write a final footprint.txt file for each score-group combo
    echo "Writing footprint txt files for each scoring+group combo\n"
    mkdir temp/
    cd temp/
    # The temp/<id>.txt files read below are presumably the per-molecule output of this call.
    python ${scriptdir}/break_into_fp.py ${in_dir}/${run_tag}.total_fp.txt
    cd ../
    foreach score (${score_list})
        foreach group (${group_list})
            foreach zincid (`cat ${score}_${group}_zinc_codes.txt`)
                cat temp/${zincid}.txt >> ${run_tag}.footprints_${score}_${group}_${max_size}.txt
            end
        end
    end
    rm -rf temp/

    ### Write footprint plots for all molecules that show up in any of the output mol2 files.
    echo "Create the footprint pdfs for each score+group combo"
    mkdir temp/
    cd temp/
    foreach score (${score_list})
        foreach group (${group_list})
            python ${scriptdir}/break_into_fp.py ../${run_tag}.footprints_${score}_${group}_${max_size}.txt
        end
    end
    #removed the ignore flag
    #python -W ignore ${scriptdir}/plot_footprint_updated.py ../zinc_codes.txt ${cutoff} ${max_res}
    python ${scriptdir}/plot_footprint_updated.py ../zinc_codes.txt ${cutoff} ${max_res}
    cd ../

    # Merge the per-molecule pdfs of each list into one pdf, in list order.
    foreach score (${score_list})
        foreach group (${group_list})
            set filenames = ""
            foreach zincid (`cat ${score}_${group}_zinc_codes.txt`)
                set filenames = "${filenames} temp/${zincid}.pdf"
            end
            gs -q -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile=${run_tag}.footprint_plots_${score}_${group}_${max_size}.pdf ${filenames}
        end
    end

    # Keep bad_val_id_list.txt (presumably written by the plotting step) before removing the
    # scratch directory, then remove the identifier lists.
    mv ./temp/bad_val_id_list.txt .
    rm -rf temp/
    rm -f *zinc_codes.txt
end
exit