forked from Atemia/PythonMiniProject-Atemia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunPdbSoftware.py
executable file
·520 lines (446 loc) · 22 KB
/
runPdbSoftware.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
#!/usr/bin/env python3
# FUNCTIONS/SOFTWARE TOOLS
## MAIN DISPLAY MENU
def menu():
    """
    Function: Displays the menu with the different options for the user and
              reads the user's selection.
    Arguments: None
    Returns: None. The text typed by the user is stored in the global
             ``choice``. The global ``file`` (name of the current PDB) is
             read for the status line; "None" is shown when it is not set.
    """
    global choice

    def blue(text):
        """Wrap text in the ANSI escape codes that colour it blue."""
        return '\033[0;34m' + text + '\033[0m'

    width = 80  # every visible row of the box is exactly this wide
    star = blue("*")
    border = blue("*" * width)
    software = "PDB FILE ANALYZER"
    choices = "Select an option from below:"
    entries = (
        "1) Open a PDB file (O)",
        "2) Information (I)",
        "3) Show histrogram of amino acids (H)",
        "4) Display Secondary Structure (S)",
        "5) Export PDB File (X)",
        "6) Exit (Q)",
    )
    # globals().get avoids a NameError when menu() runs before any file is set
    status = "Current PDB: " + globals().get("file", "None")

    def row(text, indent=0):
        """Print one left-aligned row between the two border stars."""
        # 5 = two stars plus the three separators print() inserts
        padding = " " * (width - 5 - indent - len(text))
        print(star, " " * indent, blue(text), padding + star)

    print(border)
    row(software)
    print(border)
    row(choices)
    print(star, blue(" " * (width - 4)), star)
    for entry in entries:
        row(entry, indent=5)
    # the status line is right-aligned, so the padding goes before the text
    print(star, "", " " * (width - 6 - len(status)), blue(status), star)
    print(border)
    choice = input(":")
# PDB FILE FORMAT TESTING
def fungua(filename):
    """
    Function: Opens the given path and tests whether it looks like a pdb file.
              Every non-blank line must begin with a known PDB record name
              (columns 1-6). As soon as the file can be opened its base name
              is stored in the global ``file``.
    Argument: Name or valid path of the file
    Returns:  True when the file is readable, not empty and every line starts
              with a known record name; False otherwise. When the file cannot
              be read at all a message is printed as well.
    """
    from pathlib import Path

    global file

    def red(text):
        """Wrap text in the ANSI escape codes that colour it red."""
        return '\033[0;31m' + text + '\033[0m'

    # Record names of the PDB format plus a few records found in older files.
    known_records = {
        'HEADER', 'OBSLTE', 'TITLE', 'SPLIT', 'CAVEAT', 'COMPND', 'SOURCE',
        'KEYWDS', 'EXPDTA', 'NUMMDL', 'MDLTYP', 'AUTHOR', 'REVDAT', 'SPRSDE',
        'JRNL', 'REMARK', 'DBREF', 'DBREF1', 'DBREF2', 'SEQADV', 'SEQRES',
        'MODRES', 'HET', 'FORMUL', 'HETNAM', 'HETSYN', 'HELIX', 'SHEET',
        'SSBOND', 'LINK', 'CISPEP', 'SITE', 'CRYST1',
        'MTRIX1', 'MTRIX2', 'MTRIX3', 'ORIGX1', 'ORIGX2', 'ORIGX3',
        'SCALE1', 'SCALE2', 'SCALE3', 'MODEL', 'ATOM', 'ANISOU', 'TER',
        'HETATM', 'ENDMDL', 'CONECT', 'MASTER', 'END',
        # legacy records
        'TURN', 'HYDBND', 'SLTBRG', 'TVECT', 'FTNOTE', 'SIGATM', 'SIGUIJ',
    }

    path = Path(filename)
    try:
        with open(path, 'r') as f:
            file = path.name
            seen_record = False
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines
                if line[:6].strip() not in known_records:
                    return False
                seen_record = True
            # an empty file is not a pdb file
            return seen_record
    except (OSError, UnicodeDecodeError):
        # missing path, directory, permission problem or binary content
        print(red("Invalid file loaded."))
        return False
## CHOICES OPTIONS
def choiceO():
    """
    Function: Asks for the path of a pdb file until fungua() accepts one,
              stores that path in the global ``load_file`` (used by the other
              options to open the file) and shows the menu again.
    """
    global load_file
    global file

    def red(text):
        """Wrap text in the ANSI escape codes that colour it red."""
        return '\033[0;31m' + text + '\033[0m'

    # fungua() renames ``file`` as soon as a path opens, even when the format
    # check fails afterwards, so remember what the menu showed before.
    shown_before = globals().get("file", "None")
    while True:
        candidate = str(input('Enter a Valid PATH for a PDB File:'))
        if fungua(candidate):
            break
        file = shown_before
        print(red("The file loaded does not follow the pdb format.\nPlease Enter a valid pdb file"))
    # only a validated path replaces the previously loaded one
    load_file = candidate
    print(red("The File %s has been sucessfully loaded" % load_file))
    menu()
# CHOICES INFORMATION
def choiceI():
    """Function: prints the information summary of the loaded pdb file.

    The title comes first, then the chain list and the per-chain details,
    and finally the menu is displayed again.
    """
    titlePdb()  # file name and title of the pdb file
    for report in (printChains, chainInfo):
        report(load_file)
    menu()
##
def titlePdb():
    """
    Function: Prints the name of the loaded pdb file (global ``load_file``)
              and the title assembled from its TITLE records, wrapped at 80
              characters. Nothing is printed when fungua() rejects the file.
              The global ``file`` is set to the base name of the file.
    """
    import textwrap
    from pathlib import Path

    global file

    def red(text):
        """Wrap text in the ANSI escape codes that colour it red."""
        return '\033[0;31m' + text + '\033[0m'

    if not fungua(load_file):
        return
    file = Path(load_file).name
    print("PDB File: %s " % red(file))
    pieces = []
    with open(load_file, 'r') as f:
        for line in f:
            if line.startswith('TITLE'):
                # Columns 11-80 hold the title text; columns 9-10 carry the
                # continuation number, which is not part of the title.
                pieces.append(line[10:80].rstrip())
    # collapse the padding left over from the fixed-width columns
    title = " ".join("".join(pieces).split())
    print(textwrap.fill("Title: " + title, 80))
##
def printChains(load_file):
    """
    Input: pdb file
    Function: Prints all the chains named in the SEQRES records of the pdb
              file, e.g. "Chains: A, B and C", and stores the sorted chain
              identifiers, concatenated, in the global ``chainString``.
              "Chains: None" is printed when the file has no SEQRES chain.
    """
    global chainString

    chain_ids = set()
    with open(load_file, 'r') as f:
        for line in f:
            # The chain identifier is column 12 of a SEQRES record. Reading
            # the column keeps one-letter residue names (A, C, G, U ...)
            # from being mistaken for chains.
            if line.startswith('SEQRES') and len(line) > 11 and line[11].strip():
                chain_ids.add(line[11])
    ordered = sorted(chain_ids)
    chainString = "".join(ordered)
    if not ordered:
        allChains = "None"
    elif len(ordered) == 1:
        allChains = ordered[0]
    else:
        allChains = ", ".join(ordered[:-1]) + ' and ' + ordered[-1]
    print('Chains: ' + allChains)
##
def chainInfo(load_file):
    """
    Input: pdb file
    Function: Prints, for every chain found in the SEQRES records, the number
              of amino acids, the number of HELIX and SHEET records that
              start in that chain, and the one-letter sequence wrapped at 50
              characters per line. Residues that are not one of the twenty
              standard amino acids are shown as "X".
    """
    import textwrap  # wraps the sequence into lines of a fixed length

    three_to_one = {
        'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'GLY': 'G',
        'GLN': 'Q', 'GLU': 'E', 'HIS': 'H', 'CYS': 'C', 'ILE': 'I',
        'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
        'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
    }

    sequences = {}  # chain id -> one-letter codes, chains in file order
    helices = {}    # chain id -> number of HELIX records
    sheets = {}     # chain id -> number of SHEET records
    # One pass over the file; fields are read from their fixed columns so
    # that values which run together do not shift the parsing.
    with open(load_file, 'r') as f:
        for line in f:
            if line.startswith('SEQRES') and len(line) > 11:
                # column 12: chain id, columns 20-70: residue names
                letters = [three_to_one.get(name, 'X') for name in line[19:70].split()]
                sequences.setdefault(line[11], []).extend(letters)
            elif line.startswith('HELIX') and len(line) > 19:
                # column 20: chain of the first residue of the helix
                helices[line[19]] = helices.get(line[19], 0) + 1
            elif line.startswith('SHEET') and len(line) > 21:
                # column 22: chain of the first residue of the strand
                sheets[line[21]] = sheets.get(line[21], 0) + 1

    indent = " " * 15  # lines up continuation lines under the sequence
    for chain, letters in sequences.items():
        seq = "".join(letters)
        s = ("\n" + indent).join(textwrap.wrap(seq, 50))
        print(" - Chain %s" % chain)
        print("%4s Number of amino acids: " % "", len(seq))
        print("%4s Number of helix: %9d" % ("", helices.get(chain, 0)))
        print("%4s Number of sheet: %9d" % ("", sheets.get(chain, 0)))
        print("%4s Sequence: %s" % ("", s))
# Choice H - Amino acids histograms
def choiceH():
    """Function: asks how the amino acid histogram should be ordered and prints it.

    options() is shown again until the global ``option`` holds one of the
    four accepted codes; the histogram is then printed by selectionOutput().
    """
    accepted = ('an', 'dn', 'aa', 'da')
    while True:
        options()
        if option.lower() in accepted:
            selectionOutput(option, load_file)
            return
        print('\033[0;31m' + "Please enter a valid option" + '\033[0m')
##
def options():
    """Function: shows the histogram ordering choices and reads the answer.

    The text typed by the user is stored in the global ``option``.
    """
    global option

    def blue(text):
        return '\033[0;34m' + text + '\033[0m'

    prompt_lines = (
        "Choose an option to order by:",
        " number of amino acids - ascending (an)",
        " number of amino acids - descending (dn)",
        " alphabetically - ascending (aa)",
        " alphabetically - descending (da)",
    )
    for text in prompt_lines:
        print(blue(text))
    option = input("Order by:")
##
def selectionOutput(option,load_file):
    """
    Input: one of the options on the display, i.e. 'an', 'dn', 'aa' or 'da',
           and the pdb file
    Function: prints a histogram of the residues named in the SEQRES records
              of the pdb file, one line per residue name with its count and
              one star per occurrence, then shows the menu again.
              'an'/'dn' order by count (ascending/descending) and 'aa'/'da'
              order by name (ascending/descending). Any other option prints
              an error message instead of a histogram.
    """
    from collections import Counter

    counts = Counter()
    with open(load_file, 'r') as f:
        for line in f:
            if line.startswith('SEQRES'):
                # columns 20-70 of a SEQRES record hold the residue names
                counts.update(line[19:70].split())

    # option -> (position in the (name, count) pair to sort on, descending?)
    sort_rules = {
        'aa': (0, False),
        'da': (0, True),
        'an': (1, False),
        'dn': (1, True),
    }
    mode = option.lower()
    if mode in sort_rules:
        field, descending = sort_rules[mode]
        ordered = sorted(counts.items(), key=lambda item: item[field], reverse=descending)
        for name, count in ordered:
            print(name, "( %2d)" % count, ": " + "*" * count)
    else:
        print("Invalid selection made! Try again")
    menu()
# CHOICE S
def choiceS(load_file):
    """Input: pdb file
       Functions: displays the secondary structure of a pdb file

    For every chain in the SEQRES records three aligned rows are printed in
    blocks of 80 characters: the one-letter sequence, a symbol row ("/" for
    residues in a helix, "|" for residues in a sheet strand, "-" otherwise)
    and a label row carrying the helix identifier, or the strand number
    followed by the sheet identifier, under the first residue of each
    element. Residues that are not one of the twenty standard amino acids
    are shown as "X". The menu is displayed again afterwards.

    NOTE(review): residue numbers from HELIX/SHEET records are used directly
    as 1-based positions in the SEQRES sequence, so this assumes numbering
    starts at 1 and has no gaps - confirm for the files in use. Positions
    outside the sequence are ignored.
    """
    print("Secondary Structure of the PDB id %s"%load_file)

    three_to_one = {
        'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'GLY': 'G',
        'GLN': 'Q', 'GLU': 'E', 'HIS': 'H', 'CYS': 'C', 'ILE': 'I',
        'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
        'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
    }

    def residue_range(line, first, last):
        """Return (start, end) read from two column slices, or None if unreadable."""
        try:
            return int(line[first]), int(line[last])
        except ValueError:
            return None

    def annotate(symbols, labels, elements, symbol):
        """Write the label and the symbol of every element into the two rows."""
        size = len(symbols)
        for label, (start, end) in elements:
            # the label starts under the first residue of the element
            for offset, character in enumerate(label):
                position = start - 1 + offset
                if 0 <= position < size:
                    labels[position] = character
            for number in range(max(start, 1), min(end, size) + 1):
                symbols[number - 1] = symbol

    sequences = {}  # chain id -> one-letter codes, chains in file order
    helices = {}    # chain id -> [(label, (start, end)), ...]
    sheets = {}     # chain id -> [(label, (start, end)), ...]
    # One pass over the file; fields are read from their fixed columns so
    # that values which run together do not shift the parsing.
    with open(load_file, 'r') as f:
        for line in f:
            if line.startswith('SEQRES') and len(line) > 11:
                # column 12: chain id, columns 20-70: residue names
                letters = [three_to_one.get(name, 'X') for name in line[19:70].split()]
                sequences.setdefault(line[11], []).extend(letters)
            elif line.startswith('HELIX') and len(line) > 19:
                # column 20: chain, 22-25: first residue, 34-37: last residue
                span = residue_range(line, slice(21, 25), slice(33, 37))
                if span:
                    label = line[11:14].strip()  # helix identifier
                    helices.setdefault(line[19], []).append((label, span))
            elif line.startswith('SHEET') and len(line) > 21:
                # column 22: chain, 23-26: first residue, 34-37: last residue
                span = residue_range(line, slice(22, 26), slice(33, 37))
                if span:
                    # strand number followed by the sheet identifier
                    label = line[7:10].strip() + line[11:14].strip()
                    sheets.setdefault(line[21], []).append((label, span))

    for chain, letters in sequences.items():
        seq = "".join(letters)
        symbols = ["-"] * len(seq)
        labels = [" "] * len(seq)
        # sheets are written last, so they win where both claim a residue
        annotate(symbols, labels, helices.get(chain, []), "/")
        annotate(symbols, labels, sheets.get(chain, []), "|")
        symbol_row = "".join(symbols)
        label_row = "".join(labels)
        print("Chain %s:\n(1)" % chain)
        for c in range(0, len(seq), 80):
            print(seq[c:c+80] + "\n" + symbol_row[c:c+80] + "\n" + label_row[c:c+80] + "\n")
        print("(%d)" % len(seq), "\n")  # length of the sequence
    menu()
# CHOICE X - Export pdb
def choiceX():
    """
    Function: Exports the loaded pdb file (global ``load_file``) by copying
              it to a path entered by the user, then shows the menu again.
              A message is printed instead of copying when the target is the
              loaded file itself or cannot be written.
    """
    import shutil

    def red(text):
        """Wrap text in the ANSI escape codes that colour it red."""
        return '\033[0;31m' + text + '\033[0m'

    exported = input("Enter the file path and name you want to export including the '.pdb' extension: ")
    try:
        # copyfile refuses to copy a file onto itself, which would otherwise
        # truncate the loaded file before it has been read
        shutil.copyfile(load_file, exported)
    except shutil.SameFileError:
        print(red("The export path is the loaded file itself. Nothing was exported."))
    except OSError as error:
        print(red("The file could not be exported: %s" % error))
    menu()
# Running the software
def mainMenu():
    """
    Function: Runs the software. Shows the menu and dispatches the user's
              selection (global ``choice``) to the function of the chosen
              option until the user exits.

    Until a file has been loaded only 'o' (open) and 'q' (quit) are accepted.
    Every option function ends by calling menu(), which stores the next
    selection in ``choice``, so the loop only calls menu() itself on the
    paths that run no option function.
    """
    import sys

    global file

    def red(text):
        """Wrap text in the ANSI escape codes that colour it red."""
        return '\033[0;31m' + text + '\033[0m'

    def wants_to_exit():
        """Ask until the user answers exit (True) or back to the menu (False)."""
        while True:
            print(red("Do you want to exit(E) or do you want go back to the menu (M)"))
            select = input().lower()
            if select == "e":
                return True
            if select == "m":
                return False

    # options that need a loaded file; each one calls menu() when it is done
    actions = {
        "i": choiceI,
        "h": choiceH,
        "s": lambda: choiceS(load_file),
        "x": choiceX,
    }

    file = "None"
    file_loaded = False
    menu()  # The display function
    while True:
        selected = choice.lower()
        if selected == "q":
            if wants_to_exit():
                sys.exit(red("Good bye! Thank you for using this sofware."))
            # NOTE(review): going back to the menu starts over without a
            # loaded file, as the previous implementation did - confirm
            # that this reset is wanted.
            file, file_loaded = "None", False
            menu()
        elif not file_loaded:
            if selected == "o":
                choiceO()
                file_loaded = True
            else:
                print(red("Invalid choice!\nEnter a valid choice ie 'o' to load a pdb file or 'q' to Exit the program "))
                menu()
        elif selected == "o":
            print("Current loaded pdb file is %s"%red(load_file),"\nDo you want to load another file (yes/no)")
            if input().lower() == "yes":
                # start over; the user picks 'o' on the fresh menu
                file, file_loaded = "None", False
            menu()
        elif selected in actions:
            actions[selected]()
        else:
            print(red("Invalid option selected"))
            menu()
# Start the interactive program only when the file is run as a script, so
# that importing the module does not block on user input.
if __name__ == "__main__":
    mainMenu()