-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetchNewPdb.py
71 lines (54 loc) · 2.53 KB
/
fetchNewPdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/python3
'''
Search for new PDB structures corresponding to a search term and deposited after a certain date.
Default search term: autophagy
Default max deposition date: 30 days ago
'''
from pypdb import make_query, do_search, describe_pdb, get_pdb_file, get_all_info
from dateutil import parser
from datetime import datetime, timedelta
def searchPDB(searchTerm):
    """Query the PDB for structures matching a keyword.

    Args:
        searchTerm: keyword submitted to the PDB as an
            ``AdvancedKeywordQuery``.

    Returns:
        The result of ``do_search`` for the query; ``extractData`` iterates
        over it and treats each item as a PDB ID.
    """
    # NOTE: the IDs '4xkl', '5v4k' and '3vtv' used to be hard-coded here for
    # testing, which made this function ignore ``searchTerm``. Known issue
    # recorded with those test IDs: '3vtv' reports 'entityNr' instead of
    # 'nr_entities'.
    searchStructures = make_query(searchTerm, querytype='AdvancedKeywordQuery')
    foundStructures = do_search(searchStructures)
    print(foundStructures)
    return foundStructures
def extractData(maxDepositionDate, foundStructures):
    """Collect details for every structure deposited on or after a cutoff.

    Args:
        maxDepositionDate: ``datetime.date`` cutoff. Entries whose deposition
            date is earlier than this are skipped.
        foundStructures: iterable of PDB IDs (as returned by ``searchPDB``).

    Returns:
        dict mapping each retained PDB ID to a tuple
        ``(moleculeName, entityTitle, depositionDate)``. ``moleculeName`` is
        a list of names when the entry reports more than one entity, a single
        name when it reports exactly one, and a placeholder string otherwise.
        ``depositionDate`` is the date rendered with ``str``.

    Raises:
        KeyError: if an entry's description lacks ``deposition_date``,
            ``nr_entities`` or ``title``, or a multi-entity entry lacks
            ``polymer``.
    """
    results = {}
    for entry in foundStructures:
        # One describe_pdb call per entry; its result is reused for every
        # field below instead of querying the service again.
        entityInfo = describe_pdb(entry)
        depositionDate = parser.parse(entityInfo['deposition_date']).date()
        if depositionDate < maxDepositionDate:
            continue

        entityExtraInfo = get_all_info(entry)
        entityCount = int(entityInfo['nr_entities'])
        if entityCount > 1:
            # Build a fresh list for each entry so names from earlier entries
            # never leak into (or alias) this entry's result.
            moleculeName = [
                mol.get('macroMolecule').get('@name')
                for mol in entityExtraInfo['polymer']
            ]
        elif entityCount == 1:
            moleculeName = (
                entityExtraInfo.get('polymer').get('macroMolecule').get('@name')
            )
        else:
            moleculeName = 'No molecule name give'

        results[entry] = (moleculeName, entityInfo['title'], str(depositionDate))
    return results
def printOutput(foundStructures, searchTerm, results, maxDepositionDate):
    """Build the one-sentence summary of a search run.

    Args:
        foundStructures: sized collection of all structures found.
        searchTerm: the term that was searched for.
        results: sized collection of the structures kept after date filtering.
        maxDepositionDate: the cutoff date shown in the message.

    Returns:
        The formatted summary string (nothing is printed here).
    """
    totalFound = len(foundStructures)
    recentCount = len(results)
    template = (
        "I found {} structures from the PDB corresponding to the search term '{}'. "
        "Of the {} structures, {} were deposited after {}."
    )
    return template.format(
        totalFound, searchTerm, totalFound, recentCount, maxDepositionDate
    )
def main():
    """Run the search with the built-in settings and print the matches.

    Returns:
        The dict produced by ``extractData`` for the search results.
    """
    searchTerm = 'autophagy'
    # NOTE(review): the module docstring mentions a 30-day default window,
    # but the value used here is 20000 days - confirm which is intended.
    lookbackDays = 20000
    cutoffDate = (datetime.today() - timedelta(days=lookbackDays)).date()

    hits = searchPDB(searchTerm)
    recentEntries = extractData(cutoffDate, hits)

    # printOutput() can render a summary sentence for these values; it is
    # not called here, only the raw result dict is printed.
    print(recentEntries)
    return recentEntries
# Run the search only when executed as a script, not when imported.
if __name__ == "__main__":
    main()