Skip to content

Commit c849d6e

Browse files
author
Evgenii Osipov
committed
Fixed search when haystack spans multiple objects and the same column is selected in all objects
1 parent fd57af9 commit c849d6e

File tree

1 file changed

+47
-54
lines changed

1 file changed

+47
-54
lines changed

findseq.py

Lines changed: 47 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -70,24 +70,6 @@
7070

7171

7272
def findseq(needle, haystack, selName=None, het=0, firstOnly=0):
73-
"""USAGE:
74-
findseq needle, haystack[, selName[, het[, firstOnly]]]
75-
"""
76-
# set the name of the selection to return.
77-
if selName == None:
78-
rSelName = "foundSeq" + str(random.randint(0, 32000))
79-
selName = rSelName
80-
elif selName == "sele":
81-
rSelName = "sele"
82-
else:
83-
rSelName = selName
84-
85-
# input checking
86-
if not checkParams(needle, haystack, selName, het, firstOnly):
87-
print("There was an error with a parameter. Please see")
88-
print("the above error message for how to fix it.")
89-
return None
90-
9173
one_letter = {
9274
'00C': 'C', '01W': 'X', '0A0': 'D', '0A1': 'Y', '0A2': 'K',
9375
'0A8': 'C', '0AA': 'V', '0AB': 'V', '0AC': 'G', '0AD': 'G',
@@ -338,51 +320,62 @@ def findseq(needle, haystack, selName=None, het=0, firstOnly=0):
338320
'YG ': 'G', 'YOF': 'Y', 'YRR': 'N', 'YYG': 'G', 'Z ': 'C',
339321
'ZAD': 'A', 'ZAL': 'A', 'ZBC': 'C', 'ZCY': 'C', 'ZDU': 'U',
340322
'ZFB': 'X', 'ZGU': 'G', 'ZHP': 'N', 'ZTH': 'T', 'ZZJ': 'A'}
341-
342-
# remove hetero atoms (waters/ligands/etc) from consideration?
343-
if het:
344-
cmd.select("__h", "br. " + haystack)
323+
# set the name of the selection to return.
324+
if selName == None:
325+
rSelName = "foundSeq" + str(random.randint(0, 32000))
326+
selName = rSelName
327+
elif selName == "sele":
328+
rSelName = "sele"
345329
else:
346-
cmd.select("__h", "br. " + haystack + " and not het")
330+
rSelName = selName
331+
# make an empty selection to which we add residues
332+
cmd.select(rSelName, 'None')
333+
for obj in cmd.get_object_list(haystack):
334+
# input checking
335+
if not checkParams(needle, haystack, selName, het, firstOnly):
336+
print("There was an error with a parameter. Please see")
337+
print("the above error message for how to fix it.")
338+
return None
339+
347340

348-
# get the AAs in the haystack
349-
aaDict = {'aaList': []}
350-
cmd.iterate("(name ca) and __h", "aaList.append((resi,resn,chain))", space=aaDict)
341+
# remove hetero atoms (waters/ligands/etc) from consideration?
342+
if het:
343+
cmd.select("__h", f"br. {obj} and {haystack}")
344+
else:
345+
cmd.select("__h", f"br. {obj} and {haystack} and not het")
351346

352-
IDs = [x[0] for x in aaDict['aaList']]
353-
AAs = ''.join([one_letter[x[1]] for x in aaDict['aaList']])
354-
chains = [x[2] for x in aaDict['aaList']]
347+
# get the AAs in the haystack
348+
aaDict = {'aaList': []}
349+
cmd.iterate("(name ca) and __h", "aaList.append((resi,resn,chain))", space=aaDict)
355350

356-
reNeedle = re.compile(needle.upper())
357-
it = reNeedle.finditer(AAs)
351+
IDs = [x[0] for x in aaDict['aaList']]
352+
AAs = ''.join([one_letter[x[1]] for x in aaDict['aaList']])
353+
chains = [x[2] for x in aaDict['aaList']]
358354

359-
# make an empty selection to which we add residues
360-
cmd.select(rSelName, 'None')
355+
reNeedle = re.compile(needle.upper())
356+
it = reNeedle.finditer(AAs)
361357

362-
for i in it:
363-
(start, stop) = i.span()
364-
# we found some residues, which chains are they from?
365-
i_chains = chains[start:stop]
366-
# are all residues from one chain?
367-
if len(set(i_chains)) != 1:
368-
# now they are not, this match is not really a match, skip it
369-
continue
370-
chain = i_chains[0]
371-
# Only apply chains to selection algebra if there are defined chains.
372-
if chain:
373-
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + " and c. " + chain + " )")
374-
else:
375-
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + ")")
376-
if int(firstOnly):
377-
break
358+
359+
for i in it:
360+
(start, stop) = i.span()
361+
# we found some residues, which chains are they from?
362+
i_chains = chains[start:stop]
363+
# are all residues from one chain?
364+
if len(set(i_chains)) != 1:
365+
# no, they are not: the match spans more than one chain, so it is not a real match; skip it
366+
continue
367+
chain = i_chains[0]
368+
# Only apply chains to selection algebra if there are defined chains.
369+
if chain:
370+
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + " and c. " + chain + " )")
371+
else:
372+
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + ")")
373+
if int(firstOnly):
374+
break
378375
cmd.delete("__h")
379376
return rSelName
380-
381377
cmd.extend("findseq", findseq)
382-
cmd.findseq = findseq
383-
cmd.auto_arg[1]['findseq'] = [ cmd.object_sc, 'object', '']
384-
cmd.auto_arg[2]['findseq'] = [lambda: cmd.Shortcut(['het=1','firstOnly=1']), 'params', '']
385-
cmd.auto_arg[3]['findseq'] = [lambda: cmd.Shortcut(['het=1','firstOnly=1']), 'params', '']
378+
386379

387380
def checkParams(needle, haystack, selName, het, firstOnly):
388381
"""

0 commit comments

Comments
 (0)