This is a read-only mirror of pymolwiki.org
Difference between revisions of "Grepsel"
Jump to navigation
Jump to search
(fixed selection from "chainless" models) |
m (6 revisions) |
||
(5 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
+ | Create selections matching motifs, using python regular expression syntax. | ||
+ | |||
+ | This is very similar to the [[FindSeq]] script. | ||
+ | |||
+ | == The Code == | ||
+ | |||
<source lang="python"> | <source lang="python"> | ||
#Create named selections using regular expressions for the protein sequence | #Create named selections using regular expressions for the protein sequence | ||
Line 4: | Line 10: | ||
import pymol | import pymol | ||
import re | import re | ||
+ | cmd = pymol.cmd | ||
aa = { 'ASP' : 'D' , 'GLU' : 'E' , 'GLN' : 'Q' , 'ASN' : 'N' , 'SER' : 'S' , | aa = { 'ASP' : 'D' , 'GLU' : 'E' , 'GLN' : 'Q' , 'ASN' : 'N' , 'SER' : 'S' , | ||
Line 27: | Line 34: | ||
− | def grepsel( | + | def grepsel(selection="(all)",stretch="",prefix="",combined="0",single="1"): |
''' | ''' | ||
DESCRIPTION | DESCRIPTION | ||
− | + | Create selections matching motifs, using python regular expression syntax. | |
− | as " | + | Motif is automatically converted to uppercase. Motif selections are labelled |
− | + | as "prefix_motif_###", where ### is the index for the first residue of the | |
− | + | match. Prefix defaults to selection name. combined = 1 creates one selection | |
− | + | for all occurrences. single = 1 creates one selection for each occurrence | |
+ | (the default). | ||
USAGE | USAGE | ||
− | grepsel selection, | + | grepsel selection, motif, [prefix, [combined, [single ]]] |
EXAMPLES | EXAMPLES | ||
− | + | Create selections for all motifs matching "ESS" (selection_ESS_###,...): | |
− | grepsel | + | grepsel selection, ess |
− | + | Create selections for the PxGY motif with prefix m (m_P.GY_###,...): | |
− | grepsel | + | grepsel selection, p.gy, m |
− | |||
''' | ''' | ||
− | + | ||
− | + | if selection == "(all)": | |
− | if | + | selection = "all" |
− | |||
if prefix == "": | if prefix == "": | ||
− | prefix= | + | prefix=selection |
stretch = stretch.upper() | stretch = stretch.upper() | ||
− | seq = seqoneint( | + | seq = seqoneint(selection) |
pymol.stored.resi = [] | pymol.stored.resi = [] | ||
pymol.stored.chain = [] | pymol.stored.chain = [] | ||
− | cmd.iterate("%s and name ca"% | + | cmd.iterate("%s and name ca"%selection,"stored.resi.append(resi);stored.chain.append(chain)") |
motif = re.compile(stretch) | motif = re.compile(stretch) | ||
occurrences = motif.finditer(seq) | occurrences = motif.finditer(seq) | ||
+ | stretchmod = stretch.replace("+","\+") | ||
+ | stretchmod = stretchmod.replace("?","\?") | ||
+ | |||
+ | print stretchmod | ||
if combined == "1": | if combined == "1": | ||
− | cmd. | + | cmd.select("%s_%s"%(prefix,stretch), "none") |
+ | |||
+ | |||
+ | for find in occurrences: | ||
+ | |||
+ | mb = pymol.stored.resi[find.start()] | ||
+ | me = pymol.stored.resi[find.end()-1] | ||
+ | |||
+ | ch = pymol.stored.chain[find.start()] | ||
+ | cmd.select("%s_%s_%s%s"%(prefix,stretch,me,ch), "chain %s and (i; %s-%s)"%(ch,int(mb),int(me))) | ||
+ | if combined == "1": | ||
+ | cmd.select("%s_%s"%(prefix,stretch),"\"%s_%s\" | (%s and chain %s and (i; %s-%s))"%(prefix,stretchmod,selection,ch,int(mb),int(me))) | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
cmd.select("none") | cmd.select("none") | ||
cmd.delete("sel*") | cmd.delete("sel*") | ||
− | + | ||
cmd.extend("grepsel",grepsel) | cmd.extend("grepsel",grepsel) | ||
</source> | </source> | ||
+ | == See Also == | ||
+ | |||
+ | * [[FindSeq]] | ||
+ | * [[Selection Algebra|pepseq]] selection operator | ||
+ | * [http://pldserver1.biochem.queensu.ca/~rlc/work/pymol/ seq_select] by Robert Campbell | ||
[[Category:Script_Library|Grep selections]] | [[Category:Script_Library|Grep selections]] | ||
+ | [[Category:ObjSel_Scripts]] |
Latest revision as of 03:06, 28 March 2014
Create selections matching motifs, using python regular expression syntax.
This is very similar to the FindSeq script.
The Code
#Create named selections using regular expressions for the protein sequence
import pymol
import re
# Module-level shorthand for the PyMOL command API; the functions below call
# cmd.iterate, cmd.select, cmd.delete and cmd.extend through this name.
cmd = pymol.cmd
# Lookup table: three-letter residue name -> one-letter sequence code.
aa = {
    # acidic residues and their amides
    'ASP': 'D',
    'GLU': 'E',
    'GLN': 'Q',
    'ASN': 'N',
    # hydroxyl / sulfur containing
    'SER': 'S',
    'THR': 'T',
    'CYS': 'C',
    'MET': 'M',
    # basic
    'HIS': 'H',
    'ARG': 'R',
    'LYS': 'K',
    # aliphatic
    'ALA': 'A',
    'ILE': 'I',
    'LEU': 'L',
    'VAL': 'V',
    'GLY': 'G',
    'PRO': 'P',
    # aromatic
    'TRP': 'W',
    'PHE': 'F',
    'TYR': 'Y',
    # non-standard / ambiguous codes
    'SCY': 'U',
    'ASX': 'B',
    'GLX': 'Z',
    'XXX': 'X',
}
#made this before the sequence view option, probably another way to do it now
def seqoneint(model):
    '''
    Return the one-letter sequence of the CA atoms in ``model`` as a string.

    ``model`` is a PyMOL selection expression. One character is produced per
    CA atom visited by cmd.iterate, in iteration order. Residue names that
    are not keys of the module-level ``aa`` table are written as '-', so the
    returned string always has exactly one character per CA atom.

    Side effect: overwrites ``pymol.stored.seq`` with the list of residue
    names that were visited.
    '''
    pymol.stored.seq = []
    cmd.iterate("%s and name ca"%model,"stored.seq.append(resn)")
    # dict.get replaces the Python-2-only dict.has_key (removed in Python 3);
    # str.join avoids rebuilding the string once per residue.
    return "".join(aa.get(resn, '-') for resn in pymol.stored.seq)
def grepsel(selection="(all)",stretch="",prefix="",combined="0",single="1"):
    '''
DESCRIPTION
    Create selections matching motifs, using python regular expression syntax.
    Motif is automatically converted to uppercase. Motif selections are labelled
    as "prefix_motif_###C", where ### is the residue identifier of the last
    residue of the match and C is the chain identifier of the match. Prefix
    defaults to the selection name. combined = 1 creates one selection
    ("prefix_motif") for all occurrences. single = 1 creates one selection
    for each occurrence (the default).

    The sequence that is searched is built from every CA atom of the
    selection in iteration order. Zero-length matches and matches whose first
    and last residue lie in different chains are ignored.
USAGE
    grepsel selection, motif, [prefix, [combined, [single ]]]
EXAMPLES
    Create selections for all motifs matching "ESS" (selection_ESS_###C,...):
    grepsel selection, ess
    Create selections for the PxGY motif with prefix m (m_P.GY_###C,...):
    grepsel selection, p.gy, m
    '''
    if selection == "(all)":
        selection = "all"
    if prefix == "":
        prefix = selection
    stretch = stretch.upper()

    # Parenthesise the user's selection so the "and ..." clauses appended
    # below apply to the whole expression, not just its last operand.
    target = "(%s)" % selection

    # seqoneint and the iterate below visit the same atoms ("<target> and
    # name ca"), so seq[i], resi_list[i] and chain_list[i] describe the same
    # residue.
    seq = seqoneint(target)
    pymol.stored.resi = []
    pymol.stored.chain = []
    cmd.iterate("%s and name ca" % target,
                "stored.resi.append(resi);stored.chain.append(chain)")
    resi_list = pymol.stored.resi
    chain_list = pymol.stored.chain

    motif = re.compile(stretch)

    # '+' and '?' are backslash-escaped when the combined selection's own
    # name is referenced inside a selection expression.
    stretchmod = stretch.replace("+", "\\+")
    stretchmod = stretchmod.replace("?", "\\?")
    print(stretchmod)

    # Flags arrive as strings from the PyMOL command line; str() also lets
    # Python callers pass the integers 0/1.
    want_combined = str(combined) == "1"
    want_single = str(single) == "1"
    combined_name = "%s_%s" % (prefix, stretch)

    if want_combined:
        # Start from an empty selection and OR every match into it.
        cmd.select(combined_name, "none")

    for find in motif.finditer(seq):
        first = find.start()
        last = find.end() - 1
        if last < first:
            # Zero-length match (e.g. empty motif or "A*"): nothing to
            # select, and `first` may point one past the end of the lists.
            continue
        ch = chain_list[first]
        if chain_list[last] != ch:
            # The searched sequence is a concatenation over all chains; a
            # match straddling two chains is not a real motif occurrence.
            continue
        # Residue identifiers are kept as strings so insertion codes
        # (e.g. "52A") do not raise ValueError as int() would.
        mb = resi_list[first]
        me = resi_list[last]
        residues = "%s and chain %s and (resi %s-%s)" % (target, ch, mb, me)
        if want_single:
            cmd.select("%s_%s_%s%s" % (prefix, stretch, me, ch), residues)
        if want_combined:
            cmd.select(combined_name,
                       "\"%s_%s\" | (%s)" % (prefix, stretchmod, residues))

    # Create and immediately drop an empty scratch selection under the
    # explicit name "sele". The name is given explicitly so the cleanup does
    # not need a "sel*" wildcard, which would also delete the selections
    # created above whenever the prefix starts with "sel".
    cmd.select("sele", "none")
    cmd.delete("sele")
# Register the function with PyMOL under the command name "grepsel"
# (see the USAGE section of its docstring for the command-line form).
cmd.extend("grepsel",grepsel)
See Also
- FindSeq
- pepseq selection operator
- seq_select by Robert Campbell