This is a read-only mirror of pymolwiki.org

Difference between revisions of "Grepsel"

From PyMOL Wiki
Jump to navigation Jump to search
(fixed selection from "chainless" models)
m (6 revisions)
 
(5 intermediate revisions by 3 users not shown)
Line 1: Line 1:
 +
Create selections matching motifs, using python regular expression syntax.
 +
 +
This is very similar to the [[FindSeq]] script.
 +
 +
== The Code ==
 +
 
<source lang="python">
 
<source lang="python">
 
#Create named selections using regular expressions for the protein sequence
 
#Create named selections using regular expressions for the protein sequence
Line 4: Line 10:
 
import pymol
 
import pymol
 
import re
 
import re
 +
cmd = pymol.cmd
  
 
aa = { 'ASP' : 'D' , 'GLU' : 'E' , 'GLN' : 'Q' , 'ASN' : 'N' , 'SER' : 'S' ,
 
aa = { 'ASP' : 'D' , 'GLU' : 'E' , 'GLN' : 'Q' , 'ASN' : 'N' , 'SER' : 'S' ,
Line 27: Line 34:
  
  
def grepsel(model="(all)",stretch="",prefix="",combined="0",single="1"):
+
def grepsel(selection="(all)",stretch="",prefix="",combined="0",single="1"):
 
   '''
 
   '''
 
DESCRIPTION
 
DESCRIPTION
  
     Make selections matching regular expressions. Selections are labelled
+
     Create selections matching motifs, using python regular expression syntax.
     as "prefix_expression_###", where ### is the index for the first residue
+
    Motif is automatically converted to uppercase. Motif selections are labelled
     of the match. Prefix defaults to model name. combined = 1 creates one
+
     as "prefix_motif_###", where ### is the index for the first residue of the
     selection for all occurences. single = 1 creates one selection for each
+
     match. Prefix defaults to selection name. combined = 1 creates one selection
     occurance (the default).
+
     for all occurences. single = 1 creates one selection for each occurance
 +
     (the default).
 
      
 
      
 
USAGE
 
USAGE
  
     grepsel selection, expression, [prefix, [combined, [single ]]]
+
     grepsel selection, motif, [prefix, [combined, [single ]]]
  
 
EXAMPLES
 
EXAMPLES
  
     Make selections for all motifs matching "ESS" (model_ESS_###,...):
+
     Create selections for all motifs matching "ESS" (selection_ESS_###,...):
     grepsel model, ess
+
     grepsel selection, ess
  
     Make selections for the PxGY motif with prefix m (m_P.CY_###,...):
+
     Create selections for the PxGY motif with prefix m (m_P.CY_###,...):
     grepsel model, p.gy, m
+
     grepsel selection, p.gy, m
       
 
 
     '''
 
     '''
 
+
 
+
   if selection == "(all)":
   if model == "(all)":
+
       selection = "all"
       model = "all"
 
 
   if prefix == "":
 
   if prefix == "":
       prefix=model
+
       prefix=selection
  
 
   stretch = stretch.upper()  
 
   stretch = stretch.upper()  
   seq = seqoneint(model)
+
   seq = seqoneint(selection)
 
   pymol.stored.resi = []
 
   pymol.stored.resi = []
 
   pymol.stored.chain = []
 
   pymol.stored.chain = []
   cmd.iterate("%s and name ca"%model,"stored.resi.append(resi);stored.chain.append(chain)")
+
   cmd.iterate("%s and name ca"%selection,"stored.resi.append(resi);stored.chain.append(chain)")
 
   motif = re.compile(stretch)
 
   motif = re.compile(stretch)
 
   occurrences = motif.finditer(seq)
 
   occurrences = motif.finditer(seq)
 +
  stretchmod = stretch.replace("+","\+")
 +
  stretchmod = stretchmod.replace("?","\?")
 +
 +
  print stretchmod
 
   if combined == "1":
 
   if combined == "1":
       cmd.do("select %s_%s, none"%(prefix,stretch))
+
       cmd.select("%s_%s"%(prefix,stretch), "none")
 +
 
 +
 
 +
  for find in occurrences:     
 +
 
 +
      mb = pymol.stored.resi[find.start()]
 +
      me = pymol.stored.resi[find.end()-1]
 +
 
 +
      ch = pymol.stored.chain[find.start()]
 +
      cmd.select("%s_%s_%s%s"%(prefix,stretch,me,ch), "chain %s and (i; %s-%s)"%(ch,int(mb),int(me)))
 +
      if combined == "1":
 +
        cmd.select("%s_%s"%(prefix,stretch),"\"%s_%s\" | (%s and chain %s and (i; %s-%s))"%(prefix,stretchmod,selection,ch,int(mb),int(me)))
  
  for match in occurrences:
 
      fx = match.start()
 
      pos = 0
 
      for fy in range(int(fx),int(fx)+len(stretch)):
 
        ch = pymol.stored.chain[fy]
 
        ri = pymol.stored.resi[fy]
 
        if pos == 0:
 
            pos = ri
 
            if single == "1":
 
              cmd.select("%s_%s_%s%s"%(prefix,stretch,pos,ch), "none")           
 
        if single == "1":
 
            cmd.select("%s_%s_%s%s"%(prefix,stretch,pos,ch), "%s_%s_%s%s | ///%s/%s/"%(prefix,stretch,pos,ch,ch,ri))
 
        if combined == "1":
 
            cmd.select("%s_%s"%(prefix,stretch), "%s_%s | ////%s/"%(prefix,stretch,ri))
 
 
   cmd.select("none")
 
   cmd.select("none")
 
   cmd.delete("sel*")
 
   cmd.delete("sel*")
 
+
 
 
cmd.extend("grepsel",grepsel)
 
cmd.extend("grepsel",grepsel)
 
</source>
 
</source>
  
 +
== See Also ==
 +
 +
* [[FindSeq]]
 +
* [[Selection Algebra|pepseq]] selection operator
 +
* [http://pldserver1.biochem.queensu.ca/~rlc/work/pymol/ seq_select] by Robert Campbell
  
 
[[Category:Script_Library|Grep selections]]
 
[[Category:Script_Library|Grep selections]]
 +
[[Category:ObjSel_Scripts]]

Latest revision as of 03:06, 28 March 2014

Create selections matching motifs, using Python regular expression syntax.

This is very similar to the FindSeq script.

The Code

#Create named selections using regular expressions for the protein sequence

import pymol
import re
cmd = pymol.cmd

# Lookup table from three-letter residue names to one-letter sequence codes.
aa = {
   'ASP': 'D',
   'GLU': 'E',
   'GLN': 'Q',
   'ASN': 'N',
   'SER': 'S',
   'THR': 'T',
   'CYS': 'C',
   'HIS': 'H',
   'ARG': 'R',
   'LYS': 'K',
   'MET': 'M',
   'ALA': 'A',
   'ILE': 'I',
   'LEU': 'L',
   'VAL': 'V',
   'GLY': 'G',
   'PRO': 'P',
   'TRP': 'W',
   'PHE': 'F',
   'TYR': 'Y',
   'SCY': 'U',
   'ASX': 'B',
   'GLX': 'Z',
   'XXX': 'X',
}

# Made this before the sequence view option existed; there is probably another way to do it now.

def seqoneint(model):
   """Return the one-letter sequence for the CA atoms of *model*.

   The residue name of every atom matching "<model> and name ca" is
   collected through cmd.iterate and translated with the module-level
   ``aa`` table. Residue names missing from the table are written as '-'
   so that string positions stay aligned with the iterated atoms.

   model -- a PyMOL selection expression (string).
   Returns the sequence as a string, one character per CA atom.
   """
   pymol.stored.seq = []
   cmd.iterate("%s and name ca"%model,"stored.seq.append(resn)")
   # dict.get replaces the Python 2-only dict.has_key, and join avoids
   # building the string by repeated concatenation.
   return "".join(aa.get(resn, '-') for resn in pymol.stored.seq)



def grepsel(selection="(all)",stretch="",prefix="",combined="0",single="1"):
   '''
DESCRIPTION

    Create selections matching motifs, using Python regular expression syntax.
    Motif is automatically converted to uppercase. Motif selections are labelled
    as "prefix_motif_###C", where ### is the index of the first residue of the
    match and C is its chain identifier. Prefix defaults to the selection name.
    combined = 1 creates one selection ("prefix_motif") for all occurrences.
    single = 1 creates one selection for each occurrence (the default).

    The sequence is built from the CA atoms of the selection; residues whose
    name is not in the one-letter table appear as "-". Matches of zero length
    (for example from an empty motif) are ignored.

USAGE

    grepsel selection, motif, [prefix, [combined, [single ]]]

EXAMPLES

    Create selections for all motifs matching "ESS" (selection_ESS_###,...):
    grepsel selection, ess

    Create selections for the PxGY motif with prefix m (m_P.GY_###,...):
    grepsel selection, p.gy, m
    '''

   if selection == "(all)":
      selection = "all"
   if prefix == "":
      prefix = selection

   # Flags arrive as strings from the PyMOL command line; str() lets Python
   # callers pass the integers 0/1 as well.
   want_combined = str(combined) == "1"
   want_single = str(single) == "1"

   stretch = stretch.upper()
   seq = seqoneint(selection)

   # Collect residue number and chain for every CA atom, in the same order
   # seqoneint() used, so that string offsets in seq index these lists.
   pymol.stored.resi = []
   pymol.stored.chain = []
   cmd.iterate("%s and name ca"%selection,"stored.resi.append(resi);stored.chain.append(chain)")

   motif = re.compile(stretch)

   combined_name = "%s_%s"%(prefix,stretch)
   # "+" and "?" are escaped where the combined selection is referred to by
   # name inside a selection expression.
   stretchmod = stretch.replace("+",r"\+").replace("?",r"\?")
   combined_ref = "\"%s_%s\""%(prefix,stretchmod)

   if want_combined:
      cmd.select(combined_name, "none")

   for find in motif.finditer(seq):
      # A zero-length match has no residues; indexing with end()-1 would
      # pick the wrong residue or run past the end of the lists.
      if find.end() == find.start():
         continue

      mb = pymol.stored.resi[find.start()]
      me = pymol.stored.resi[find.end()-1]
      ch = pymol.stored.chain[find.start()]
      # NOTE(review): the sequence is concatenated over all chains, so a
      # match may straddle a chain boundary; the chain of the first residue
      # is used for the whole range.
      hit = "(%s and chain %s and (i; %s-%s))"%(selection,ch,int(mb),int(me))

      if want_single:
         # Named after the first residue of the match, as documented.
         cmd.select("%s_%s_%s%s"%(prefix,stretch,mb,ch), hit)
      if want_combined:
         cmd.select(combined_name, "%s | %s"%(combined_ref,hit))

   # Turn off the selection indicators without deleting anything; the former
   # cmd.delete("sel*") also removed unrelated user objects/selections (and
   # the results themselves when the prefix started with "sel").
   cmd.deselect()
   
# Register grepsel with PyMOL under the command name "grepsel"
# (see the USAGE section of its docstring for the command-line form).
cmd.extend("grepsel",grepsel)

See Also