view allpy/sequence.py @ 201:33259ccf6bc7
repeats: fix in formula for ori
 | author | boris (netbook) <bnagaev@gmail.com> | 
 | date | Fri, 19 Nov 2010 11:44:04 +0300 | 
 | parents | a016cad5f5a0 | 
 | children | 346a6cd4fc1d | 
 line source
     2 # -*- coding: utf-8 -*-  
     4 from monomer import AminoAcidType 
     5 from Bio.PDB import CaPPBuilder, PDBIO 
     6 from Bio.PDB.DSSP import make_dssp_dict 
     7 from allpy_pdb import std_id, pdb_id_parse, get_structure 
    13 from tempfile import NamedTemporaryFile 
    17 class Sequence(object): 
    18     """ Sequence of Monomers 
    21     *   name -- str with the name of sequence 
    22     *   description -- str with description of the sequence 
    23     *   monomers -- list of monomer objects (aminoacids or nucleotides) 
    25     Optional (may be empty): 
    26     *   pdb_chains -- list of Bio.PDB.Chain's 
    27     *   pdb_files -- dictionary like {Bio.PDB.Chain: file_obj} 
    29     *   pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}} 
    30     *   pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}} 
    31             Code   Secondary structure 
    33             B      Isolated beta-bridge residue 
    42     ?TODO: global pdb_structures  
    44     def __init__(self, monomers=None, name='', description=""): 
    48         self.description = description 
    49         self.monomers = monomers  
    52         self.pdb_residues = {} 
    56         return len(self.monomers) 
    59         """ Returns sequence in one-letter code """ 
    60         return ''.join([monomer.type.code1 for monomer in self.monomers]) 
    62     def __eq__(self, other): 
    63         """ Returns if all corresponding monomers of this sequences are equal 
    65         If lengths of sequences are not equal, returns False 
    67         return len(self) == len(other) and \
 
    68         all([a==b for a, b in zip(self.monomers, other.monomers)]) 
    70     def __ne__(self, other): 
    71         return not (self == other) 
    73     def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0): 
    74         """ Reads Pdb chain from file  
    76         and align each Monomer with PDB.Residue (TODO) 
    78         name = std_id(pdb_id, pdb_chain, pdb_model) 
    79         structure = get_structure(pdb_file, name) 
    80         chain = structure[pdb_model][pdb_chain] 
    81         self.pdb_chains.append(chain) 
    82         self.pdb_residues[chain] = {} 
    83         self.pdb_secstr[chain] = {} 
    84         pdb_sequence = Sequence.from_pdb_chain(chain) 
    85         a = alignment.Alignment.from_sequences(self, pdb_sequence) 
    87         for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self): 
    88             if pdb_sequence.pdb_has(chain, pdb_monomer): 
    89                 residue = pdb_sequence.pdb_residues[chain][pdb_monomer] 
    90                 self.pdb_residues[chain][monomer] = residue 
    91         self.pdb_files[chain] = pdb_file 
    94         """ Delete all pdb-connected links """ 
    95         #~ gc.get_referrers(self.pdb_chains[0]) 
    97         self.pdb_residues = {} 
    98         self.pdb_secstr = {} # FIXME 
    99         self.pdb_files = {} # FIXME 
   102     def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType): 
   103         """ Import data from one-letter code 
   105         monomer_kind is class, inherited from MonomerType 
   107         monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str] 
   108         return Sequence(monomers, name, description) 
   111     def from_pdb_chain(chain): 
   112         """ Returns Sequence with Monomers with link to Bio.PDB.Residue 
   114         chain is Bio.PDB.Chain 
   116         cappbuilder = CaPPBuilder() 
   117         peptides = cappbuilder.build_peptides(chain) 
   118         sequence = Sequence() 
   119         sequence.pdb_chains = [chain] 
   120         sequence.pdb_residues[chain] = {} 
   121         sequence.pdb_secstr[chain] = {} 
   122         for peptide in peptides: 
   123             for ca_atom in peptide.get_ca_list(): 
   124                 residue = ca_atom.get_parent() 
   125                 monomer = AminoAcidType.from_pdb_residue(residue).instance() 
   126                 sequence.pdb_residues[chain][monomer] = residue 
   127                 sequence.monomers.append(monomer) 
   130     def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'): 
   131         """ Adds pdb information to each monomer 
   133         Returns if information has been successfully added 
   134         TODO: conformity_file 
   138         if not conformity_info: 
   139             path = os.path.join(pdb_directory, self.name) 
   140             if os.path.exists(path) and os.path.getsize(path): 
   141                 match = pdb_id_parse(self.name) 
   142                 self.pdb_chain_add(open(path), match['code'],  
   143                 match['chain'], match['model']) 
   145                 match = pdb_id_parse(self.name) 
   148                     pdb_filename = config.pdb_dir % code 
   149                     if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename): 
   150                         url = config.pdb_url % code 
   151                         print "Download %s" % url 
   152                         pdb_file = open(pdb_filename, 'w') 
   153                         data = urllib2.urlopen(url).read() 
   156                         print "Save %s" % pdb_filename 
   157                     pdb_file = open(pdb_filename) 
   158                     self.pdb_chain_add(pdb_file, code, match['chain'], match['model']) 
   160     def pdb_save(self, out_filename, pdb_chain): 
   161         """ Saves pdb_chain to out_file """ 
   162         class GlySelect(Select): 
   163             def accept_chain(self, chain): 
   164                 if chain == pdb_chain: 
   169         structure = chain.get_parent() 
   170         io.set_structure(structure) 
   171         io.save(out_filename, GlySelect()) 
   174     def pdb_add_sec_str(self, pdb_chain): 
   175         """ Add secondary structure data """ 
   176         tmp_file = NamedTemporaryFile(delete=False) 
   178         pdb_file = self.pdb_files[pdb_chain].name 
   179         os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name}) 
   180         dssp, keys = make_dssp_dict(tmp_file.name) 
   181         for monomer in self.monomers: 
   182             if self.pdb_has(pdb_chain, monomer): 
   183                 residue = self.pdb_residues[pdb_chain][monomer] 
   185                     d = dssp[(pdb_chain.get_id(), residue.get_id())] 
   186                     self.pdb_secstr[pdb_chain][monomer] = d[1] 
   188                     print "No dssp information about %s at %s" % (monomer, pdb_chain) 
   189         os.unlink(tmp_file.name) 
   191     def pdb_has(self, chain, monomer): 
   192         return chain in self.pdb_residues and monomer in self.pdb_residues[chain] 
   194     def secstr_has(self, chain, monomer): 
   195         return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain] 
   198     def file_slice(file, n_from, n_to, fasta_name='', name='', description='', monomer_kind=AminoAcidType): 
   199         """ Build and return sequence, consisting of part of sequence from file  
   201         Does not control gaps 
   209                 if line.startswith('>%s' % fasta_name): 
   213                 s += line[(n_from - number_user):(n_to - number_user)] 
   214         return Sequence.from_str(s, name, description, monomer_kind)