Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/6507cd6808c7/allpy/sequence.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 07:43:32 2013
Кодировка:
allpy: 6507cd6808c7 allpy/sequence.py

allpy

view allpy/sequence.py @ 244:6507cd6808c7

allpy: monomers completed _monomers.py -- provate module for monomers other modules imports MonomerType's descendant from _monomer and inherit from Monomer type thre are some problems with Monomer -- MonomerType links
author boris <bnagaev@gmail.com>
date Fri, 03 Dec 2010 23:24:43 +0300
parents 280a8069c206
children deae8240d396
line source
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 from monomer import AminoAcidType
5 from Bio.PDB import CaPPBuilder, PDBIO
6 from Bio.PDB.DSSP import make_dssp_dict
7 from allpy_pdb import std_id, pdb_id_parse, get_structure
8 import alignment
9 import sys
10 import config
11 import os.path
12 import urllib2
13 from tempfile import NamedTemporaryFile
14 import os
17 class Sequence(list):
18 """ Sequence of Monomers
20 list of monomer objects (aminoacids or nucleotides)
22 Mandatory data:
23 * name -- str with the name of sequence
24 * description -- str with description of the sequence
26 Optional (may be empty):
27 * source -- source of sequence
28 * pdb_chain -- Bio.PDB.Chain
29 * pdb_file -- file object
31 * pdb_residues -- {Monomer: Bio.PDB.Residue}
32 * pdb_secstr -- {Monomer: 'Secondary structure'}
33 Code Secondary structure
34 H alpha-helix
35 B Isolated beta-bridge residue
36 E Strand
37 G 3-10 helix
38 I pi-helix
39 T Turn
40 S Bend
41 - Other
44 ?TODO: global pdb_structures
45 """
46 def __init__(self, monomers=None, name='', description=""):
47 if not monomers:
48 monomers = []
49 self.name = name
50 self.description = description
51 self.monomers = monomers
52 self.pdb_chains = []
53 self.pdb_files = {}
54 self.pdb_residues = {}
55 self.pdb_secstr = {}
57 def __len__(self):
58 return len(self.monomers)
60 def __str__(self):
61 """ Returns sequence in one-letter code """
62 return ''.join([monomer.type.code1 for monomer in self.monomers])
64 def __eq__(self, other):
65 """ Returns if all corresponding monomers of this sequences are equal
67 If lengths of sequences are not equal, returns False
68 """
69 return len(self) == len(other) and \
70 all([a==b for a, b in zip(self.monomers, other.monomers)])
72 def __ne__(self, other):
73 return not (self == other)
75 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
76 """ Reads Pdb chain from file
78 and align each Monomer with PDB.Residue (TODO)
79 """
80 name = std_id(pdb_id, pdb_chain, pdb_model)
81 structure = get_structure(pdb_file, name)
82 chain = structure[pdb_model][pdb_chain]
83 self.pdb_chains.append(chain)
84 self.pdb_residues[chain] = {}
85 self.pdb_secstr[chain] = {}
86 pdb_sequence = Sequence.from_pdb_chain(chain)
87 a = alignment.Alignment.from_sequences(self, pdb_sequence)
88 a.muscle_align()
89 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
90 if pdb_sequence.pdb_has(chain, pdb_monomer):
91 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
92 self.pdb_residues[chain][monomer] = residue
93 self.pdb_files[chain] = pdb_file
95 def pdb_unload(self):
96 """ Delete all pdb-connected links """
97 #~ gc.get_referrers(self.pdb_chains[0])
98 self.pdb_chains = []
99 self.pdb_residues = {}
100 self.pdb_secstr = {} # FIXME
101 self.pdb_files = {} # FIXME
103 @staticmethod
104 def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):
105 """ Import data from one-letter code
107 monomer_kind is class, inherited from MonomerType
108 """
109 monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]
110 return Sequence(monomers, name, description)
112 @staticmethod
113 def from_pdb_chain(chain):
114 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
116 chain is Bio.PDB.Chain
117 """
118 cappbuilder = CaPPBuilder()
119 peptides = cappbuilder.build_peptides(chain)
120 sequence = Sequence()
121 sequence.pdb_chains = [chain]
122 sequence.pdb_residues[chain] = {}
123 sequence.pdb_secstr[chain] = {}
124 for peptide in peptides:
125 for ca_atom in peptide.get_ca_list():
126 residue = ca_atom.get_parent()
127 monomer = AminoAcidType.from_pdb_residue(residue).instance()
128 sequence.pdb_residues[chain][monomer] = residue
129 sequence.monomers.append(monomer)
130 return sequence
132 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
133 """ Adds pdb information to each monomer
135 Returns if information has been successfully added
136 TODO: conformity_file
138 id-format lava flow
139 """
140 if not conformity_info:
141 path = os.path.join(pdb_directory, self.name)
142 if os.path.exists(path) and os.path.getsize(path):
143 match = pdb_id_parse(self.name)
144 self.pdb_chain_add(open(path), match['code'],
145 match['chain'], match['model'])
146 else:
147 match = pdb_id_parse(self.name)
148 if match:
149 code = match['code']
150 pdb_filename = config.pdb_dir % code
151 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
152 url = config.pdb_url % code
153 print "Download %s" % url
154 pdb_file = open(pdb_filename, 'w')
155 data = urllib2.urlopen(url).read()
156 pdb_file.write(data)
157 pdb_file.close()
158 print "Save %s" % pdb_filename
159 pdb_file = open(pdb_filename)
160 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
162 def pdb_save(self, out_filename, pdb_chain):
163 """ Saves pdb_chain to out_file """
164 class GlySelect(Select):
165 def accept_chain(self, chain):
166 if chain == pdb_chain:
167 return 1
168 else:
169 return 0
170 io = PDBIO()
171 structure = chain.get_parent()
172 io.set_structure(structure)
173 io.save(out_filename, GlySelect())
176 def pdb_add_sec_str(self, pdb_chain):
177 """ Add secondary structure data """
178 tmp_file = NamedTemporaryFile(delete=False)
179 tmp_file.close()
180 pdb_file = self.pdb_files[pdb_chain].name
181 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
182 dssp, keys = make_dssp_dict(tmp_file.name)
183 for monomer in self.monomers:
184 if self.pdb_has(pdb_chain, monomer):
185 residue = self.pdb_residues[pdb_chain][monomer]
186 try:
187 d = dssp[(pdb_chain.get_id(), residue.get_id())]
188 self.pdb_secstr[pdb_chain][monomer] = d[1]
189 except:
190 print "No dssp information about %s at %s" % (monomer, pdb_chain)
191 os.unlink(tmp_file.name)
193 def pdb_has(self, chain, monomer):
194 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
196 def secstr_has(self, chain, monomer):
197 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
199 @staticmethod
200 def file_slice(file, n_from, n_to, fasta_name='', name='', description='', monomer_kind=AminoAcidType):
201 """ Build and return sequence, consisting of part of sequence from file
203 Does not control gaps
204 """
205 inside = False
206 number_used = 0
207 s = ''
208 for line in file:
209 line = line.split()
210 if not inside:
211 if line.startswith('>%s' % fasta_name):
212 inside = True
213 else:
214 n = len(line)
215 s += line[(n_from - number_user):(n_to - number_user)]
216 return Sequence.from_str(s, name, description, monomer_kind)
218 class ProteinSequence(Sequence):
219 """ """
220 pass