Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/06659228cd6f/allpy/pdb.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 17:56:18 2013
Кодировка:
allpy: 06659228cd6f allpy/pdb.py

allpy

view allpy/pdb.py @ 386:06659228cd6f

fix bug in allpy/pdb.py
author boris <bnagaev@gmail.com>
date Wed, 02 Feb 2011 16:21:04 +0300
parents d7fc6865ce58
children
line source
1 """ Functions to get pdb information from fasta id
2 and to generate fasta id from pdb information
4 pdb information: code, chain, model
6 TODO: same for local pdb files
7 """
9 import re
10 import os
11 import os.path
12 from tempfile import NamedTemporaryFile
13 import urllib2
15 from Bio.PDB import PDBParser
16 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
17 from Bio.PDB.DSSP import make_dssp_dict
19 import base
20 import processors
21 import config
22 from graph import Graph
25 # for pdb-codes
26 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
28 def pdb_id_parse(ID):
29 match = re1.search(ID)
30 if not match:
31 return None
32 d = match.groupdict()
33 if 'chain' not in d or not d['chain']:
34 d['chain'] = ' '
35 if 'model' not in d or not d['model']:
36 d['model'] = 0
37 return d
40 def get_structure(file, name):
41 return PDBParser().get_structure(name, file)
43 class SequenceMixin(base.Sequence):
44 """Mixin for adding PDB data to a Sequence.
46 Please note: since this is a mixin, no objects of this class should be
47 created. This class is intended to be subclassed together with one of
48 Sequence classes.
50 Attributes:
52 * pdb_chain -- Bio.PDB.Chain
53 * pdb_residues -- {Monomer: Bio.PDB.Residue}
55 ?TODO: global pdb_structures
56 """
58 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain='A', pdb_model=0):
59 """ Reads Pdb chain from file
61 and align each Monomer with PDB.Residue (TODO)
62 """
63 structure = get_structure(pdb_file, self.name)
64 chain = structure[pdb_model][pdb_chain]
65 self.pdb_chain = chain
66 self.pdb_residues = {}
67 pdb_sequence = self.__class__.from_pdb_chain(chain)
68 Alignment = self.types.Alignment
69 a = Alignment()
70 a.append_sequence(self)
71 a.append_sequence(pdb_sequence)
72 a.process(processors.Muscle())
73 for monomer, pdb_monomer in a.columns_as_lists():
74 residue = pdb_sequence.pdb_residues[pdb_monomer]
75 self.pdb_residues[monomer] = residue
77 def pdb_unload(self):
78 """ Delete all pdb-connected links """
79 del self.pdb_chain
80 del self.pdb_residues
82 @classmethod
83 def from_pdb_chain(cls, chain):
84 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
86 chain is Bio.PDB.Chain
87 """
88 cappbuilder = CaPPBuilder()
89 peptides = cappbuilder.build_peptides(chain)
90 Sequence = cls
91 Monomer = Sequence.types.Monomer
92 sequence = Sequence()
93 sequence.pdb_chain = chain
94 sequence.pdb_residues = {}
95 for peptide in peptides:
96 for ca_atom in peptide.get_ca_list():
97 residue = ca_atom.get_parent()
98 monomer = Monomer.from_code3(residue.get_resname())
99 sequence.pdb_residues[monomer] = residue
100 sequence.append(monomer)
101 return sequence
103 def auto_pdb(self, conformity=None, dir='/tmp', mask='%s.pdb'):
104 """ Adds pdb information to each monomer
106 Returns if information has been successfully added
107 TODO: conformity_file
109 id-format lava flow
110 """
111 match = pdb_id_parse(self.name)
112 if not match:
113 return False
114 code = match['code']
115 chain = match['chain']
116 model = match['model']
117 user_path = os.path.join(dir, mask % self.name)
118 path = user_path
119 if not os.path.exists(path) or not os.path.getsize(path):
120 path = config.pdb_path % code
121 if not os.path.exists(path) or not os.path.getsize(path):
122 url = config.pdb_url % code
123 print "Download %s" % url
124 path = user_path
125 data = urllib2.urlopen(url).read()
126 pdb_file = open(path, 'w')
127 pdb_file.write(data)
128 pdb_file.close()
129 print "Save %s" % path
130 self.set_pdb_chain(open(path), code, chain, model)
131 return True
133 def pdb_save(self, out_filename, pdb_chain):
134 """ Saves pdb_chain to out_file """
135 class MySelect(Select):
136 def accept_chain(self, chain):
137 if chain == pdb_chain:
138 return 1
139 else:
140 return 0
141 io = PDBIO()
142 structure = chain.get_parent()
143 io.set_structure(structure)
144 io.save(out_filename, MySelect())
146 def ca_atoms(self, sequence):
147 """ Iterates Ca-atom of monomers of this sequence """
148 return (pdb_residues[monomer]['CA'] for monomer in self)
150 class AlignmentMixin(base.Alignment):
151 """Mixin to add 3D properties to alignments.
153 Please note: since this is a mixin, no objects of this class should be
154 created. This class is intended to be subclassed together with one of
155 Alignment classes.
156 """
157 pass
159 class BlockMixin(base.Block):
160 """Mixin to add 3D properties to blocks.
162 Please note: since this is a mixin, no objects of this class should be
163 created. This class is intended to be subclassed together with one of
164 Block classes.
165 """
167 def geometrical_cores(self, max_delta=config.delta,
168 timeout=config.timeout, minsize=config.minsize,
169 ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count):
170 """ Returns length-sorted list of GCs
171 GC is set of columns
173 * max_delta -- threshold of distance spreading
174 * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
175 * minsize -- min size of each core
176 * ac_new_atoms -- min part or new atoms in new alternative core
177 current GC is compared with each of already selected GCs if
178 difference is less then ac_new_atoms, current GC is skipped
179 difference = part of new atoms in current core
180 * ac_count -- max number of cores (including main core)
181 -1 means infinity
183 cost is calculated as 1 / (delta + 1)
184 delta in [0, +inf) => cost in (0, 1]
185 """
186 nodes = self.columns
187 lines = {}
188 for i, column1 in enumerate(self.columns):
189 for j, column2 in enumerate(self.columns):
190 if i < j:
191 distances = []
192 for sequence in self.sequences:
193 if sequence in column1 and sequence in column2:
194 m1 = column1[sequence]
195 m2 = column2[sequence]
196 r1 = sequence.pdb_residues[m1]
197 r2 = sequence.pdb_residues[m2]
198 ca1 = r1['CA']
199 ca2 = r2['CA']
200 d = ca1 - ca2 # Bio.PDB feature
201 distances.append(d)
202 if len(distances) >= 2:
203 delta = max(distances) - min(distances)
204 if delta <= max_delta:
205 line = Graph.line(column1, column2)
206 lines[line] = 1.0 / (1.0 + max_delta)
207 graph = Graph(nodes, lines)
208 cliques = graph.cliques(timeout=timeout, minsize=minsize)
209 GCs = []
210 for clique in cliques:
211 for GC in GCs:
212 new_nodes = clique - GC
213 if len(new_nodes) < ac_new_atoms * len(clique):
214 break
215 else:
216 GCs.append(clique)
217 return GCs
221 def superimpose(self):
222 """ Superimpose all pdb_chains in this block """
223 sequences_chains = list(self.sequences_chains())
224 if len(sequences_chains) >= 1:
225 sup = Superimposer()
226 fixed_sequence, fixed_chain = sequences_chains.pop()
227 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
228 for sequence, chain in sequences_chains:
229 moving_atoms = self.ca_atoms(sequence, chain)
230 sup.set_atoms(fixed_atoms, moving_atoms)
231 # Apply rotation/translation to the moving atoms
232 sup.apply(moving_atoms)
234 def pdb_save(self, out_file):
235 """ Save all sequences
237 Returns {(sequence, chain): CHAIN}
238 CHAIN is chain letter in new file
239 """
240 tmp_file = NamedTemporaryFile(delete=False)
241 tmp_file.close()
243 for sequence, chain in self.sequences_chains():
244 sequence.pdb_save(tmp_file.name, chain)
245 # TODO: read from tmp_file.name
246 # change CHAIN
247 # add to out_file
249 os.unlink(NamedTemporaryFile)
251 # vim: set ts=4 sts=4 sw=4 et: