Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/6134ae1dfdae/allpy/markups.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 05:14:05 2013
Кодировка:
allpy: 6134ae1dfdae allpy/markups.py

allpy

view allpy/markups.py @ 896:6134ae1dfdae

fix error caused by applying secondary structure markup This bug should be digged. For some reason BioPython's DSSP failed to get secondary structure information of a residue: $ ./pair_cores/pair_cores.py -i test/aln1.fasta -y aaa -e True ... ('A', (' ', 51, ' ')) caused by dssp.__getitem__
author Boris Nagaev <bnagaev@gmail.com>
date Fri, 23 Sep 2011 14:10:25 +0400
parents 9b253b69f8f1
children 7ebba94eece2
line source
1 import os
2 from tempfile import NamedTemporaryFile
4 from Bio.PDB import DSSP
6 import base
8 by_name = {}
9 """A dictionary of default markup name -> markup class."""
11 def update(*args):
12 """Update `by_name` dictionary.
14 If any arguments are given, add them to markups namespace beforehands.
15 """
16 # Add user classes if necessary
17 for markup_class in args:
18 class_name = markup_class.__name__
19 assert class_name not in globals(), "SameNamed markup already exists!"
20 globals()[class_name] = markup_class
21 # Update `by_name` dictonary
22 global by_name
23 by_name = {}
24 for markup_class in globals().values():
25 if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'):
26 fullname = markup_class.kind, markup_class.name
27 assert fullname not in by_name, "Samenamed markup already exists!"
28 by_name[fullname] = markup_class
30 class MarkupIOMixin(base.Markup):
31 """Standard helper mixin for creating saveable markups."""
33 separator = ','
34 """Separator to use when saving/loading markup."""
36 io_class = None
37 """MUST be overloaded when subclassing. io_class in file."""
39 @staticmethod
40 def parse_item(key, value):
41 """Deserialize one item of markup. Overload when subclassing."""
42 return value
44 @staticmethod
45 def repr_item(key, value):
46 """Serialize one item of markup. Overload when subclassing."""
47 return str(value)
49 @classmethod
50 def from_record(cls, container, record, name=None):
51 """Read markup from semi-parsed record from 'markup' file."""
52 assert record['io_class'] == cls.io_class
53 separator = record.get('separator', cls.separator)
54 values = record['markup'].split(separator)
55 result = container.add_markup(name, markup_class=cls)
56 assert len(values) == len(result.sorted_keys())
57 for key, value in zip(result.sorted_keys(), values):
58 if value:
59 result[key] = cls.parse_item(key, value)
60 return result
62 def to_record(self):
63 """Write markup to semi-serialized record for 'markup' file."""
64 values = []
65 for key in self.sorted_keys():
66 if key in self:
67 values.append(self.repr_item(key, self[key]))
68 else:
69 values.append('')
70 markup = self.separator.join(values)
71 return {
72 'markup': markup,
73 'io_class': self.io_class,
74 'separator': self.separator,
75 }
77 class IntMarkupMixin(MarkupIOMixin):
78 """Markup that has integer values."""
80 io_class = 'IntMarkup'
82 @staticmethod
83 def parse_item(key, value):
84 return int(value)
86 class SequenceNumberMarkup(base.SequenceMarkup):
88 name = 'number'
90 def refresh(self):
91 for number, monomer in enumerate(self.sequence, 1):
92 monomer.number = number
94 class SequenceIndexMarkup(base.SequenceMarkup):
96 name = 'index'
98 def refresh(self):
99 for index, monomer in enumerate(self.sequence):
100 monomer.index = index
102 class AlignmentNumberMarkup(base.AlignmentMarkup):
104 name = 'number'
106 def refresh(self):
107 for number, column in enumerate(self.alignment.columns, 1):
108 self[column] = number
110 class AlignmentIndexMarkup(base.AlignmentMarkup):
112 name = 'index'
114 def refresh(self):
115 for index, column in enumerate(self.alignment.columns):
116 self[column] = index
118 class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
120 name = 'case'
121 io_class = 'SequenceCaseMarkup'
123 def refresh(self):
124 for monomer in self.sequence:
125 if monomer.input_code1.isupper():
126 monomer.case = 'upper'
127 elif monomer.input_code1.islower():
128 monomer.case = 'lower'
130 @staticmethod
131 def parse_value(monomer, value):
132 assert mnomer.code1 == value.upper()
133 if value.isupper():
134 return 'upper'
135 if value.islower():
136 return 'lower'
138 @staticmethod
139 def repr_value(monomer, value):
140 if monomer.case == 'upper':
141 return monomer.code1.upper()
142 if monomer.case == 'lower':
143 return monomer.code1.lower()
144 raise AssertionError("Unknown monomer case")
146 class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
147 name = 'pdb_resi'
149 def from_pdb(self):
150 for monomer in self.sequence:
151 try:
152 monomer.pdb_resi = monomer.pdb_residue.id[1]
153 except Exception:
154 pass
156 def add_pdb(self, download_pdb=None, xyz_only=False):
157 import structure
158 if download_pdb is None:
159 download_pdb = structure.cached_download_pdb
161 match = structure.pdb_id_parse(self.sequence.name)
162 code, model , chain = match['code'], match['model'], match['chain']
163 pdb_file = download_pdb(code)
164 pdb_structure = structure.get_structure(pdb_file, self.sequence.name)
165 pdb_chain = pdb_structure[0][chain]
166 if not xyz_only:
167 self.sequence.pdb_chain = pdb_chain
168 for monomer in self.sequence:
169 if monomer in self:
170 pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
171 monomer.ca_xyz = pdb_residue['CA'].get_vector()
172 if not xyz_only:
173 monomer.pdb_residue = pdb_residue
175 class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
176 """ Secondary structure markup for sequence.
178 Depends on dsspcmbi program.
179 Sequence should be structure.SequenceMixin, pdb should be loaded.
180 Note that DSSP cannot handle mutiple models!
181 Note that dssp executable name is hardcoded (=dsspcmbi).
183 Notation:
184 * H -- alpha-helix
185 * B -- Isolated beta-bridge residue
186 * E -- Strand
187 * G -- 3-10 helix
188 * I -- pi-helix
189 * T -- Turn
190 * S -- Bend
191 * - -- Other
192 """
194 name = 'ss'
195 io_class = 'SequenceSecondaryStructureMarkup'
197 def refresh(self):
198 chain = self.sequence.pdb_chain
199 model = chain.get_parent()
200 pdb_file = NamedTemporaryFile(delete=False)
201 self.sequence.save_pdb(pdb_file)
202 pdb_file.close()
203 dssp=DSSP(model, pdb_file.name, dssp='dsspcmbi')
204 for monomer in self.sequence:
205 try:
206 monomer.ss = dssp[(chain.get_id(), monomer.pdb_residue.get_id())][1]
207 except:
208 monomer.ss = '?' # FIXME
209 os.unlink(pdb_file.name)
211 # This MUST be the last statement in this module.
212 update()
214 # vim: set ts=4 sts=4 sw=4 et: