view allpy/markups.py @ 900:7ebba94eece2
MarkupIOMixin: added attribute quotes to allow quotation around markup serialized string; fixed implementation of SequenceCaseMarkup based on that; [closes #125]
author |
Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
date |
Tue, 11 Oct 2011 17:21:44 +0400 |
parents |
6134ae1dfdae |
children |
b8ddd35fac65 |
line source
2 from tempfile import NamedTemporaryFile
4 from Bio.PDB import DSSP
9 """A dictionary mapping (markup kind, markup name) -> default markup class."""
12 """Update `by_name` dictionary.
14 If any arguments are given, add them to the markups namespace beforehand.
16 # Add user classes if necessary
17 for markup_class in args:
18 class_name = markup_class.__name__
19 assert class_name not in globals(), "SameNamed markup already exists!"
20 globals()[class_name] = markup_class
21 # Update `by_name` dictionary
24 for markup_class in globals().values():
25 if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'):
26 fullname = markup_class.kind, markup_class.name
27 assert fullname not in by_name, "Samenamed markup already exists!"
28 by_name[fullname] = markup_class
30 class MarkupIOMixin(base.Markup):
31 """Standard helper mixin for creating saveable markups."""
34 """Separator to use when saving/loading markup."""
37 """Quotation sign used on the markup."""
40 """MUST be overridden when subclassing. The io_class value stored in the file record."""
43 def parse_item(key, value):
44 """Deserialize one item of markup. Override when subclassing."""
48 def repr_item(key, value):
49 """Serialize one item of markup. Override when subclassing."""
53 def from_record(cls, container, record, name=None):
54 """Read markup from semi-parsed record from 'markup' file."""
55 assert record['io_class'] == cls.io_class
56 separator = record.get('separator', cls.separator)
57 quotes = record.get('quotes', cls.quotes)
58 values = record['markup'].strip(quotes)
60 values = values.split(separator)
61 result = container.add_markup(name, markup_class=cls)
62 assert len(values) == len(result.sorted_keys())
63 for key, value in zip(result.sorted_keys(), values):
65 result[key] = cls.parse_item(key, value)
69 """Write markup to semi-serialized record for 'markup' file."""
71 for key in self.sorted_keys():
73 values.append(self.repr_item(key, self[key]))
76 markup = self.separator.join(values)
79 'io_class': self.io_class,
80 'separator': self.separator,
81 'quotes': self.quotes,
84 class IntMarkupMixin(MarkupIOMixin):
85 """Markup that has integer values."""
87 io_class = 'IntMarkup'
90 def parse_item(key, value):
93 class SequenceNumberMarkup(base.SequenceMarkup):
98 for number, monomer in enumerate(self.sequence, 1):
99 monomer.number = number
101 class SequenceIndexMarkup(base.SequenceMarkup):
106 for index, monomer in enumerate(self.sequence):
107 monomer.index = index
109 class AlignmentNumberMarkup(base.AlignmentMarkup):
114 for number, column in enumerate(self.alignment.columns, 1):
115 self[column] = number
117 class AlignmentIndexMarkup(base.AlignmentMarkup):
122 for index, column in enumerate(self.alignment.columns):
125 class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
128 io_class = 'SequenceCaseMarkup'
133 for monomer in self.sequence:
134 if monomer.input_code1.isupper():
135 monomer.case = 'upper'
136 elif monomer.input_code1.islower():
137 monomer.case = 'lower'
140 def parse_item(monomer, value):
141 assert monomer.code1 == value.upper()
148 def repr_item(monomer, value):
149 if monomer.case == 'upper':
150 return monomer.code1.upper()
151 if monomer.case == 'lower':
152 return monomer.code1.lower()
153 raise AssertionError("Unknown monomer case")
155 class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
159 for monomer in self.sequence:
161 monomer.pdb_resi = monomer.pdb_residue.id[1]
165 def add_pdb(self, download_pdb=None, xyz_only=False):
167 if download_pdb is None:
168 download_pdb = structure.cached_download_pdb
170 match = structure.pdb_id_parse(self.sequence.name)
171 code, model , chain = match['code'], match['model'], match['chain']
172 pdb_file = download_pdb(code)
173 pdb_structure = structure.get_structure(pdb_file, self.sequence.name)
174 pdb_chain = pdb_structure[0][chain]
176 self.sequence.pdb_chain = pdb_chain
177 for monomer in self.sequence:
179 pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
180 monomer.ca_xyz = pdb_residue['CA'].get_vector()
182 monomer.pdb_residue = pdb_residue
184 class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
185 """ Secondary structure markup for sequence.
187 Depends on dsspcmbi program.
188 Sequence should be structure.SequenceMixin, pdb should be loaded.
189 Note that DSSP cannot handle multiple models!
190 Note that dssp executable name is hardcoded (=dsspcmbi).
194 * B -- Isolated beta-bridge residue
204 io_class = 'SequenceSecondaryStructureMarkup'
207 chain = self.sequence.pdb_chain
208 model = chain.get_parent()
209 pdb_file = NamedTemporaryFile(delete=False)
210 self.sequence.save_pdb(pdb_file)
212 dssp=DSSP(model, pdb_file.name, dssp='dsspcmbi')
213 for monomer in self.sequence:
215 monomer.ss = dssp[(chain.get_id(), monomer.pdb_residue.get_id())][1]
217 monomer.ss = '?' # FIXME
218 os.unlink(pdb_file.name)
220 # This MUST be the last statement in this module.
223 # vim: set ts=4 sts=4 sw=4 et: