allpy: 7ebba94eece2 allpy/markups.py

allpy

view allpy/markups.py @ 900:7ebba94eece2

MarkupIOMixin: added attribute quotes to allow quotation around markup serialized string; fixed implementation of SequenceCaseMarkup based on that; [closes #125]

author	Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date	Tue, 11 Oct 2011 17:21:44 +0400
parents	6134ae1dfdae
children	b8ddd35fac65

line source

1 import os

2 from tempfile import NamedTemporaryFile

4 from Bio.PDB import DSSP

6 import base

by_name = {}
"""A dictionary of default (markup kind, markup name) -> markup class.

It is rebuilt by `update()`; keys are `(kind, name)` tuples.
"""

def update(*args):
    """Update `by_name` dictionary.

    If any arguments are given, add them to markups namespace beforehand.

    Every object in the module namespace that has both a `name` and a
    `kind` attribute is registered in `by_name` under the key
    `(kind, name)`.

    Raises AssertionError if a class given in `args` has the same
    `__name__` as an object already present in the module namespace, or
    if two objects share the same `(kind, name)` pair. In the latter case
    the previous `by_name` is left untouched.
    """
    global by_name
    namespace = globals()

    # Add user classes if necessary
    for markup_class in args:
        class_name = markup_class.__name__
        # Explicit raise instead of `assert`: the check must survive `-O`.
        if class_name in namespace:
            raise AssertionError("SameNamed markup already exists!")
        namespace[class_name] = markup_class

    # Rebuild the registry in a local dictionary first, so that a failure
    # halfway through does not leave `by_name` partially filled.
    registry = {}
    for markup_class in list(namespace.values()):
        if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'):
            fullname = markup_class.kind, markup_class.name
            if fullname in registry:
                raise AssertionError("Samenamed markup already exists!")
            registry[fullname] = markup_class
    by_name = registry

class MarkupIOMixin(base.Markup):
    """Standard helper mixin for creating saveable markups.

    Subclasses set `io_class` and may overload `parse_item` / `repr_item`
    to convert individual values from / to their string form.
    """

    separator = ','
    """Separator to use when saving/loading markup."""

    quotes = ''
    """Quotation sign used on the markup."""

    io_class = None
    """MUST be overloaded when subclassing. io_class in file."""

    @staticmethod
    def parse_item(key, value):
        """Deserialize one item of markup. Overload when subclassing."""
        return value

    @staticmethod
    def repr_item(key, value):
        """Serialize one item of markup. Overload when subclassing."""
        return str(value)

    @classmethod
    def from_record(cls, container, record, name=None):
        """Read markup from semi-parsed record from 'markup' file.

        `record` is a dictionary that must have the keys 'io_class' and
        'markup'; 'separator' and 'quotes' are optional and default to
        the class attributes. The markup is created with
        `container.add_markup(name, markup_class=cls)` and filled in the
        order of its `sorted_keys()`. Empty items are left unset.

        Raises AssertionError if the record belongs to another io_class
        or if the number of items differs from the number of keys.
        """
        # Explicit raises instead of `assert`: checks must survive `-O`.
        if record['io_class'] != cls.io_class:
            raise AssertionError("Record io_class does not match markup")
        separator = record.get('separator', cls.separator)
        quotes = record.get('quotes', cls.quotes)
        values = record['markup'].strip(quotes)
        if separator:
            values = values.split(separator)
        result = container.add_markup(name, markup_class=cls)
        keys = result.sorted_keys()
        # ''.split(sep) gives [''], so an empty markup written by
        # to_record() for a container without keys must be read back
        # as no items at all.
        if not keys and values == ['']:
            values = []
        if len(values) != len(keys):
            raise AssertionError("Markup length does not match container")
        for key, value in zip(keys, values):
            if value:
                result[key] = cls.parse_item(key, value)
        return result

    def to_record(self):
        """Write markup to semi-serialized record for 'markup' file.

        Returns a dictionary with the keys 'markup', 'io_class',
        'separator' and 'quotes'. Keys of `sorted_keys()` that are not in
        the markup are written as empty strings. The 'markup' string is
        not wrapped in quotes here; the quotation sign is only reported
        in the record.
        """
        values = []
        for key in self.sorted_keys():
            if key in self:
                values.append(self.repr_item(key, self[key]))
            else:
                values.append('')
        return {
            'markup': self.separator.join(values),
            'io_class': self.io_class,
            'separator': self.separator,
            'quotes': self.quotes,
        }

class IntMarkupMixin(MarkupIOMixin):
    """Markup that has integer values."""

    io_class = 'IntMarkup'

    @staticmethod
    def parse_item(key, value):
        """Deserialize one item of markup as an integer."""
        return int(value)

class SequenceNumberMarkup(base.SequenceMarkup):
    """Sequence markup holding the 1-based position of each monomer."""

    name = 'number'

    def refresh(self):
        """Store position (counting from 1) in `number` of each monomer."""
        for number, monomer in enumerate(self.sequence, 1):
            monomer.number = number

class SequenceIndexMarkup(base.SequenceMarkup):
    """Sequence markup holding the 0-based position of each monomer."""

    name = 'index'

    def refresh(self):
        """Store position (counting from 0) in `index` of each monomer."""
        for index, monomer in enumerate(self.sequence):
            monomer.index = index

class AlignmentNumberMarkup(base.AlignmentMarkup):
    """Alignment markup holding the 1-based position of each column."""

    name = 'number'

    def refresh(self):
        """Map every column of the alignment to its position from 1."""
        for number, column in enumerate(self.alignment.columns, 1):
            self[column] = number

class AlignmentIndexMarkup(base.AlignmentMarkup):
    """Alignment markup holding the 0-based position of each column."""

    name = 'index'

    def refresh(self):
        """Map every column of the alignment to its position from 0."""
        for index, column in enumerate(self.alignment.columns):
            self[column] = index

class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
    """Sequence markup remembering the letter case of each monomer.

    Values are the strings 'upper' and 'lower'. The markup is saved as
    the sequence of one-letter codes in the recorded case, without a
    separator and with `'` as the quotation sign.
    """

    name = 'case'
    io_class = 'SequenceCaseMarkup'
    separator = ''
    quotes = "'"

    def refresh(self):
        """Set `case` of each monomer from the case of its `input_code1`.

        Monomers whose `input_code1` is neither upper nor lower case are
        left untouched.
        """
        for monomer in self.sequence:
            if monomer.input_code1.isupper():
                monomer.case = 'upper'
            elif monomer.input_code1.islower():
                monomer.case = 'lower'

    @staticmethod
    def parse_item(monomer, value):
        """Deserialize one letter of the markup into 'upper' or 'lower'.

        Returns None if the letter has no case. Raises AssertionError if
        the letter does not match `monomer.code1`.
        """
        # Explicit raise instead of `assert`: the check must survive `-O`.
        if monomer.code1 != value.upper():
            raise AssertionError("Markup letter does not match monomer")
        if value.isupper():
            return 'upper'
        if value.islower():
            return 'lower'
        return None

    @staticmethod
    def repr_item(monomer, value):
        """Serialize one monomer as its one-letter code in recorded case.

        Raises AssertionError if the case is neither 'upper' nor 'lower'.
        """
        # NOTE(review): the case is read from `monomer.case`, not from
        # `value`; presumably both are the same thing -- confirm.
        if monomer.case == 'upper':
            return monomer.code1.upper()
        if monomer.case == 'lower':
            return monomer.code1.lower()
        raise AssertionError("Unknown monomer case")

class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
    """Sequence markup holding the PDB residue number of each monomer."""

    name = 'pdb_resi'

    def from_pdb(self):
        """Set `pdb_resi` of each monomer from its `pdb_residue`.

        The number is taken from `monomer.pdb_residue.id[1]`. Monomers
        for which it cannot be obtained are skipped.
        """
        for monomer in self.sequence:
            try:
                monomer.pdb_resi = monomer.pdb_residue.id[1]
            except Exception:
                # Deliberate best-effort: such monomers stay unmarked.
                pass

    def add_pdb(self, download_pdb=None, xyz_only=False):
        """Attach PDB data to monomers that have this markup.

        The PDB code and chain are parsed from the sequence name with
        `structure.pdb_id_parse`. `download_pdb` is called with the PDB
        code to obtain the file; it defaults to
        `structure.cached_download_pdb`.

        Every marked monomer gets `ca_xyz` set to the vector of the 'CA'
        atom of its residue. Unless `xyz_only` is true, the chain is also
        stored as `pdb_chain` of the sequence and the residue as
        `pdb_residue` of each monomer.
        """
        # Imported here rather than at module level, as in the original.
        import structure
        if download_pdb is None:
            download_pdb = structure.cached_download_pdb

        match = structure.pdb_id_parse(self.sequence.name)
        code, chain = match['code'], match['chain']
        pdb_file = download_pdb(code)
        pdb_structure = structure.get_structure(pdb_file, self.sequence.name)
        # NOTE(review): model 0 is always taken; the model parsed from the
        # sequence name (match['model']) is not used -- confirm.
        pdb_chain = pdb_structure[0][chain]
        if not xyz_only:
            self.sequence.pdb_chain = pdb_chain
        for monomer in self.sequence:
            if monomer in self:
                pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
                monomer.ca_xyz = pdb_residue['CA'].get_vector()
                if not xyz_only:
                    monomer.pdb_residue = pdb_residue

class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
    """ Secondary structure markup for sequence.

    Depends on dsspcmbi program.
    Sequence should be structure.SequenceMixin, pdb should be loaded.
    Note that DSSP cannot handle multiple models!
    Note that dssp executable name is hardcoded (=dsspcmbi).

    Notation:
    * H -- alpha-helix
    * B -- Isolated beta-bridge residue
    * E -- Strand
    * G -- 3-10 helix
    * I -- pi-helix
    * T -- Turn
    * S -- Bend
    * - -- Other
    """

    name = 'ss'
    io_class = 'SequenceSecondaryStructureMarkup'

    def refresh(self):
        """Run DSSP on the sequence and set `ss` of each monomer.

        The sequence is saved with `save_pdb` into a temporary file that
        is handed to DSSP. The file is removed afterwards, also when
        saving or DSSP fails. Monomers for which no DSSP result can be
        looked up get '?'.
        """
        chain = self.sequence.pdb_chain
        model = chain.get_parent()
        # delete=False: the file must outlive close() so DSSP can read it.
        pdb_file = NamedTemporaryFile(delete=False)
        try:
            try:
                self.sequence.save_pdb(pdb_file)
            finally:
                pdb_file.close()
            dssp = DSSP(model, pdb_file.name, dssp='dsspcmbi')
            chain_id = chain.get_id()
            for monomer in self.sequence:
                try:
                    residue_id = monomer.pdb_residue.get_id()
                    monomer.ss = dssp[(chain_id, residue_id)][1]
                except Exception:
                    # Not a bare except: KeyboardInterrupt and SystemExit
                    # must not be swallowed.
                    monomer.ss = '?' # FIXME
        finally:
            os.unlink(pdb_file.name)

# This MUST be the last statement in this module.
# It fills `by_name` with the markup classes defined above.
update()

# vim: set ts=4 sts=4 sw=4 et: