allpy: e5c8deea6f83 allpy/pdb.py

allpy

view allpy/pdb.py @ 304:e5c8deea6f83

Added docstrings to allpy.util

author	Daniil Alexeyevsky <me.dendik@gmail.com>
date	Thu, 16 Dec 2010 19:15:34 +0300
parents	4a2341bc90b1
children	4d610c277281

line source

1 """ Functions to get pdb information from fasta id

2 and to generate fasta id from pdb information

4 pdb information: code, chain, model

6 TODO: same for local pdb files

7 """

9 import re

10 import base

11 from Bio.PDB import PDBParser

12 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO

15 # for pdb-codes

16 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)

18 #~ # for files

19 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")

21 def std_id(pdb_id, pdb_chain, pdb_model=None):

22 if pdb_model:

23 return "%s_%s_%s" % \

24 (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)

25 else:

26 return "%s_%s" % \

27 (pdb_id.lower().strip(), pdb_chain.upper().strip())

29 def pdb_id_parse(ID):

30 match = re1.search(ID)

31 if not match:

32 return None

33 d = match.groupdict()

34 if 'chain' not in d or not d['chain']:

35 d['chain'] = ' '

36 if 'model' not in d or not d['model']:

37 d['model'] = 0

38 return d

41 def get_structure(file, name):

42 return PDBParser().get_structure(name, file)

44 #~ def std_id_parse(ID):

45 #~ """

46 #~ Parse standart ID to pdb_code, chain and model

47 #~ """

48 #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():

49 #~ # it is file

50 #~ parseO = self.re2.search(ID) # files

51 #~ else:

52 #~ parseO = self.re1.search(ID.lower()) # pdb codes

53 #~ if not parseO:

54 #~ return None

55 #~ parse = parseO.groups()

56 #~ if len(parse) < 2:

57 #~ return None

58 #~ code = parse[1]

59 #~ chain = ''

60 #~ model = None

61 #~ if len(parse) >= 4:

62 #~ chain = parse[3]

63 #~ if chain:

64 #~ chain = chain.upper()

65 #~ if len(parse) >= 6:

66 #~ if parse[5]:

67 #~ model = parse[5]

68 #~ return code, chain, model

70 class Sequence(base.Sequence):

71 """Sequence with associated PDB structure.

73 Attributes:

75 * pdb_chain -- Bio.PDB.Chain

76 * pdb_file -- file object

78 * pdb_residues -- {Monomer: Bio.PDB.Residue}

79 * pdb_secstr -- {Monomer: 'Secondary structure'}

80 Code Secondary structure

81 H alpha-helix

82 B Isolated beta-bridge residue

83 E Strand

84 G 3-10 helix

85 I pi-helix

86 T Turn

87 S Bend

88 - Other

91 ?TODO: global pdb_structures

92 """

94 def __init__(self, *args, **kw):

95 self.pdb_chains = []

96 self.pdb_files = {}

97 self.pdb_residues = {}

98 self.pdb_secstr = {}

100 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):

101 """ Reads Pdb chain from file

102

103 and align each Monomer with PDB.Residue (TODO)

104 """

105 name = std_id(pdb_id, pdb_chain, pdb_model)

106 structure = get_structure(pdb_file, name)

107 chain = structure[pdb_model][pdb_chain]

108 self.pdb_chains.append(chain)

109 self.pdb_residues[chain] = {}

110 self.pdb_secstr[chain] = {}

111 pdb_sequence = Sequence.from_pdb_chain(chain)

112 a = alignment.Alignment.from_sequences(self, pdb_sequence)

113 a.muscle_align()

114 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):

115 if pdb_sequence.pdb_has(chain, pdb_monomer):

116 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]

117 self.pdb_residues[chain][monomer] = residue

118 self.pdb_files[chain] = pdb_file

119

120 def pdb_unload(self):

121 """ Delete all pdb-connected links """

122 #~ gc.get_referrers(self.pdb_chains[0])

123 self.pdb_chains = []

124 self.pdb_residues = {}

125 self.pdb_secstr = {} # FIXME

126 self.pdb_files = {} # FIXME

127

128 @staticmethod

129 def from_pdb_chain(chain):

130 """ Returns Sequence with Monomers with link to Bio.PDB.Residue

131

132 chain is Bio.PDB.Chain

133 """

134 cappbuilder = CaPPBuilder()

135 peptides = cappbuilder.build_peptides(chain)

136 sequence = Sequence()

137 sequence.pdb_chains = [chain]

138 sequence.pdb_residues[chain] = {}

139 sequence.pdb_secstr[chain] = {}

140 for peptide in peptides:

141 for ca_atom in peptide.get_ca_list():

142 residue = ca_atom.get_parent()

143 monomer = AminoAcidType.from_pdb_residue(residue).instance()

144 sequence.pdb_residues[chain][monomer] = residue

145 sequence.monomers.append(monomer)

146 return sequence

147

148 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):

149 """ Adds pdb information to each monomer

150

151 Returns if information has been successfully added

152 TODO: conformity_file

153

154 id-format lava flow

155 """

156 if not conformity_info:

157 path = os.path.join(pdb_directory, self.name)

158 if os.path.exists(path) and os.path.getsize(path):

159 match = pdb_id_parse(self.name)

160 self.pdb_chain_add(open(path), match['code'],

161 match['chain'], match['model'])

162 else:

163 match = pdb_id_parse(self.name)

164 if match:

165 code = match['code']

166 pdb_filename = config.pdb_dir % code

167 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):

168 url = config.pdb_url % code

169 print "Download %s" % url

170 pdb_file = open(pdb_filename, 'w')

171 data = urllib2.urlopen(url).read()

172 pdb_file.write(data)

173 pdb_file.close()

174 print "Save %s" % pdb_filename

175 pdb_file = open(pdb_filename)

176 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])

177

178 def pdb_save(self, out_filename, pdb_chain):

179 """ Saves pdb_chain to out_file """

180 class GlySelect(Select):

181 def accept_chain(self, chain):

182 if chain == pdb_chain:

183 return 1

184 else:

185 return 0

186 io = PDBIO()

187 structure = chain.get_parent()

188 io.set_structure(structure)

189 io.save(out_filename, GlySelect())

190

191

192 def pdb_add_sec_str(self, pdb_chain):

193 """ Add secondary structure data """

194 tmp_file = NamedTemporaryFile(delete=False)

195 tmp_file.close()

196 pdb_file = self.pdb_files[pdb_chain].name

197 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})

198 dssp, keys = make_dssp_dict(tmp_file.name)

199 for monomer in self.monomers:

200 if self.pdb_has(pdb_chain, monomer):

201 residue = self.pdb_residues[pdb_chain][monomer]

202 try:

203 d = dssp[(pdb_chain.get_id(), residue.get_id())]

204 self.pdb_secstr[pdb_chain][monomer] = d[1]

205 except:

206 print "No dssp information about %s at %s" % (monomer, pdb_chain)

207 os.unlink(tmp_file.name)

208

209 def pdb_has(self, chain, monomer):

210 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]

211

212 def secstr_has(self, chain, monomer):

213 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]

214

215

216 class Alignment(base.Alignment):

217

218 def secstr(self, sequence, pdb_chain, gap='-'):

219 """ Returns string representing secondary structure """

220 return ''.join([

221 (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)

222 for m in self.body[sequence]])

223

224 # vim: set ts=4 sts=4 sw=4 et: