allpy: 6507cd6808c7 allpy/sequence.py

allpy

view allpy/sequence.py @ 244:6507cd6808c7

allpy: monomers completed _monomers.py -- provate module for monomers other modules imports MonomerType's descendant from _monomer and inherit from Monomer type thre are some problems with Monomer -- MonomerType links

author	boris <bnagaev@gmail.com>
date	Fri, 03 Dec 2010 23:24:43 +0300
parents	280a8069c206
children	deae8240d396

line source

1 #!/usr/bin/python

2 # -*- coding: utf-8 -*-

4 from monomer import AminoAcidType

5 from Bio.PDB import CaPPBuilder, PDBIO

6 from Bio.PDB.DSSP import make_dssp_dict

7 from allpy_pdb import std_id, pdb_id_parse, get_structure

8 import alignment

9 import sys

10 import config

11 import os.path

12 import urllib2

13 from tempfile import NamedTemporaryFile

14 import os

17 class Sequence(list):

18 """ Sequence of Monomers

20 list of monomer objects (aminoacids or nucleotides)

22 Mandatory data:

23 * name -- str with the name of sequence

24 * description -- str with description of the sequence

26 Optional (may be empty):

27 * source -- source of sequence

28 * pdb_chain -- Bio.PDB.Chain

29 * pdb_file -- file object

31 * pdb_residues -- {Monomer: Bio.PDB.Residue}

32 * pdb_secstr -- {Monomer: 'Secondary structure'}

33 Code Secondary structure

34 H alpha-helix

35 B Isolated beta-bridge residue

36 E Strand

37 G 3-10 helix

38 I pi-helix

39 T Turn

40 S Bend

41 - Other

44 ?TODO: global pdb_structures

45 """

46 def __init__(self, monomers=None, name='', description=""):

47 if not monomers:

48 monomers = []

49 self.name = name

50 self.description = description

51 self.monomers = monomers

52 self.pdb_chains = []

53 self.pdb_files = {}

54 self.pdb_residues = {}

55 self.pdb_secstr = {}

57 def __len__(self):

58 return len(self.monomers)

60 def __str__(self):

61 """ Returns sequence in one-letter code """

62 return ''.join([monomer.type.code1 for monomer in self.monomers])

64 def __eq__(self, other):

65 """ Returns if all corresponding monomers of this sequences are equal

67 If lengths of sequences are not equal, returns False

68 """

69 return len(self) == len(other) and \

70 all([a==b for a, b in zip(self.monomers, other.monomers)])

72 def __ne__(self, other):

73 return not (self == other)

75 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):

76 """ Reads Pdb chain from file

78 and align each Monomer with PDB.Residue (TODO)

79 """

80 name = std_id(pdb_id, pdb_chain, pdb_model)

81 structure = get_structure(pdb_file, name)

82 chain = structure[pdb_model][pdb_chain]

83 self.pdb_chains.append(chain)

84 self.pdb_residues[chain] = {}

85 self.pdb_secstr[chain] = {}

86 pdb_sequence = Sequence.from_pdb_chain(chain)

87 a = alignment.Alignment.from_sequences(self, pdb_sequence)

88 a.muscle_align()

89 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):

90 if pdb_sequence.pdb_has(chain, pdb_monomer):

91 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]

92 self.pdb_residues[chain][monomer] = residue

93 self.pdb_files[chain] = pdb_file

95 def pdb_unload(self):

96 """ Delete all pdb-connected links """

97 #~ gc.get_referrers(self.pdb_chains[0])

98 self.pdb_chains = []

99 self.pdb_residues = {}

100 self.pdb_secstr = {} # FIXME

101 self.pdb_files = {} # FIXME

102

103 @staticmethod

104 def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):

105 """ Import data from one-letter code

106

107 monomer_kind is class, inherited from MonomerType

108 """

109 monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]

110 return Sequence(monomers, name, description)

111

112 @staticmethod

113 def from_pdb_chain(chain):

114 """ Returns Sequence with Monomers with link to Bio.PDB.Residue

115

116 chain is Bio.PDB.Chain

117 """

118 cappbuilder = CaPPBuilder()

119 peptides = cappbuilder.build_peptides(chain)

120 sequence = Sequence()

121 sequence.pdb_chains = [chain]

122 sequence.pdb_residues[chain] = {}

123 sequence.pdb_secstr[chain] = {}

124 for peptide in peptides:

125 for ca_atom in peptide.get_ca_list():

126 residue = ca_atom.get_parent()

127 monomer = AminoAcidType.from_pdb_residue(residue).instance()

128 sequence.pdb_residues[chain][monomer] = residue

129 sequence.monomers.append(monomer)

130 return sequence

131

132 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):

133 """ Adds pdb information to each monomer

134

135 Returns if information has been successfully added

136 TODO: conformity_file

137

138 id-format lava flow

139 """

140 if not conformity_info:

141 path = os.path.join(pdb_directory, self.name)

142 if os.path.exists(path) and os.path.getsize(path):

143 match = pdb_id_parse(self.name)

144 self.pdb_chain_add(open(path), match['code'],

145 match['chain'], match['model'])

146 else:

147 match = pdb_id_parse(self.name)

148 if match:

149 code = match['code']

150 pdb_filename = config.pdb_dir % code

151 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):

152 url = config.pdb_url % code

153 print "Download %s" % url

154 pdb_file = open(pdb_filename, 'w')

155 data = urllib2.urlopen(url).read()

156 pdb_file.write(data)

157 pdb_file.close()

158 print "Save %s" % pdb_filename

159 pdb_file = open(pdb_filename)

160 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])

161

162 def pdb_save(self, out_filename, pdb_chain):

163 """ Saves pdb_chain to out_file """

164 class GlySelect(Select):

165 def accept_chain(self, chain):

166 if chain == pdb_chain:

167 return 1

168 else:

169 return 0

170 io = PDBIO()

171 structure = chain.get_parent()

172 io.set_structure(structure)

173 io.save(out_filename, GlySelect())

174

175

176 def pdb_add_sec_str(self, pdb_chain):

177 """ Add secondary structure data """

178 tmp_file = NamedTemporaryFile(delete=False)

179 tmp_file.close()

180 pdb_file = self.pdb_files[pdb_chain].name

181 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})

182 dssp, keys = make_dssp_dict(tmp_file.name)

183 for monomer in self.monomers:

184 if self.pdb_has(pdb_chain, monomer):

185 residue = self.pdb_residues[pdb_chain][monomer]

186 try:

187 d = dssp[(pdb_chain.get_id(), residue.get_id())]

188 self.pdb_secstr[pdb_chain][monomer] = d[1]

189 except:

190 print "No dssp information about %s at %s" % (monomer, pdb_chain)

191 os.unlink(tmp_file.name)

192

193 def pdb_has(self, chain, monomer):

194 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]

195

196 def secstr_has(self, chain, monomer):

197 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]

198

199 @staticmethod

200 def file_slice(file, n_from, n_to, fasta_name='', name='', description='', monomer_kind=AminoAcidType):

201 """ Build and return sequence, consisting of part of sequence from file

202

203 Does not control gaps

204 """

205 inside = False

206 number_used = 0

207 s = ''

208 for line in file:

209 line = line.split()

210 if not inside:

211 if line.startswith('>%s' % fasta_name):

212 inside = True

213 else:

214 n = len(line)

215 s += line[(n_from - number_user):(n_to - number_user)]

216 return Sequence.from_str(s, name, description, monomer_kind)

217

218 class ProteinSequence(Sequence):

219 """ """

220 pass

221