allpy: 2ed0c183867a lib/sequence.py

allpy

view lib/sequence.py @ 158:2ed0c183867a

fix. delete sequence.pdb_files on pdb_unload. however memory leak remains

author	boris <bnagaev@gmail.com>
date	Thu, 28 Oct 2010 08:14:23 +0400
parents	9a42bc8d5198
children	fa6ef3e58128

line source

1 #!/usr/bin/python

2 # -*- coding: utf-8 -*-

4 from monomer import AminoAcidType

5 from Bio.PDB import CaPPBuilder, PDBIO

6 from Bio.PDB.DSSP import make_dssp_dict

7 from allpy_pdb import std_id, pdb_id_parse, get_structure

8 import project

9 import sys

10 import config

11 import os.path

12 import urllib2

13 from tempfile import NamedTemporaryFile

14 import os

17 class Sequence(object):

18 """ Sequence of Monomers

20 Mandatory data:

21 * name -- str with the name of sequence

22 * description -- str with description of the sequence

23 * monomers -- list of monomer objects (aminoacids or nucleotides)

24 * pdb_chains -- list of Bio.PDB.Chain's

25 * pdb_files -- dictionary like {Bio.PDB.Chain: file_obj}

27 * pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}}

28 * pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}}

29 Code Secondary structure

30 H ??-helix

31 B Isolated ??-bridge residue

32 E Strand

33 G 3-10 helix

34 I ??-helix

35 T Turn

36 S Bend

37 - Other

40 ?TODO: global pdb_structures

41 """

42 def __init__(self, monomers=None, name='', description=""):

43 if not monomers:

44 monomers = []

45 self.name = name

46 self.description = description

47 self.monomers = monomers

48 self.pdb_chains = []

49 self.pdb_files = {}

50 self.pdb_residues = {}

51 self.pdb_secstr = {}

53 def __len__(self):

54 return len(self.monomers)

56 def __str__(self):

57 """ Returns sequence in one-letter code """

58 return ''.join([monomer.type.code1 for monomer in self.monomers])

60 def __eq__(self, other):

61 """ Returns if all corresponding monomers of this sequences are equal

63 If lengths of sequences are not equal, returns False

64 """

65 return len(self) == len(other) and \

66 all([a==b for a, b in zip(self.monomers, other.monomers)])

68 def __ne__(self, other):

69 return not (self == other)

71 def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):

72 """ Reads Pdb chain from file

74 and align each Monomer with PDB.Residue (TODO)

75 """

76 name = std_id(pdb_id, pdb_chain, pdb_model)

77 structure = get_structure(pdb_file, name)

78 chain = structure[pdb_model][pdb_chain]

79 self.pdb_chains.append(chain)

80 self.pdb_residues[chain] = {}

81 self.pdb_secstr[chain] = {}

82 pdb_sequence = Sequence.from_pdb_chain(chain)

83 alignment = project.Project.from_sequences(self, pdb_sequence)

84 alignment.muscle_align()

85 for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self):

86 if pdb_sequence.pdb_has(chain, pdb_monomer):

87 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]

88 self.pdb_residues[chain][monomer] = residue

89 self.pdb_files[chain] = pdb_file

91 def pdb_unload(self):

92 """ Delete all pdb-connected links """

93 self.pdb_chains = []

94 self.pdb_residues = {}

95 self.pdb_secstr = {} # FIXME

96 self.pdb_files = {} # FIXME

98 @staticmethod

99 def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):

100 """ Import data from one-letter code

101

102 monomer_kind is class, inherited from MonomerType

103 """

104 monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]

105 return Sequence(monomers, name, description)

106

107 @staticmethod

108 def from_pdb_chain(chain):

109 """ Returns Sequence with Monomers with link to Bio.PDB.Residue

110

111 chain is Bio.PDB.Chain

112 """

113 cappbuilder = CaPPBuilder()

114 peptides = cappbuilder.build_peptides(chain)

115 sequence = Sequence()

116 sequence.pdb_chains = [chain]

117 sequence.pdb_residues[chain] = {}

118 sequence.pdb_secstr[chain] = {}

119 for peptide in peptides:

120 for ca_atom in peptide.get_ca_list():

121 residue = ca_atom.get_parent()

122 monomer = AminoAcidType.from_pdb_residue(residue).instance()

123 sequence.pdb_residues[chain][monomer] = residue

124 sequence.monomers.append(monomer)

125 return sequence

126

127 def pdb_auto_add(self, conformity_info=None, pdb_directory='./pdb'):

128 """ Adds pdb information to each monomer

129

130 Returns if information has been successfully added

131 TODO: conformity_file

132

133 id-format lava flow

134 """

135 if not conformity_info:

136 path = os.path.join(pdb_directory, self.name)

137 if os.path.exists(path):

138 match = pdb_id_parse(self.name)

139 self.pdb_chain_add(open(path), match['code'],

140 match['chain'], match['model'])

141 else:

142 match = pdb_id_parse(self.name)

143 if match:

144 code = match['code']

145 pdb_filename = config.pdb_dir % code

146 if not os.path.exists(pdb_filename):

147 url = config.pdb_url % code

148 print "Download %s" % url

149 pdb_file = open(pdb_filename, 'w')

150 data = urllib2.urlopen(url).read()

151 pdb_file.write(data)

152 pdb_file.close()

153 print "Save %s" % pdb_filename

154 pdb_file = open(pdb_filename)

155 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])

156

157 def pdb_save(self, out_filename, pdb_chain):

158 """ Saves pdb_chain to out_file """

159 class GlySelect(Select):

160 def accept_chain(self, chain):

161 if chain == pdb_chain:

162 return 1

163 else:

164 return 0

165 io = PDBIO()

166 structure = chain.get_parent()

167 io.set_structure(structure)

168 io.save(out_filename, GlySelect())

169

170

171 def pdb_add_sec_str(self, pdb_chain):

172 """ Add secondary structure data """

173 tmp_file = NamedTemporaryFile(delete=False)

174 tmp_file.close()

175 pdb_file = self.pdb_files[pdb_chain].name

176 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})

177 dssp, keys = make_dssp_dict(tmp_file.name)

178 for monomer in self.monomers:

179 if self.pdb_has(pdb_chain, monomer):

180 residue = self.pdb_residues[pdb_chain][monomer]

181 try:

182 d = dssp[(pdb_chain.get_id(), residue.get_id())]

183 self.pdb_secstr[pdb_chain][monomer] = d[1]

184 except:

185 print "No dssp information about %s at %s" % (monomer, pdb_chain)

186 os.unlink(tmp_file.name)

187

188 def pdb_has(self, chain, monomer):

189 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]

190

191 def secstr_has(self, chain, monomer):

192 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]