allpy: bc4c291cc2b0 lib/block.py

allpy

view lib/block.py @ 148:bc4c291cc2b0

documentation improvements

author	boris <bnagaev@gmail.com>
date	Sun, 24 Oct 2010 23:17:24 +0400
parents	db9d116e979f
children	85fc264975a2

line source

1 #!usr/bin/python

3 import sys

5 import project

6 import sequence

7 import monomer

8 import config

9 from graph import Graph

10 from Bio.PDB import Superimposer

12 class Block(object):

13 """ Block of alignment

15 Mandatory data:

16 * self.project -- project object, which the block belongs to

17 * self.sequences - set of sequence objects that contain monomers

18 and/or gaps, that constitute the block

19 * self.positions -- sorted list of positions of the project.alignment that

20 are included in the block

22 Don't change self.sequences -- it may be a link to other block.sequences

24 How to create a new block:

25 >>> import project

26 >>> import block

27 >>> proj = project.Project(open("test.fasta"))

28 >>> block1 = block.Block(proj)

29 """

31 def __init__(self, project, sequences=None, positions=None):

32 """ Builds new block from project

34 if sequences==None, all sequences are used

35 if positions==None, all positions are used

36 """

37 if sequences == None:

38 sequences = set(project.sequences) # copy

39 if positions == None:

40 positions = range(len(project))

41 self.project = project

42 self.sequences = sequences

43 self.positions = positions

45 def save_fasta(self, out_file, long_line=60, gap='-'):

46 """ Saves alignment to given file in fasta-format

48 Splits long lines to substrings of length=long_line

49 To prevent this, set long_line=None

51 No changes in the names, descriptions or order of the sequences

52 are made.

53 """

54 for sequence in self.sequences:

55 out_file.write(">%(name)s %(description)s \n" % sequence.__dict__)

56 alignment_monomers = self.project.alignment[sequence]

57 block_monomers = [alignment_monomers[i] for i in self.positions]

58 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])

59 if long_line:

60 for i in range(0, len(string) // long_line + 1):

61 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])

62 else:

63 out_file.write("%s \n" % string)

65 def geometrical_cores(self, max_delta=config.delta,

66 timeout=config.timeout, minsize=config.minsize,

67 ac_new_atoms=config.ac_new_atoms,

68 ac_count=config.ac_count):

69 """ returns length-sorted list of blocks, representing GCs

71 max_delta -- threshold of distance spreading

72 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)

73 minsize -- min size of each core

74 ac_new_atoms -- min part or new atoms in new alternative core

75 current GC is compared with each of already selected GCs

76 if difference is less then ac_new_atoms, current GC is skipped

77 difference = part of new atoms in current core

78 ac_count -- max number of cores (including main core)

79 -1 means infinity

80 If more than one pdb chain for some sequence provided, consider all of them

81 cost is calculated as 1 / (delta + 1)

82 delta in [0, +inf) => cost in (0, 1]

83 """

84 nodes = self.positions

85 lines = {}

86 for i in self.positions:

87 for j in self.positions:

88 if i < j:

89 distances = []

90 for sequence in self.sequences:

91 for chain in sequence.pdb_chains:

92 m1 = self.project.alignment[sequence][i]

93 m2 = self.project.alignment[sequence][j]

94 if m1 and m2:

95 ca1 = m1.pdb_residues[chain]['CA']

96 ca2 = m2.pdb_residues[chain]['CA']

97 d = ca1 - ca2 # Bio.PDB feature

98 distances.append(d)

99 if len(distances) >= 2:

100 delta = max(distances) - min(distances)

101 if delta <= max_delta:

102 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)

103 graph = Graph(nodes, lines)

104 cliques = graph.cliques(timeout=timeout, minsize=minsize)

105 GCs = []

106 for clique in cliques:

107 for GC in GCs:

108 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):

109 break

110 else:

111 GCs.append(Block(self.project, self.sequences, clique))

112 if ac_count != -1 and len(GCs) >= ac_count:

113 break

114 return GCs

115

116 def xstring(self, x='X', gap='-'):

117 """ Returns string consisting of gap chars and chars x at self.positions

118

119 Length of returning string = length of project

120 """

121 monomers = [False] * len(self.project)

122 for i in self.positions:

123 monomers[i] = True

124 return ''.join([x if m else gap for m in monomers])

125

126 def save_xstring(self, out_file, name, description='', x='X', gap='-'):

127 """ Save xstring and name in fasta format """

128 out_file.write(">%(name)s %(description)s \n" % \

129 {'name':name, 'description':description})

130

131 out_file.write("%(xstring)s \n" % {'xstring':self.xstring(x=x, gap=gap)})

132

133 def monomers(self, sequence):

134 """ Iterates monomers of this sequence from this block """

135 alignment_sequence = self.project.alignment[sequence]

136 return (alignment_sequence[i] for i in self.positions)

137

138 def ca_atoms(self, sequence, pdb_chain):

139 """ Iterates Ca-atom of monomers of this sequence from this block """

140 return (monomer.pdb_residues[pdb_chain] for monomer in self.monomers())

141

142 def sequences_chains(self):

143 """ Iterates pairs (sequence, chain) """

144 for sequence in self.sequences:

145 for chain in sequence.pdb_chains:

146 yield (sequence, chain)

147

148 def superimpose(self):

149 """ Superimpose all pdb_chains in this block """

150 sequences_chains = []

151 if len(self.sequences) >= 1:

152 sup = Superimposer()

153 fixed_sequence, fixed_chain = sequences_chains.pop()

154 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)

155 for sequence, chain in sequences_chains:

156 moving_atoms = self.ca_atoms(sequence, chain)

157 sup.set_atoms(fixed_atoms, moving_atoms)

158 # Apply rotation/translation to the moving atoms

159 sup.apply(moving_atoms)

160

161 def pdb_save(self, out_file):

162 """ Superimpose all pdb_chains in this block """

163 tmp_file, tmp_filename = mkstemp()

164 os.close(tmp_file) # this is file descriptor, not normal file object.

165 sequence.pdb_save(tmp_filename, chain)

166

167

168