rev |
line source |
bnagaev@89
|
1 """ |
bnagaev@89
|
2 Usage: |
bnagaev@89
|
3 |
bnagaev@89
|
4 seq_in=[] |
bnagaev@89
|
5 seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM") |
bnagaev@89
|
6 seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA") |
bnagaev@89
|
7 l=AlignmentSeq(seq_in) |
bnagaev@89
|
8 for t in l.new_sequences: |
bnagaev@89
|
9 print t |
bnagaev@89
|
10 |
bnagaev@89
|
11 Why not to reimplement it? |
bnagaev@89
|
12 Lava flow |
bnagaev@89
|
13 http://www.insidecpp.ru/antipatterns/lava_flow/ |
bnagaev@89
|
14 """ |
bnagaev@89
|
15 |
bnagaev@89
|
16 from allpy_data.blossum62 import matrix, gaps |
bnagaev@89
|
17 |
bnagaev@89
|
18 class AlignmentSeq(object): |
bnagaev@89
|
19 |
bnagaev@89
|
20 def __init__(self, sequences): |
bnagaev@89
|
21 """ |
bnagaev@89
|
22 sequences are strings |
bnagaev@89
|
23 |
bnagaev@89
|
24 new_sequences -- aligned sequences |
bnagaev@89
|
25 connections -- list of dicts like { [new_atom_id] => old_atom_id } |
bnagaev@89
|
26 common -- list of [list_of_letters, list_of_chain_numbers] |
bnagaev@89
|
27 """ |
bnagaev@89
|
28 self.old_sequences = [] |
bnagaev@89
|
29 self.new_sequences = [] |
bnagaev@89
|
30 self.connections = [] |
bnagaev@89
|
31 for seq in sequences: |
bnagaev@89
|
32 self.old_sequences.append(seq.upper().replace('-','').replace(' ','')) |
bnagaev@89
|
33 self.common=[] |
bnagaev@89
|
34 for i in xrange(len(self.old_sequences)): |
bnagaev@89
|
35 self.unite(i) |
bnagaev@89
|
36 for i in xrange(len(self.old_sequences)): |
bnagaev@89
|
37 self.lining(i) |
bnagaev@89
|
38 |
bnagaev@89
|
39 @staticmethod |
bnagaev@89
|
40 def cost(a1, a2): |
bnagaev@89
|
41 """ |
bnagaev@89
|
42 returns cost of aligning of aminoacids a1 and a2 |
bnagaev@89
|
43 """ |
bnagaev@89
|
44 a1 = a1.upper() |
bnagaev@89
|
45 a2 = a2.upper() |
bnagaev@89
|
46 if a1 in matrix: |
bnagaev@89
|
47 if a2 in matrix[a1]: |
bnagaev@89
|
48 return matrix[a1][a2] |
bnagaev@89
|
49 return gaps[0] |
bnagaev@89
|
50 |
bnagaev@89
|
51 @staticmethod |
bnagaev@89
|
52 def gap_cost(self, gaps_count): |
bnagaev@89
|
53 """ |
bnagaev@89
|
54 returns penalty of appending (to the right end) one more gap |
bnagaev@89
|
55 gaps_count -- real number of gaps |
bnagaev@89
|
56 """ |
bnagaev@89
|
57 if gaps_count >= len(gaps): |
bnagaev@89
|
58 return gaps[(len(gaps)-1)] |
bnagaev@89
|
59 else: |
bnagaev@89
|
60 return gaps[gaps_count] |
bnagaev@89
|
61 |
bnagaev@89
|
62 # TO BE DONE |
bnagaev@89
|
63 |
bnagaev@89
|
64 #def unite(self, chainN): |
bnagaev@89
|
65 #""" |
bnagaev@89
|
66 #alignment list creation |
bnagaev@89
|
67 #chainN - chain number |
bnagaev@89
|
68 #""" |
bnagaev@89
|
69 #str1 = self.old_sequences[chainN] |
bnagaev@89
|
70 #len1 = len(str1) |
bnagaev@89
|
71 |
bnagaev@89
|
72 #if not self.common: |
bnagaev@89
|
73 #i = 0 |
bnagaev@89
|
74 #while i < len1: |
bnagaev@89
|
75 #aminoacids = [str1[i]] |
bnagaev@89
|
76 #chains = [chainN] |
bnagaev@89
|
77 #self.common.append([aminoacids,chains]) |
bnagaev@89
|
78 #i += 1 |
bnagaev@89
|
79 #return |
bnagaev@89
|
80 |
bnagaev@89
|
81 #len2 = len(self.common) |
bnagaev@89
|
82 |
bnagaev@89
|
83 #d = [] |
bnagaev@89
|
84 #tip_from = [] |
bnagaev@89
|
85 #already_gaps = [] |
bnagaev@89
|
86 |
bnagaev@89
|
87 #for i in xrange(len1 + 1): |
bnagaev@89
|
88 #d.append([]) |
bnagaev@89
|
89 #already_gaps.append([]) |
bnagaev@89
|
90 #tip_from.append([]) |
bnagaev@89
|
91 #for j in xrange(len2 + 1): |
bnagaev@89
|
92 #d[i].append(0) |
bnagaev@89
|
93 #already_gaps[i].append([0, 0]) |
bnagaev@89
|
94 #tip_from[i].append(0) |
bnagaev@89
|
95 |
bnagaev@89
|
96 #for i in xrange(1, len1 + 1): |
bnagaev@89
|
97 #for j in xrange(1, len2 + 1): |
bnagaev@89
|
98 |
bnagaev@89
|
99 #costs = [] |
bnagaev@89
|
100 #for A in self.common[j - 1][0]: |
bnagaev@89
|
101 #costs.append(self.cost(str1[i - 1], A)) |
bnagaev@89
|
102 #cost = max(costs) |
bnagaev@89
|
103 |
bnagaev@89
|
104 #insertion = d[i - 1][j] |
bnagaev@89
|
105 #if j != len2: # non-end gap |
bnagaev@89
|
106 #insertion += self.gap_cost(already_gaps[i - 1][j][1]) |
bnagaev@89
|
107 |
bnagaev@89
|
108 #deletion = d[i][j - 1] |
bnagaev@89
|
109 #if i != len1: # non-end gap |
bnagaev@89
|
110 #deletion += self.gap_cost(already_gaps[i][j - 1][0]) |
bnagaev@89
|
111 |
bnagaev@89
|
112 #substitution = d[i - 1][j - 1] + cost |
bnagaev@89
|
113 #max_way = max(insertion, deletion, substitution) |
bnagaev@89
|
114 #d[i][j] = max_way |
bnagaev@89
|
115 |
bnagaev@89
|
116 #if max_way == substitution: |
bnagaev@89
|
117 #tip = 3 |
bnagaev@89
|
118 #if max_way == deletion: |
bnagaev@89
|
119 #tip = 2 |
bnagaev@89
|
120 #if max_way == insertion: |
bnagaev@89
|
121 #tip = 1 |
bnagaev@89
|
122 |
bnagaev@89
|
123 #if tip == 1: # insertion |
bnagaev@89
|
124 #already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ] |
bnagaev@89
|
125 #if tip == 2: # deletion |
bnagaev@89
|
126 #already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ] |
bnagaev@89
|
127 #if tip == 3: # substitution |
bnagaev@89
|
128 #already_gaps[i][j]=[ 0, 0 ] |
bnagaev@89
|
129 |
bnagaev@89
|
130 #tip_from[i][j] = tip |
bnagaev@89
|
131 |
bnagaev@89
|
132 #i = len1 |
bnagaev@89
|
133 #j = len2 |
bnagaev@89
|
134 |
bnagaev@89
|
135 #common = [] |
bnagaev@89
|
136 |
bnagaev@89
|
137 #while i > 0 or j > 0: |
bnagaev@89
|
138 #tip = tip_from[i][j] |
bnagaev@89
|
139 |
bnagaev@89
|
140 #if tip == 1 or j == 0 and i > 0: |
bnagaev@89
|
141 |
bnagaev@89
|
142 #aminoacids = [(str1[i-1])] |
bnagaev@89
|
143 #chains = [chainN] |
bnagaev@89
|
144 |
bnagaev@89
|
145 #common.append([aminoacids, chains]) |
bnagaev@89
|
146 |
bnagaev@89
|
147 #i -= 1 |
bnagaev@89
|
148 |
bnagaev@89
|
149 |
bnagaev@89
|
150 |
bnagaev@89
|
151 #if tip==2 or (i==0 and j>0): |
bnagaev@89
|
152 |
bnagaev@89
|
153 #common.append(self.common[j-1]) |
bnagaev@89
|
154 #j-=1 |
bnagaev@89
|
155 |
bnagaev@89
|
156 |
bnagaev@89
|
157 #if (tip==3): |
bnagaev@89
|
158 |
bnagaev@89
|
159 #chains=self.common[j-1][1] |
bnagaev@89
|
160 #chains.append(chainN) |
bnagaev@89
|
161 |
bnagaev@89
|
162 #aminoacids=self.common[j-1][0] |
bnagaev@89
|
163 |
bnagaev@89
|
164 #if (not aminoacids.count(str1[i-1])): |
bnagaev@89
|
165 #aminoacids.append(str1[i-1]) |
bnagaev@89
|
166 |
bnagaev@89
|
167 #common.append([aminoacids,chains]) |
bnagaev@89
|
168 |
bnagaev@89
|
169 #i-=1 |
bnagaev@89
|
170 #j-=1 |
bnagaev@89
|
171 |
bnagaev@89
|
172 |
bnagaev@89
|
173 |
bnagaev@89
|
174 #common.reverse() |
bnagaev@89
|
175 |
bnagaev@89
|
176 #self.common=common |
bnagaev@89
|
177 |
bnagaev@89
|
178 |
bnagaev@89
|
179 |
bnagaev@89
|
180 |
bnagaev@89
|
181 |
bnagaev@89
|
182 |
bnagaev@89
|
183 |
bnagaev@89
|
184 |
bnagaev@89
|
185 |
bnagaev@89
|
186 |
bnagaev@89
|
187 |
bnagaev@89
|
188 |
bnagaev@89
|
189 |
bnagaev@89
|
190 |
bnagaev@89
|
191 |
bnagaev@89
|
192 |
bnagaev@89
|
193 |
bnagaev@89
|
194 |
bnagaev@89
|
195 |
bnagaev@89
|
196 |
bnagaev@89
|
197 |
bnagaev@89
|
198 |
bnagaev@89
|
199 |
bnagaev@89
|
200 #def lining(self,chainN): |
bnagaev@89
|
201 |
bnagaev@89
|
202 |
bnagaev@89
|
203 #""" |
bnagaev@89
|
204 #????? ??????? ????? ??????????? ?????????????????? |
bnagaev@89
|
205 #? self.new_sequences |
bnagaev@89
|
206 |
bnagaev@89
|
207 #chainN - ????? ???? |
bnagaev@89
|
208 #""" |
bnagaev@89
|
209 |
bnagaev@89
|
210 #str1=self.old_sequences[chainN] |
bnagaev@89
|
211 #len1=len(str1) |
bnagaev@89
|
212 |
bnagaev@89
|
213 #len2=len(self.common) |
bnagaev@89
|
214 |
bnagaev@89
|
215 |
bnagaev@89
|
216 #new_seq='' |
bnagaev@89
|
217 #position_in_old=0 |
bnagaev@89
|
218 |
bnagaev@89
|
219 #for common_1 in self.common: |
bnagaev@89
|
220 #if (common_1[1].count(chainN)): |
bnagaev@89
|
221 #new_seq = new_seq + str1[position_in_old] |
bnagaev@89
|
222 #position_in_old += 1 |
bnagaev@89
|
223 #else: |
bnagaev@89
|
224 #new_seq = new_seq + '-' |
bnagaev@89
|
225 |
bnagaev@89
|
226 #self.new_sequences.append(new_seq) |
bnagaev@89
|
227 |
bnagaev@89
|
228 |
bnagaev@89
|
229 |
bnagaev@89
|
230 |
bnagaev@89
|
231 |
bnagaev@89
|
232 |
bnagaev@89
|
233
|