Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/3d40d7b9a123/lib/align.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:39:32 2014
Кодировка:
allpy: lib/align.py annotate

allpy

annotate lib/align.py @ 98:3d40d7b9a123

lib::project::muscle_align project::save_fasta sequence::operator== fixes test -- alignment done
author boris <bnagaev@gmail.com>
date Wed, 20 Oct 2010 23:33:29 +0400
parents
children
rev   line source
bnagaev@89 1 """
bnagaev@89 2 Usage:
bnagaev@89 3
bnagaev@89 4 seq_in=[]
bnagaev@89 5 seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM")
bnagaev@89 6 seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA")
bnagaev@89 7 l=AlignmentSeq(seq_in)
bnagaev@89 8 for t in l.new_sequences:
bnagaev@89 9 print t
bnagaev@89 10
bnagaev@89 11 Why not to reimplement it?
bnagaev@89 12 Lava flow
bnagaev@89 13 http://www.insidecpp.ru/antipatterns/lava_flow/
bnagaev@89 14 """
bnagaev@89 15
bnagaev@89 16 from allpy_data.blossum62 import matrix, gaps
bnagaev@89 17
bnagaev@89 18 class AlignmentSeq(object):
bnagaev@89 19
bnagaev@89 20 def __init__(self, sequences):
bnagaev@89 21 """
bnagaev@89 22 sequences are strings
bnagaev@89 23
bnagaev@89 24 new_sequences -- aligned sequences
bnagaev@89 25 connections -- list of dicts like { [new_atom_id] => old_atom_id }
bnagaev@89 26 common -- list of [list_of_letters, list_of_chain_numbers]
bnagaev@89 27 """
bnagaev@89 28 self.old_sequences = []
bnagaev@89 29 self.new_sequences = []
bnagaev@89 30 self.connections = []
bnagaev@89 31 for seq in sequences:
bnagaev@89 32 self.old_sequences.append(seq.upper().replace('-','').replace(' ',''))
bnagaev@89 33 self.common=[]
bnagaev@89 34 for i in xrange(len(self.old_sequences)):
bnagaev@89 35 self.unite(i)
bnagaev@89 36 for i in xrange(len(self.old_sequences)):
bnagaev@89 37 self.lining(i)
bnagaev@89 38
bnagaev@89 39 @staticmethod
bnagaev@89 40 def cost(a1, a2):
bnagaev@89 41 """
bnagaev@89 42 returns cost of aligning of aminoacids a1 and a2
bnagaev@89 43 """
bnagaev@89 44 a1 = a1.upper()
bnagaev@89 45 a2 = a2.upper()
bnagaev@89 46 if a1 in matrix:
bnagaev@89 47 if a2 in matrix[a1]:
bnagaev@89 48 return matrix[a1][a2]
bnagaev@89 49 return gaps[0]
bnagaev@89 50
bnagaev@89 51 @staticmethod
bnagaev@89 52 def gap_cost(self, gaps_count):
bnagaev@89 53 """
bnagaev@89 54 returns penalty of appending (to the right end) one more gap
bnagaev@89 55 gaps_count -- real number of gaps
bnagaev@89 56 """
bnagaev@89 57 if gaps_count >= len(gaps):
bnagaev@89 58 return gaps[(len(gaps)-1)]
bnagaev@89 59 else:
bnagaev@89 60 return gaps[gaps_count]
bnagaev@89 61
bnagaev@89 62 # TO BE DONE
bnagaev@89 63
bnagaev@89 64 #def unite(self, chainN):
bnagaev@89 65 #"""
bnagaev@89 66 #alignment list creation
bnagaev@89 67 #chainN - chain number
bnagaev@89 68 #"""
bnagaev@89 69 #str1 = self.old_sequences[chainN]
bnagaev@89 70 #len1 = len(str1)
bnagaev@89 71
bnagaev@89 72 #if not self.common:
bnagaev@89 73 #i = 0
bnagaev@89 74 #while i < len1:
bnagaev@89 75 #aminoacids = [str1[i]]
bnagaev@89 76 #chains = [chainN]
bnagaev@89 77 #self.common.append([aminoacids,chains])
bnagaev@89 78 #i += 1
bnagaev@89 79 #return
bnagaev@89 80
bnagaev@89 81 #len2 = len(self.common)
bnagaev@89 82
bnagaev@89 83 #d = []
bnagaev@89 84 #tip_from = []
bnagaev@89 85 #already_gaps = []
bnagaev@89 86
bnagaev@89 87 #for i in xrange(len1 + 1):
bnagaev@89 88 #d.append([])
bnagaev@89 89 #already_gaps.append([])
bnagaev@89 90 #tip_from.append([])
bnagaev@89 91 #for j in xrange(len2 + 1):
bnagaev@89 92 #d[i].append(0)
bnagaev@89 93 #already_gaps[i].append([0, 0])
bnagaev@89 94 #tip_from[i].append(0)
bnagaev@89 95
bnagaev@89 96 #for i in xrange(1, len1 + 1):
bnagaev@89 97 #for j in xrange(1, len2 + 1):
bnagaev@89 98
bnagaev@89 99 #costs = []
bnagaev@89 100 #for A in self.common[j - 1][0]:
bnagaev@89 101 #costs.append(self.cost(str1[i - 1], A))
bnagaev@89 102 #cost = max(costs)
bnagaev@89 103
bnagaev@89 104 #insertion = d[i - 1][j]
bnagaev@89 105 #if j != len2: # non-end gap
bnagaev@89 106 #insertion += self.gap_cost(already_gaps[i - 1][j][1])
bnagaev@89 107
bnagaev@89 108 #deletion = d[i][j - 1]
bnagaev@89 109 #if i != len1: # non-end gap
bnagaev@89 110 #deletion += self.gap_cost(already_gaps[i][j - 1][0])
bnagaev@89 111
bnagaev@89 112 #substitution = d[i - 1][j - 1] + cost
bnagaev@89 113 #max_way = max(insertion, deletion, substitution)
bnagaev@89 114 #d[i][j] = max_way
bnagaev@89 115
bnagaev@89 116 #if max_way == substitution:
bnagaev@89 117 #tip = 3
bnagaev@89 118 #if max_way == deletion:
bnagaev@89 119 #tip = 2
bnagaev@89 120 #if max_way == insertion:
bnagaev@89 121 #tip = 1
bnagaev@89 122
bnagaev@89 123 #if tip == 1: # insertion
bnagaev@89 124 #already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ]
bnagaev@89 125 #if tip == 2: # deletion
bnagaev@89 126 #already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ]
bnagaev@89 127 #if tip == 3: # substitution
bnagaev@89 128 #already_gaps[i][j]=[ 0, 0 ]
bnagaev@89 129
bnagaev@89 130 #tip_from[i][j] = tip
bnagaev@89 131
bnagaev@89 132 #i = len1
bnagaev@89 133 #j = len2
bnagaev@89 134
bnagaev@89 135 #common = []
bnagaev@89 136
bnagaev@89 137 #while i > 0 or j > 0:
bnagaev@89 138 #tip = tip_from[i][j]
bnagaev@89 139
bnagaev@89 140 #if tip == 1 or j == 0 and i > 0:
bnagaev@89 141
bnagaev@89 142 #aminoacids = [(str1[i-1])]
bnagaev@89 143 #chains = [chainN]
bnagaev@89 144
bnagaev@89 145 #common.append([aminoacids, chains])
bnagaev@89 146
bnagaev@89 147 #i -= 1
bnagaev@89 148
bnagaev@89 149
bnagaev@89 150
bnagaev@89 151 #if tip==2 or (i==0 and j>0):
bnagaev@89 152
bnagaev@89 153 #common.append(self.common[j-1])
bnagaev@89 154 #j-=1
bnagaev@89 155
bnagaev@89 156
bnagaev@89 157 #if (tip==3):
bnagaev@89 158
bnagaev@89 159 #chains=self.common[j-1][1]
bnagaev@89 160 #chains.append(chainN)
bnagaev@89 161
bnagaev@89 162 #aminoacids=self.common[j-1][0]
bnagaev@89 163
bnagaev@89 164 #if (not aminoacids.count(str1[i-1])):
bnagaev@89 165 #aminoacids.append(str1[i-1])
bnagaev@89 166
bnagaev@89 167 #common.append([aminoacids,chains])
bnagaev@89 168
bnagaev@89 169 #i-=1
bnagaev@89 170 #j-=1
bnagaev@89 171
bnagaev@89 172
bnagaev@89 173
bnagaev@89 174 #common.reverse()
bnagaev@89 175
bnagaev@89 176 #self.common=common
bnagaev@89 177
bnagaev@89 178
bnagaev@89 179
bnagaev@89 180
bnagaev@89 181
bnagaev@89 182
bnagaev@89 183
bnagaev@89 184
bnagaev@89 185
bnagaev@89 186
bnagaev@89 187
bnagaev@89 188
bnagaev@89 189
bnagaev@89 190
bnagaev@89 191
bnagaev@89 192
bnagaev@89 193
bnagaev@89 194
bnagaev@89 195
bnagaev@89 196
bnagaev@89 197
bnagaev@89 198
bnagaev@89 199
bnagaev@89 200 #def lining(self,chainN):
bnagaev@89 201
bnagaev@89 202
bnagaev@89 203 #"""
bnagaev@89 204 #????? ??????? ????? ??????????? ??????????????????
bnagaev@89 205 #? self.new_sequences
bnagaev@89 206
bnagaev@89 207 #chainN - ????? ????
bnagaev@89 208 #"""
bnagaev@89 209
bnagaev@89 210 #str1=self.old_sequences[chainN]
bnagaev@89 211 #len1=len(str1)
bnagaev@89 212
bnagaev@89 213 #len2=len(self.common)
bnagaev@89 214
bnagaev@89 215
bnagaev@89 216 #new_seq=''
bnagaev@89 217 #position_in_old=0
bnagaev@89 218
bnagaev@89 219 #for common_1 in self.common:
bnagaev@89 220 #if (common_1[1].count(chainN)):
bnagaev@89 221 #new_seq = new_seq + str1[position_in_old]
bnagaev@89 222 #position_in_old += 1
bnagaev@89 223 #else:
bnagaev@89 224 #new_seq = new_seq + '-'
bnagaev@89 225
bnagaev@89 226 #self.new_sequences.append(new_seq)
bnagaev@89 227
bnagaev@89 228
bnagaev@89 229
bnagaev@89 230
bnagaev@89 231
bnagaev@89 232
bnagaev@89 233