allpy
changeset 100:a49f40ace6a8
removed useless file align.py
author | boris <bnagaev@gmail.com> |
---|---|
date | Thu, 21 Oct 2010 00:27:15 +0400 |
parents | ffc102ed0249 |
children | 5091fef2ad88 |
files | lib/align.py |
diffstat | 1 files changed, 0 insertions(+), 233 deletions(-) [+] |
line diff
1.1 --- a/lib/align.py Wed Oct 20 23:38:21 2010 +0400 1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 1.3 @@ -1,233 +0,0 @@ 1.4 - 1.5 -""" 1.6 -Usage: 1.7 - 1.8 -seq_in=[] 1.9 -seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM") 1.10 -seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA") 1.11 -l=AlignmentSeq(seq_in) 1.12 -for t in l.new_sequences: 1.13 - print t 1.14 - 1.15 -Why not to reimplement it? 1.16 -Lava flow 1.17 -http://www.insidecpp.ru/antipatterns/lava_flow/ 1.18 -""" 1.19 - 1.20 -from allpy_data.blossum62 import matrix, gaps 1.21 - 1.22 -class AlignmentSeq(object): 1.23 - 1.24 - def __init__(self, sequences): 1.25 - """ 1.26 - sequences are strings 1.27 - 1.28 - new_sequences -- aligned sequences 1.29 - connections -- list of dicts like { [new_atom_id] => old_atom_id } 1.30 - common -- list of [list_of_letters, list_of_chain_numbers] 1.31 - """ 1.32 - self.old_sequences = [] 1.33 - self.new_sequences = [] 1.34 - self.connections = [] 1.35 - for seq in sequences: 1.36 - self.old_sequences.append(seq.upper().replace('-','').replace(' ','')) 1.37 - self.common=[] 1.38 - for i in xrange(len(self.old_sequences)): 1.39 - self.unite(i) 1.40 - for i in xrange(len(self.old_sequences)): 1.41 - self.lining(i) 1.42 - 1.43 - @staticmethod 1.44 - def cost(a1, a2): 1.45 - """ 1.46 - returns cost of aligning of aminoacids a1 and a2 1.47 - """ 1.48 - a1 = a1.upper() 1.49 - a2 = a2.upper() 1.50 - if a1 in matrix: 1.51 - if a2 in matrix[a1]: 1.52 - return matrix[a1][a2] 1.53 - return gaps[0] 1.54 - 1.55 - @staticmethod 1.56 - def gap_cost(self, gaps_count): 1.57 - """ 1.58 - returns penalty of appending (to the right end) one more gap 1.59 - gaps_count -- real number of gaps 1.60 - """ 1.61 - if gaps_count >= len(gaps): 1.62 - return gaps[(len(gaps)-1)] 1.63 - else: 1.64 - return gaps[gaps_count] 1.65 - 1.66 - # TO BE DONE 1.67 - 1.68 - #def unite(self, chainN): 1.69 - #""" 1.70 - #alignment list creation 1.71 - #chainN - chain number 1.72 - #""" 1.73 - #str1 = self.old_sequences[chainN] 1.74 - #len1 = len(str1) 1.75 - 1.76 - #if not self.common: 1.77 - #i = 0 1.78 - #while i < len1: 1.79 - #aminoacids = [str1[i]] 1.80 - #chains = [chainN] 1.81 - #self.common.append([aminoacids,chains]) 1.82 - #i += 1 1.83 - #return 1.84 - 1.85 - #len2 = len(self.common) 1.86 - 1.87 - #d = [] 1.88 - #tip_from = [] 1.89 - #already_gaps = [] 1.90 - 1.91 - #for i in xrange(len1 + 1): 1.92 - #d.append([]) 1.93 - #already_gaps.append([]) 1.94 - #tip_from.append([]) 1.95 - #for j in xrange(len2 + 1): 1.96 - #d[i].append(0) 1.97 - #already_gaps[i].append([0, 0]) 1.98 - #tip_from[i].append(0) 1.99 - 1.100 - #for i in xrange(1, len1 + 1): 1.101 - #for j in xrange(1, len2 + 1): 1.102 - 1.103 - #costs = [] 1.104 - #for A in self.common[j - 1][0]: 1.105 - #costs.append(self.cost(str1[i - 1], A)) 1.106 - #cost = max(costs) 1.107 - 1.108 - #insertion = d[i - 1][j] 1.109 - #if j != len2: # non-end gap 1.110 - #insertion += self.gap_cost(already_gaps[i - 1][j][1]) 1.111 - 1.112 - #deletion = d[i][j - 1] 1.113 - #if i != len1: # non-end gap 1.114 - #deletion += self.gap_cost(already_gaps[i][j - 1][0]) 1.115 - 1.116 - #substitution = d[i - 1][j - 1] + cost 1.117 - #max_way = max(insertion, deletion, substitution) 1.118 - #d[i][j] = max_way 1.119 - 1.120 - #if max_way == substitution: 1.121 - #tip = 3 1.122 - #if max_way == deletion: 1.123 - #tip = 2 1.124 - #if max_way == insertion: 1.125 - #tip = 1 1.126 - 1.127 - #if tip == 1: # insertion 1.128 - #already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ] 1.129 - #if tip == 2: # deletion 1.130 - #already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ] 1.131 - #if tip == 3: # substitution 1.132 - #already_gaps[i][j]=[ 0, 0 ] 1.133 - 1.134 - #tip_from[i][j] = tip 1.135 - 1.136 - #i = len1 1.137 - #j = len2 1.138 - 1.139 - #common = [] 1.140 - 1.141 - #while i > 0 or j > 0: 1.142 - #tip = tip_from[i][j] 1.143 - 1.144 - #if tip == 1 or j == 0 and i > 0: 1.145 - 1.146 - #aminoacids = [(str1[i-1])] 1.147 - #chains = [chainN] 1.148 - 1.149 - #common.append([aminoacids, chains]) 1.150 - 1.151 - #i -= 1 1.152 - 1.153 - 1.154 - 1.155 - #if tip==2 or (i==0 and j>0): 1.156 - 1.157 - #common.append(self.common[j-1]) 1.158 - #j-=1 1.159 - 1.160 - 1.161 - #if (tip==3): 1.162 - 1.163 - #chains=self.common[j-1][1] 1.164 - #chains.append(chainN) 1.165 - 1.166 - #aminoacids=self.common[j-1][0] 1.167 - 1.168 - #if (not aminoacids.count(str1[i-1])): 1.169 - #aminoacids.append(str1[i-1]) 1.170 - 1.171 - #common.append([aminoacids,chains]) 1.172 - 1.173 - #i-=1 1.174 - #j-=1 1.175 - 1.176 - 1.177 - 1.178 - #common.reverse() 1.179 - 1.180 - #self.common=common 1.181 - 1.182 - 1.183 - 1.184 - 1.185 - 1.186 - 1.187 - 1.188 - 1.189 - 1.190 - 1.191 - 1.192 - 1.193 - 1.194 - 1.195 - 1.196 - 1.197 - 1.198 - 1.199 - 1.200 - 1.201 - 1.202 - 1.203 - 1.204 - #def lining(self,chainN): 1.205 - 1.206 - 1.207 - #""" 1.208 - #метод создает новую выровненную последовательность 1.209 - #в self.new_sequences 1.210 - 1.211 - #chainN - номер цепи 1.212 - #""" 1.213 - 1.214 - #str1=self.old_sequences[chainN] 1.215 - #len1=len(str1) 1.216 - 1.217 - #len2=len(self.common) 1.218 - 1.219 - 1.220 - #new_seq='' 1.221 - #position_in_old=0 1.222 - 1.223 - #for common_1 in self.common: 1.224 - #if (common_1[1].count(chainN)): 1.225 - #new_seq = new_seq + str1[position_in_old] 1.226 - #position_in_old += 1 1.227 - #else: 1.228 - #new_seq = new_seq + '-' 1.229 - 1.230 - #self.new_sequences.append(new_seq) 1.231 - 1.232 - 1.233 - 1.234 - 1.235 - 1.236 -