allpy

changeset 100:a49f40ace6a8
removed useless file align.py
author: boris <bnagaev@gmail.com>
date: Thu, 21 Oct 2010 00:27:15 +0400
parents: ffc102ed0249
children: 5091fef2ad88
files: lib/align.py
diffstat: 1 files changed, 0 insertions(+), 233 deletions(-)

lib/align.py 233
     1.1 --- a/lib/align.py	Wed Oct 20 23:38:21 2010 +0400
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,233 +0,0 @@
     1.4 -
     1.5 -"""
     1.6 -Usage:
     1.7 -
     1.8 -seq_in=[]
     1.9 -seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM")
    1.10 -seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA")
    1.11 -l=AlignmentSeq(seq_in)
    1.12 -for t in l.new_sequences:
    1.13 -    print t
    1.14 -
    1.15 -Why not to reimplement it?
    1.16 -Lava flow
    1.17 -http://www.insidecpp.ru/antipatterns/lava_flow/
    1.18 -"""
    1.19 -
    1.20 -from allpy_data.blossum62 import matrix, gaps
    1.21 -
    1.22 -class AlignmentSeq(object):
    1.23 -
    1.24 -    def __init__(self, sequences):
    1.25 -        """
    1.26 -        sequences are strings
    1.27 -        
    1.28 -        new_sequences -- aligned sequences
    1.29 -        connections -- list of dicts like { [new_atom_id] => old_atom_id }
    1.30 -        common -- list of [list_of_letters, list_of_chain_numbers]
    1.31 -        """
    1.32 -        self.old_sequences = []
    1.33 -        self.new_sequences = []
    1.34 -        self.connections = []
    1.35 -        for seq in sequences:
    1.36 -            self.old_sequences.append(seq.upper().replace('-','').replace(' ',''))
    1.37 -        self.common=[]
    1.38 -        for i in xrange(len(self.old_sequences)):
    1.39 -             self.unite(i)
    1.40 -        for i in xrange(len(self.old_sequences)):
    1.41 -             self.lining(i)          
    1.42 -    
    1.43 -    @staticmethod
    1.44 -    def cost(a1, a2):
    1.45 -        """
    1.46 -        returns cost of aligning of aminoacids a1 and a2
    1.47 -        """
    1.48 -        a1 = a1.upper()
    1.49 -        a2 = a2.upper()
    1.50 -        if a1 in matrix:
    1.51 -            if a2 in matrix[a1]:
    1.52 -                return matrix[a1][a2]
    1.53 -        return gaps[0]
    1.54 -   
    1.55 -    @staticmethod
    1.56 -    def gap_cost(self, gaps_count):
    1.57 -        """
    1.58 -        returns penalty of appending (to the right end) one more gap
    1.59 -        gaps_count -- real number of gaps
    1.60 -        """
    1.61 -        if gaps_count >= len(gaps):
    1.62 -            return gaps[(len(gaps)-1)]
    1.63 -        else:
    1.64 -            return gaps[gaps_count]
    1.65 -    
    1.66 -    # TO BE DONE
    1.67 -    
    1.68 -    #def unite(self, chainN):
    1.69 -        #"""
    1.70 -        #alignment list creation
    1.71 -        #chainN - chain number
    1.72 -        #"""
    1.73 -        #str1 = self.old_sequences[chainN]
    1.74 -        #len1 = len(str1)
    1.75 -        
    1.76 -        #if not self.common:
    1.77 -            #i = 0
    1.78 -            #while i < len1:
    1.79 -                #aminoacids = [str1[i]]
    1.80 -                #chains = [chainN]
    1.81 -                #self.common.append([aminoacids,chains])
    1.82 -                #i += 1
    1.83 -            #return
    1.84 -
    1.85 -        #len2 = len(self.common)
    1.86 -
    1.87 -        #d = []
    1.88 -        #tip_from = []
    1.89 -        #already_gaps = []
    1.90 -        
    1.91 -        #for i in xrange(len1 + 1):
    1.92 -            #d.append([])
    1.93 -            #already_gaps.append([])
    1.94 -            #tip_from.append([])
    1.95 -            #for j in xrange(len2 + 1):
    1.96 -                #d[i].append(0)
    1.97 -                #already_gaps[i].append([0, 0])
    1.98 -                #tip_from[i].append(0)
    1.99 -
   1.100 -        #for i in xrange(1, len1 + 1):
   1.101 -            #for j in xrange(1, len2 + 1):
   1.102 -
   1.103 -                #costs = []
   1.104 -                #for A in self.common[j - 1][0]:
   1.105 -                    #costs.append(self.cost(str1[i - 1], A))
   1.106 -                #cost = max(costs)
   1.107 -
   1.108 -                #insertion = d[i - 1][j]
   1.109 -                #if j != len2: # non-end gap
   1.110 -                    #insertion += self.gap_cost(already_gaps[i - 1][j][1])
   1.111 -                    
   1.112 -                #deletion  = d[i][j - 1]
   1.113 -                #if i != len1: # non-end gap
   1.114 -                    #deletion += self.gap_cost(already_gaps[i][j - 1][0])
   1.115 -
   1.116 -                #substitution = d[i - 1][j - 1] + cost
   1.117 -                #max_way = max(insertion, deletion, substitution)
   1.118 -                #d[i][j] = max_way
   1.119 -                
   1.120 -                #if max_way == substitution:
   1.121 -                    #tip = 3   
   1.122 -                #if max_way == deletion:
   1.123 -                    #tip = 2                
   1.124 -                #if max_way == insertion:
   1.125 -                    #tip = 1
   1.126 -
   1.127 -                #if tip == 1:                             # insertion
   1.128 -                    #already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ]
   1.129 -                #if tip == 2:                             # deletion
   1.130 -                    #already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ]
   1.131 -                #if tip == 3:                             # substitution
   1.132 -                    #already_gaps[i][j]=[ 0, 0 ]
   1.133 -                
   1.134 -                #tip_from[i][j] = tip
   1.135 -
   1.136 -        #i = len1
   1.137 -        #j = len2
   1.138 -
   1.139 -        #common = []
   1.140 -
   1.141 -        #while i > 0 or j > 0:
   1.142 -            #tip = tip_from[i][j]
   1.143 -            
   1.144 -            #if tip == 1 or j == 0 and i > 0:
   1.145 -
   1.146 -                #aminoacids = [(str1[i-1])]
   1.147 -                #chains = [chainN]
   1.148 -                
   1.149 -                #common.append([aminoacids, chains])
   1.150 -                
   1.151 -                #i -= 1
   1.152 -
   1.153 -
   1.154 -                
   1.155 -            #if tip==2 or (i==0 and j>0):
   1.156 -                
   1.157 -                #common.append(self.common[j-1])
   1.158 -                #j-=1
   1.159 -
   1.160 -                
   1.161 -            #if (tip==3):
   1.162 -                                
   1.163 -                #chains=self.common[j-1][1]
   1.164 -                #chains.append(chainN)
   1.165 -                
   1.166 -                #aminoacids=self.common[j-1][0]
   1.167 -                
   1.168 -                #if (not aminoacids.count(str1[i-1])):
   1.169 -                    #aminoacids.append(str1[i-1])
   1.170 -
   1.171 -                #common.append([aminoacids,chains])
   1.172 -                    
   1.173 -                #i-=1
   1.174 -                #j-=1
   1.175 -
   1.176 -          
   1.177 -            
   1.178 -        #common.reverse()
   1.179 -        
   1.180 -        #self.common=common
   1.181 -
   1.182 -
   1.183 -
   1.184 -
   1.185 -
   1.186 -
   1.187 -
   1.188 -
   1.189 -
   1.190 -
   1.191 -
   1.192 -
   1.193 -
   1.194 -
   1.195 -
   1.196 -
   1.197 -
   1.198 -
   1.199 -
   1.200 -
   1.201 -
   1.202 -
   1.203 -
   1.204 -    #def lining(self,chainN):
   1.205 -
   1.206 -
   1.207 -        #"""
   1.208 -        #метод создает новую выровненную последовательность
   1.209 -        #в self.new_sequences
   1.210 -
   1.211 -        #chainN - номер цепи    
   1.212 -        #"""
   1.213 -
   1.214 -        #str1=self.old_sequences[chainN]
   1.215 -        #len1=len(str1)
   1.216 -        
   1.217 -        #len2=len(self.common)
   1.218 -
   1.219 -
   1.220 -        #new_seq=''
   1.221 -        #position_in_old=0
   1.222 -
   1.223 -        #for common_1 in self.common:
   1.224 -            #if (common_1[1].count(chainN)):
   1.225 -                #new_seq = new_seq + str1[position_in_old]
   1.226 -                #position_in_old += 1
   1.227 -            #else:
   1.228 -                #new_seq = new_seq + '-'
   1.229 -
   1.230 -        #self.new_sequences.append(new_seq)
   1.231 -
   1.232 -
   1.233 -
   1.234 -
   1.235 -
   1.236 -