allpy
changeset 737:ccee8a6023f4
sbbs_2009 created. test_homology.py contains function that writes classes of homology to file.
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Sat, 09 Jul 2011 16:30:28 +0400 |
parents | eb2aa74497d5 |
children | f66135a68ae5 |
files | sbbs_2009/blocks_finder.py sbbs_2009/modules/Lgamma.py sbbs_2009/modules/alignment.py sbbs_2009/modules/block.py sbbs_2009/modules/blocks.py sbbs_2009/modules/blocks_io.py sbbs_2009/modules/configure.py sbbs_2009/modules/main.py sbbs_2009/modules/phylip.py sbbs_2009/modules/rooted_tree.py sbbs_2009/modules/sequence.py sbbs_2009/test_homology.py |
diffstat | 12 files changed, 3193 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/sbbs_2009/blocks_finder.py Sat Jul 09 16:30:28 2011 +0400 1.3 @@ -0,0 +1,170 @@ 1.4 +#! /usr/bin/python 1.5 + 1.6 +import sys 1.7 +import traceback 1.8 + 1.9 +sys.path.append("./modules") 1.10 +import block 1.11 +import blocks 1.12 +import alignment 1.13 +import rooted_tree 1.14 + 1.15 + 1.16 +#def main(current_time): 1.17 +# import cgi 1.18 +# import os 1.19 +# form = cgi.FieldStorage() 1.20 +# if form.has_key("Alignment") and form["Alignment"].value!="": 1.21 +# temp_file=open("../jobs/"+str(current_time)+".job",'w') 1.22 +# temp_file.write(str(form["Alignment"].value)) 1.23 +# temp_file.close() 1.24 +# _generate_pending_page(str(current_time)) 1.25 +# if not os.fork(): 1.26 +# _main("../jobs/"+str(current_time)+".job") 1.27 +# logfile=open("../jobs/"+str(current_time)+".log",'w') 1.28 +# logfile.write("Done\n") 1.29 +# logfile.close() 1.30 +# else: 1.31 +# if form.has_key("uploaded_file") and form["uploaded_file"].value!="": 1.32 +# temp_file=open("../jobs/"+str(current_time)+".job",'w') 1.33 +# temp_file.write(form["uploaded_file"].value) 1.34 +# temp_file.close() 1.35 +# _generate_pending_page(str(current_time)) 1.36 +# if not os.fork(): 1.37 +# _main("../jobs/"+str(current_time)+".job") 1.38 +# logfile=open("../jobs/"+str(current_time)+".log",'w') 1.39 +# logfile.write("Done\n") 1.40 +# logfile.close() 1.41 +# else: 1.42 +# print "Content-Type: text/plain\n" 1.43 +# print "I can't see any input. :) Please, submit your alignment." 1.44 + 1.45 + 1.46 +def main(file_name): 1.47 + alignment1 = alignment.Alignment([]) 1.48 + alignment1.get_from_fasta(open(file_name)) 1.49 + #initialization of blocks and initial_block 1.50 + blocks1=blocks.Blocks(alignment, None, None, None) 1.51 + initial_block=block.Block(alignment1, alignment1.sequence_indexes_s, alignment1.position_indexes_a) 1.52 + initial_block.state="pending" 1.53 + #initialization of rooted_tree of blocks 1.54 + tree_of_blocks = rooted_tree.Rooted_tree() 1.55 + tree_of_blocks.root.child_vertices.append(rooted_tree.Vertex([initial_block],tree_of_blocks.root,[],None)) 1.56 + tree_of_blocks.root.content[0]=block.plug() 1.57 + #there's no need in stack of blocks: to end the process we just need all the children of the initial_block to have status "processed", "aligned" or "no alignment" 1.58 + #traverse of the rooted_tree 1.59 + #all this info is available via "sys" module "_something" internal function or "traceback" module 1.60 + current_vertex=tree_of_blocks.root.child_vertices[0] 1.61 + 1.62 + while current_vertex!=tree_of_blocks.root: 1.63 + aligned_block=current_vertex.content[0].get_aligned_block() 1.64 + 1.65 + if aligned_block != None: 1.66 + #This is upsize, I've found yet another glitch in it. So, I've turned it off as a whole for now: 1.67 + #aligned_block=current_vertex.content[0].upsize(aligned_block) 1.68 + current_vertex.content[0].state="in process" 1.69 + blocks1.aligned_blocks.append(aligned_block) 1.70 + current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block],current_vertex,[],None)) 1.71 + if aligned_block.block_of_gaps!=None: current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block.block_of_gaps],current_vertex,[],None)) 1.72 + complementary_blocks=current_vertex.content[0].find_complementary_blocks(aligned_block.position_indexes_a, aligned_block.sequence_indexes_a) 1.73 + for i in complementary_blocks: 1.74 + current_vertex.child_vertices.append(rooted_tree.Vertex([i],current_vertex,[],None)) 1.75 + current_vertex=_find_next_current_vertex(current_vertex) 1.76 + else: 1.77 + current_vertex=_find_next_current_vertex(current_vertex) 1.78 + 1.79 + blocks_for_print=tree_of_blocks.depth_first_traversal() 1.80 + return blocks_for_print 1.81 + 1.82 + 1.83 +def blocks2homology_file(blocks, homology_file): 1.84 + sys.path.append("../allpy/") 1.85 + from homology import MonomerHomology 1.86 + class_counter = 0 1.87 + for b in blocks: 1.88 + if b[0].content[0].state == "aligned": 1.89 + for sa in b[0].content[0].sequence_indexes_a: 1.90 + sequence_name = b[0].content[0].alignment.sequences[b[0].content[0].alignment.sequence_indexes_s[sa]].name 1.91 + for pa in b[0].content[0].position_indexes_a: 1.92 + if b[0].content[0].alignment.get_aa(sa,pa)!=None: 1.93 + monomer_id = (sequence_name, pa+1) 1.94 + MonomerHomology.write_monomer(homology_file, monomer_id, class_counter) 1.95 + class_counter+=1 1.96 + else: 1.97 + if b[0].content[0].state == "no_alignment": 1.98 + #print each monomer as a separate class 1.99 + for sa in b[0].content[0].sequence_indexes_a: 1.100 + sequence_name = b[0].content[0].alignment.sequences[b[0].content[0].alignment.sequence_indexes_s[sa]].name 1.101 + for pa in b[0].content[0].position_indexes_a: 1.102 + if b[0].content[0].alignment.get_aa(sa, pa) != None: 1.103 + monomer_id = (sequence_name, pa+1) 1.104 + MonomerHomology.write_monomer(homology_file, monomer_id, class_counter) 1.105 + class_counter+=1 1.106 + 1.107 + 1.108 + 1.109 +def _find_next_current_vertex(current_vertex): 1.110 + """ 1.111 + This is an internal routine 1.112 + """ 1.113 + flag=0 1.114 + while flag!=1: 1.115 + if current_vertex.content[0].state=="in process": 1.116 + second_flag=0 1.117 + for i in current_vertex.child_vertices: 1.118 + if i.content[0].state=="pending": 1.119 + current_vertex=i 1.120 + return current_vertex 1.121 + elif i.content[0].state=="in process": 1.122 + current_vertex=i 1.123 + second_flag=1 1.124 + if second_flag==0: 1.125 + current_vertex.content[0].state="processed" 1.126 + current_vertex=current_vertex.parental_vertex 1.127 + elif current_vertex.content[0].state=="processed": 1.128 + current_vertex=current_vertex.parental_vertex 1.129 + elif current_vertex.content[0].state=="no_alignment": 1.130 + current_vertex=current_vertex.parental_vertex 1.131 + elif current_vertex.content[0].state=="initial": 1.132 + return current_vertex 1.133 + elif current_vertex.content[0].state=="pending": 1.134 + return current_vertex 1.135 + 1.136 +def _generate_next_page(current_time): 1.137 + 1.138 + print "Content-Type: text/html\n" 1.139 + print "<html>" 1.140 + print "<head>" 1.141 + print "<title>Blocks finder - Please, select output format</title>" 1.142 + print "</head>" 1.143 + print "<body bgcolor=\"#FFFFFF\">" 1.144 + print " <H1> Your job's name is "+str(current_time)+"</H1>" 1.145 + print " <P><A href=\"exceller.py?"+current_time+".job\">Output as a plain file (suitable for converting it into Excel table)</A>" 1.146 + print " <P><A href=\"viewer.py?"+current_time+".job\">Visualisation of output</A>" 1.147 + print "</body>" 1.148 + print "</html>" 1.149 + 1.150 + sys.stdout.flush() 1.151 + 1.152 + 1.153 +def _generate_pending_page(current_time): 1.154 + 1.155 + print "Content-Type: text/html\n" 1.156 + print "<html>" 1.157 + print "<head>" 1.158 + print "<title>Blocks finder - Job's being processed</title>" 1.159 + print "<script language=\"JavaScript1.2\">" 1.160 + print " function reLoad(){" 1.161 + print " window.location=\'http://mouse.belozersky.msu.ru/~burkov/cgi-bin/whistler.py?"+str(current_time)+"\'" 1.162 + #print " setTimeout(\"reLoad()\",10000);" 1.163 + print " }" 1.164 + print "</script>" 1.165 + print "</head>" 1.166 + print "<body bgcolor=\"#FFFFFF\" onload=setTimeout(\'reLoad()\',10000)>" 1.167 + print " <H1> Your job\'s name is "+str(current_time)+"</H1>" 1.168 + print " <div>When it\'s done, you'll be redirected to the results page.</div>" 1.169 + print "</body>" 1.170 + print "</html>" 1.171 + 1.172 + sys.stdout.flush() 1.173 +
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/sbbs_2009/modules/Lgamma.py Sat Jul 09 16:30:28 2011 +0400 2.3 @@ -0,0 +1,234 @@ 2.4 +from math import * 2.5 + 2.6 +two52= 4.50359962737049600000e+15 2.7 +half= 5.00000000000000000000e-01 2.8 +one = 1.00000000000000000000e+00 2.9 +pi = 3.14159265358979311600e+00 2.10 +a0 = 7.72156649015328655494e-02 2.11 +a1 = 3.22467033424113591611e-01 2.12 +a2 = 6.73523010531292681824e-02 2.13 +a3 = 2.05808084325167332806e-02 2.14 +a4 = 7.38555086081402883957e-03 2.15 +a5 = 2.89051383673415629091e-03 2.16 +a6 = 1.19270763183362067845e-03 2.17 +a7 = 5.10069792153511336608e-04 2.18 +a8 = 2.20862790713908385557e-04 2.19 +a9 = 1.08011567247583939954e-04 2.20 +a10 = 2.52144565451257326939e-05 2.21 +a11 = 4.48640949618915160150e-05 2.22 +tc = 1.46163214496836224576e+00 2.23 +tf = -1.21486290535849611461e-01 2.24 +##/* tt = -(tail of tf) */ 2.25 +tt = -3.63867699703950536541e-18 2.26 +t0 = 4.83836122723810047042e-01 2.27 +t1 = -1.47587722994593911752e-01 2.28 +t2 = 6.46249402391333854778e-02 2.29 +t3 = -3.27885410759859649565e-02 2.30 +t4 = 1.79706750811820387126e-02 2.31 +t5 = -1.03142241298341437450e-02 2.32 +t6 = 6.10053870246291332635e-03 2.33 +t7 = -3.68452016781138256760e-03 2.34 +t8 = 2.25964780900612472250e-03 2.35 +t9 = -1.40346469989232843813e-03 2.36 +t10 = 8.81081882437654011382e-04 2.37 +t11 = -5.38595305356740546715e-04 2.38 +t12 = 3.15632070903625950361e-04 2.39 +t13 = -3.12754168375120860518e-04 2.40 +t14 = 3.35529192635519073543e-04 2.41 +u0 = -7.72156649015328655494e-02 2.42 +u1 = 6.32827064025093366517e-01 2.43 +u2 = 1.45492250137234768737e+00 2.44 +u3 = 9.77717527963372745603e-01 2.45 +u4 = 2.28963728064692451092e-01 2.46 +u5 = 1.33810918536787660377e-02 2.47 +v1 = 2.45597793713041134822e+00 2.48 +v2 = 2.12848976379893395361e+00 2.49 +v3 = 7.69285150456672783825e-01 2.50 +v4 = 1.04222645593369134254e-01 2.51 +v5 = 3.21709242282423911810e-03 2.52 +s0 = -7.72156649015328655494e-02 2.53 +s1 = 2.14982415960608852501e-01 2.54 +s2 = 3.25778796408930981787e-01 2.55 +s3 = 1.46350472652464452805e-01 2.56 +s4 = 2.66422703033638609560e-02 2.57 +s5 = 1.84028451407337715652e-03 2.58 +s6 = 3.19475326584100867617e-05 2.59 +r1 = 1.39200533467621045958e+00 2.60 +r2 = 7.21935547567138069525e-01 2.61 +r3 = 1.71933865632803078993e-01 2.62 +r4 = 1.86459191715652901344e-02 2.63 +r5 = 7.77942496381893596434e-04 2.64 +r6 = 7.32668430744625636189e-06 2.65 +w0 = 4.18938533204672725052e-01 2.66 +w1 = 8.33333333333329678849e-02 2.67 +w2 = -2.77777777728775536470e-03 2.68 +w3 = 7.93650558643019558500e-04 2.69 +w4 = -5.95187557450339963135e-04 2.70 +w5 = 8.36339918996282139126e-04 2.71 +w6 = -1.63092934096575273989e-03 2.72 +zero= 0.00000000000000000000e+00 2.73 + 2.74 +##inf = float('inf') 2.75 +##nan = float('nan') 2.76 +inf = float(9e999) 2.77 + 2.78 +def sin_pi(x): 2.79 + x = float(x) 2.80 + e,ix = frexp(x) 2.81 + if(abs(x)<0.25): 2.82 + return -sin(pi*x) 2.83 + y = -x ##/* x is assume negative */ 2.84 + 2.85 +## * argument reduction, make sure inexact flag not raised if input 2.86 +## * is an integer 2.87 + z = floor(y) 2.88 + if(z!=y): 2.89 + y *= 0.5 2.90 + y = 2.0*(y - floor(y)) ##/* y = |x| mod 2.0 */ 2.91 + n = int(y*4.0) 2.92 + else: 2.93 + if(abs(ix)>=53): 2.94 + y = zero 2.95 + n = 0 ##/* y must be even */ 2.96 + else: 2.97 + if(abs(ix)<52): 2.98 + z = y+two52 ##/* exact */ 2.99 + e,n=frexp(z) 2.100 + n &= 1 2.101 + y = n 2.102 + n<<= 2 2.103 + 2.104 + if n == 0: 2.105 + y = sin(pi*y) 2.106 + elif (n == 1 or n == 2): 2.107 + y = cos(pi*(0.5-y)) 2.108 + elif (n == 3 or n == 4): 2.109 + y = sin(pi*(one-y)) 2.110 + elif (n == 5 or n == 6): 2.111 + y = -cos(pi*(y-1.5)) 2.112 + else: 2.113 + y = sin(pi*(y-2.0)) 2.114 + 2.115 + z = cos(pi*(z+1.0)); 2.116 + return -y*z 2.117 + 2.118 +def Lgamma(x): 2.119 + '''return natural logarithm of gamma function of x 2.120 + raise ValueError if x is negative integer''' 2.121 + x = float(x) 2.122 + 2.123 + ##/* purge off +-inf, NaN, +-0, and negative arguments */ 2.124 + if ((x == inf) or (x == -inf)): 2.125 + return inf 2.126 +## if (x is nan): 2.127 +## return nan 2.128 + 2.129 + e,ix = frexp(x) 2.130 + nadj = 0 2.131 + signgamp = 1 2.132 + 2.133 + if ((e==0.0) and (ix==0)): 2.134 + return inf 2.135 + 2.136 + if (ix>1020): 2.137 + return inf 2.138 + 2.139 + if ((e != 0.0) and (ix<-71)): 2.140 + if (x<0): 2.141 + return -log(-x) 2.142 + else: 2.143 + return -log(x) 2.144 + 2.145 + if (e<0): 2.146 + if (ix>52): 2.147 + return inf ##one/zero 2.148 + t = sin_pi(x) 2.149 + if (t==zero): 2.150 + ##return inf 2.151 + raise ValueError('gamma not defined for negative integer') 2.152 + nadj = log(pi/fabs(t*x)) 2.153 + if (t<zero): 2.154 + signgamp = -1 2.155 + x = -x 2.156 + 2.157 + ##/* purge off 1 and 2 */ 2.158 + if ((x==2.0) or (x==1.0)): 2.159 + r = 0.0 2.160 + 2.161 + ##/* for x < 2.0 */ 2.162 + elif (ix<2): 2.163 + if(x<=0.9): ##/* lgamma(x) = lgamma(x+1)-log(x) */ 2.164 + r = -log(x) 2.165 + if (x>=0.7316): 2.166 + y = one-x 2.167 + z = y*y 2.168 + p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))) 2.169 + p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))) 2.170 + p = y*p1+p2 2.171 + r += (p-0.5*y) 2.172 + elif (x>=0.23164): 2.173 + y= x-(tc-one) 2.174 + z = y*y 2.175 + w = z*y 2.176 + p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))) ## /* parallel comp */ 2.177 + p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))) 2.178 + p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))) 2.179 + p = z*p1-(tt-w*(p2+y*p3)) 2.180 + r += (tf + p) 2.181 + else: 2.182 + y = x 2.183 + p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))) 2.184 + p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))) 2.185 + r += (-0.5*y + p1/p2) 2.186 + else: 2.187 + r = zero 2.188 + if(x>=1.7316): 2.189 + y=2.0-x ##/* [1.7316,2] */ 2.190 + z = y*y 2.191 + p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))) 2.192 + p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))) 2.193 + p = y*p1+p2 2.194 + r += (p-0.5*y) 2.195 + elif(x>=1.23164): 2.196 + y=x-tc ##/* [1.23,1.73] */ 2.197 + z = y*y 2.198 + w = z*y 2.199 + p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))) ## /* parallel comp */ 2.200 + p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))) 2.201 + p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))) 2.202 + p = z*p1-(tt-w*(p2+y*p3)) 2.203 + r += (tf + p) 2.204 + else: 2.205 + y=x-one 2.206 + p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))) 2.207 + p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))) 2.208 + r += (-0.5*y + p1/p2) 2.209 + ##/* x < 8.0 */ 2.210 + elif(ix<4): 2.211 + i = int(x) 2.212 + t = zero 2.213 + y = x-i 2.214 + p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))) 2.215 + q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))) 2.216 + r = half*y+p/q 2.217 + z = one ##/* lgamma(1+s) = log(s) + lgamma(s) */ 2.218 + while (i>2): 2.219 + i -=1 2.220 + z *= (y+i) 2.221 + r += log(z) 2.222 + 2.223 + ##/* 8.0 <= x < 2**58 */ 2.224 + elif (ix<58): 2.225 + t = log(x) 2.226 + z = one/x 2.227 + y = z*z 2.228 + w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))) 2.229 + r = (x-half)*(t-one)+w 2.230 + 2.231 + ##/* 2**58 <= x <= inf */ 2.232 + else: 2.233 + r = x*(log(x)-one) 2.234 + 2.235 + if (e<0): 2.236 + r = nadj - r 2.237 + return signgamp*r
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/sbbs_2009/modules/alignment.py Sat Jul 09 16:30:28 2011 +0400 3.3 @@ -0,0 +1,331 @@ 3.4 +#!/usr/bin/python 3.5 +""" 3.6 +Contain classes: 3.7 + Alignment 3.8 + AlignmentExceptions 3.9 + 3.10 + A bit of phylosophy. Class Alignment actually represents relation between 3.11 + Blocks, i.e. a system of blocks in the alignment, and Sequences, a list of Sequence objects 3.12 + 3.13 +""" 3.14 +import configure 3.15 +import os 3.16 +from sequence import Sequence 3.17 + 3.18 +class AlignmentExceptions(Exception): 3.19 + pass 3.20 + 3.21 + 3.22 +class Alignment(object): 3.23 + """ 3.24 + Alignment consists of a body and additional data 3.25 + Body rows are in 1-1 correspondence with subset of Sequence objects in the list Sequences 3.26 + No assuptions about the location and the order of that subset in the list are assumed 3.27 + Body columns are alignment positions 3.28 + Body elements are indexes of Sequence amino acids in Sequence object 3.29 + 3.30 + Data: 3.31 + MAIN BODY 3.32 + self.sequences # list of sequences, each member is a sequence object 3.33 + self.sequence_indexes_s # list of indexes in the list of sequences 3.34 + self.position_indexes_a # list of position indexes in the alignment; actually, 0, 1, .., Len(alignment)-1 3.35 + self.body # matrix of indexes; 3.36 + an index is the index of an aa in the sequence or None instead of gap symbols 3.37 + NEW!!! 3.38 + # aligned_blocks_in_sequence_indexes_a is an element of the list self.aligned_blocks_in_sequences below 3.39 + # list of tripples 3.40 + # [aligned_block_index,from_position_index_a,to_position_index_a], 3.41 + # decribing intersection (if exists) of an aligned block with a sequence 3.42 + # Those lists are stored in the list 3.43 + self.aligned_blocks_in_sequences 3.44 + 3.45 + Methods: 3.46 + +self.get_from_fasta # fasta file expected 3.47 + +self.add_aligned_sequence # It is assumed that string contains a sequence aligned to other sequences of the alignment 3.48 + +self.get_from_file # first, convert to fasta by seqret 3.49 + +self.get_aa # returns aa or None 3.50 + self.initiate_blocks 3.51 + self.add_aligned_block_to_sequence # Modifing self.aligned_blocks_in_sequences 3.52 + 3.53 + """ 3.54 + 3.55 + def __init__(self, sequences, sequence_indexes_s=None, position_indexes_a=None, body=None): 3.56 + self.sequences=sequences # a list of sequence objects 3.57 + self.sequence_indexes_s=sequence_indexes_s 3.58 + self.position_indexes_a=position_indexes_a 3.59 + self.body=body 3.60 + if self.sequence_indexes_s==None: 3.61 + self.sequence_indexes_s=[] 3.62 + if self.position_indexes_a==None: 3.63 + self.position_indexes_a=[] 3.64 + if self.body==None: 3.65 + self.body=[] 3.66 + 3.67 + def get_aa(self,sequence_index_a, position_index_a): 3.68 + """ 3.69 + Author: AAl 3.70 + Status: tested successfully in the frame of get_from_fasta 3.71 + 3.72 + sequence_index_a is an index in self.sequence_indexes_s, NOT IN the list seequences! BE CUREFULL!!! 3.73 + """ 3.74 + try: 3.75 + body_element=self.body[sequence_index_a][position_index_a] 3.76 + except IndexError, errmessage: 3.77 + raise AlignmentExceptions("AlignmentError, body_element: " + str(errmessage)) 3.78 + if body_element == None: 3.79 + return None 3.80 + else: 3.81 + try: 3.82 + sequence_index_s=self.sequence_indexes_s[sequence_index_a] 3.83 + except IndexError, errmessage: 3.84 + raise AlignmentException("AlignmentError, contradiction in the object:\ 3.85 + len(sequence_indexes_s) < len(body). " + + str(errmessage)) 3.86 + try: 3.87 + aa=self.sequences[sequence_index_s].string[body_element] 3.88 + except IndexError, errmessage: 3.89 + raise AlignmentException("AlignmentError, contradiction in the object:\ 3.90 + sequence_indexe_s from body is out of sequences. " + + str(errmessage)) 3.91 + return aa 3.92 + 3.93 + def get_from_file(self, file_name): 3.94 + """ 3.95 + Get alignment from a file with given filename 3.96 + ttt.msf: 3.97 + !!AA_MULTIPLE_ALIGNMENT 1.0 3.98 + 3.99 + ttt.msf MSF: 7 Type: P 06/02/09 CompCheck: 6874 .. 3.100 + 3.101 + Name: seq_1 Len: 7 Check: 2819 Weight: 1.00 3.102 + Name: seq_2 Len: 7 Check: 1818 Weight: 1.00 3.103 + Name: seq_3 Len: 7 Check: 2237 Weight: 1.00 3.104 + 3.105 + // 3.106 + 3.107 + 1 7 3.108 + seq_1 ~~acd~~ 3.109 + seq_2 aaa.def 3.110 + seq_3 aa.cde~ 3.111 + 3.112 + >>> sequences=[] 3.113 + >>> test_alignment=Alignment(sequences) 3.114 + >>> test_alignment.get_from_file("test.msf") 3.115 + >>> print len(sequences) 3.116 + 3 3.117 + >>> print test_alignment.sequences[0].string 3.118 + acd 3.119 + >>> print test_alignment.sequences[1].string 3.120 + aaadef 3.121 + >>> print test_alignment.sequences[2].string 3.122 + aacde 3.123 + >>> print test_alignment.body[0] 3.124 + [None, None, 0, 1, 2, None, None] 3.125 + >>> print test_alignment.body[1] 3.126 + [0, 1, 2, None, 3, 4, 5] 3.127 + >>> print test_alignment.body[2] 3.128 + [0, 1, None, 2, 3, 4, None] 3.129 + >>> print test_alignment.position_indexes_a 3.130 + [0, 1, 2, 3, 4, 5, 6] 3.131 + >>> print test_alignment.sequence_indexes_s 3.132 + [0, 1, 2] 3.133 + >>> print test_alignment.get_aa(2, 5) 3.134 + e 3.135 + >>> print test_alignment.get_aa( 0, 1) 3.136 + None 3.137 + """ 3.138 + import sys 3.139 + os.system("seqret " + file_name + " ../jobs/temp_alignment.fasta") 3.140 + try: 3.141 + f=open("../jobs/temp_alignment.fasta") 3.142 + self.get_from_fasta(f) 3.143 + f.close() 3.144 + except IOError, errmessage: 3.145 + #print errmessage 3.146 + #print "file_name = "+str(file_name) 3.147 + raise "Can not read alignment!!!" 3.148 + #os.system("rm ../jobs/temp_alignment.fasta") 3.149 + 3.150 + 3.151 + def get_from_fasta(self,file): 3.152 + r""" 3.153 + Author: AAl 3.154 + Status: tested successfully 3.155 + 3.156 + - Reads file with alignment in fasta format 3.157 + Sequence names are recognized according to fasta format (between ">" and space character) 3.158 + The rest of 1st line is considered as description 3.159 + Sequences with special names ("aligned" etc.) are ignored 3.160 + Duplications of sequence names are ALLOWED 3.161 + Whitespace symbols and numbers are ignored; see missing_characters in configure.py 3.162 + Empty lines are ignored 3.163 + All gap symbols ".", "~", "-" are replased by "-" 3.164 + - Creates self.body 3.165 + - Creates sequence objects and include them in sequences list 3.166 + - Creates sequence_indexes_s 3.167 + - Creates position_indexes 3.168 + 3.169 + 3.170 + test.fasta: 3.171 + >seq_1 first seq 3.172 + .-acd- 3.173 + 3.174 + >seq_2 second seq 3.175 + aaa-def 3.176 + >seq_3 3.177 + aa 3.178 + -cde. 3.179 + 3.180 + >>> sequences=[] 3.181 + >>> name="test" 3.182 + >>> test_alignment=Alignment(sequences) 3.183 + >>> f=open("test.fasta") 3.184 + >>> test_alignment.get_from_fasta(f) 3.185 + >>> f.close() 3.186 + >>> print len(sequences) 3.187 + 3 3.188 + >>> print test_alignment.sequences[0].string 3.189 + acd 3.190 + >>> print test_alignment.sequences[1].string 3.191 + aaadef 3.192 + >>> print test_alignment.sequences[2].string 3.193 + aacde 3.194 + >>> print test_alignment.body[0] 3.195 + [None, None, 0, 1, 2, None, None] 3.196 + >>> print test_alignment.body[1] 3.197 + [0, 1, 2, None, 3, 4, 5] 3.198 + >>> print test_alignment.body[2] 3.199 + [0, 1, None, 2, 3, 4, None] 3.200 + >>> print test_alignment.position_indexes_a 3.201 + [0, 1, 2, 3, 4, 5, 6] 3.202 + >>> print test_alignment.sequence_indexes_s 3.203 + [0, 1, 2] 3.204 + >>> print test_alignment.get_aa(2, 5) 3.205 + e 3.206 + >>> print test_alignment.get_aa( 0, 1) 3.207 + None 3.208 + """ 3.209 +#Read file 3.210 + file_lines=file.readlines() 3.211 +#Initialise 3.212 + sequence_line_num=0 # number of file line describing a sequence; 3.213 + sequence_string="" # sequence from file written in one string; 3.214 + # gap sybbols changed to "-" ; length does not controlled 3.215 + sequence_name="" # ">name description" is expected 3.216 + sequence_description="" 3.217 + self.sequence_indexes=[] 3.218 + self.position_indexes=[] 3.219 + self.body =[] 3.220 + num_of_ignored_strings=0 3.221 + 3.222 + for line in file_lines: 3.223 + 3.224 +#Ignore empty line 3.225 + if len(line.strip())==0: 3.226 + continue 3.227 + 3.228 +#1st sequence line 3.229 + if line[0]==">": 3.230 + if sequence_line_num > 0 : 3.231 +#ignore special names 3.232 + if sequence_name.lower() in configure.special_sequence_names: 3.233 + num_of_ignored_strings=num_of_ignored_strings+1 3.234 +#Close previouse sequence 3.235 + else: 3.236 + new_sequence=self.add_aligned_sequence(sequence_name,sequence_description,sequence_string) 3.237 +#Open new sequence 3.238 + sequence_line_num=1 3.239 + sequence_name = line.split()[0][1:].strip() 3.240 + if len(line.split()) > 1: 3.241 + sequence_description=line.split()[1].strip() 3.242 + else: 3.243 + sequence_description="" 3.244 + sequence_string = "" 3.245 + else: 3.246 +#Extend sequence 3.247 + sequence_line_num=sequence_line_num + 1 3.248 + for y in line.strip(): 3.249 + if not y in configure.missing_characters: 3.250 + if not y in configure.gap_symbols: 3.251 + sequence_string = sequence_string + y.lower() 3.252 + else: 3.253 + sequence_string = sequence_string + "-" 3.254 +#Close last sequence 3.255 + if sequence_line_num>1: 3.256 + new_sequence=self.add_aligned_sequence(sequence_name,sequence_description,sequence_string) 3.257 + 3.258 +#Compute alignment width and height 3.259 + self.length=len(self.sequence_indexes_s) 3.260 + self.width=0 3.261 + for i in range(len(self.sequence_indexes_s)): 3.262 + if len(self.body[i]) > self.width: 3.263 + self.width=len(self.body[i]) 3.264 +#Change absent end positions to None 3.265 + for i in range(len(self.sequence_indexes_s)): 3.266 + if len(self.body[i]) < self.width: 3.267 + self.body[i] = self.body[i]+ [None]*(self.width-len(self.body[i])) 3.268 + 3.269 +#Creat position_indexes_a 3.270 + self.position_indexes_a=range(self.width) 3.271 + 3.272 + 3.273 + def add_aligned_sequence(self, sequence_name, sequence_description, sequence_string): 3.274 + """ 3.275 + Author: AAl 3.276 + Status: tested successfully in the frame of get_from_fasta 3.277 + """ 3.278 +#####!!!!!! sequence = Sequence(self.sequences) ##### CHANGED TO NEXT LINE! 3.279 + sequence = Sequence(self.sequences) 3.280 + result=sequence.add_gapped_string(sequence_name, sequence_description, sequence_string) 3.281 + if result == None: 3.282 + return None 3.283 + else: 3.284 + self.sequence_indexes_s.append(result[0]) 3.285 + self.body.append(result[1]) 3.286 + 3.287 + def initiate_blocks(self,blocks): 3.288 + """ 3.289 + Author: AAl 3.290 + Status: should be compared with class Block definitions 3.291 + 3.292 + Create block object from self alignment 3.293 + Return block 3.294 + """ 3.295 +# Create blocks object 3.296 + blocks.alignment=self 3.297 + 3.298 +# Create block object 3.299 + sequence_indexes_in_self = range[len(self.sequence_indexes_s)] 3.300 + position_indexes_in_self = range[len(self.position_indexes_a)] 3.301 + block=Block(self, sequence_indexes_in_self, position_indexes_in_self) 3.302 + block.state="initial" 3.303 +# Include block in blocks 3.304 + blocks.list_of_blocks=[block] 3.305 + blocks.stack_of_blocks=[block] 3.306 + blocks.aligned_blocks=[] 3.307 + blocks.tree="..." 3.308 + 3.309 +# Create aligned_blocks_sequences relations 3.310 + self.aligned_blocks_sequences=[] 3.311 + for i in range(len(self.sequence_indexes_a)): 3.312 + self.aligned_blocks_sequences.append([]) 3.313 + 3.314 + 3.315 + return block 3.316 + 3.317 + 3.318 +if __name__ == "__main__": 3.319 + print "Hello!" 3.320 + import doctest 3.321 + doctest.testmod() 3.322 + 3.323 + 3.324 + 3.325 + 3.326 + 3.327 + 3.328 + 3.329 + 3.330 + 3.331 + 3.332 + 3.333 + 3.334 +
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/sbbs_2009/modules/block.py Sat Jul 09 16:30:28 2011 +0400 4.3 @@ -0,0 +1,1016 @@ 4.4 +#!/usr/bin/python 4.5 +""" 4.6 +Contain classes: 4.7 + Block 4.8 + Blocks 4.9 +Contains function: 4.10 + Lgamma 4.11 +""" 4.12 +import os 4.13 +from rooted_tree import Rooted_tree 4.14 +import configure 4.15 +import alignment 4.16 +from Lgamma import Lgamma 4.17 +import inspect 4.18 +import sys 4.19 +import traceback 4.20 + 4.21 +# 4.22 +#class Blocks(object): 4.23 +# """ 4.24 +# Author: AAl 4.25 +# Status: designed 4.26 +# 4.27 +# Data: 4.28 +# list_of_blocks is a list of all Block objects of an alignment 4.29 +# a block is identified by its index in the list_of_blocks 4.30 +# tree_of_blocks is a rooted_tree object for all blocks of an alignment 4.31 +# tree_of_blocks.content = [block,...] 4.32 +# stack_of_blocks is a list of undelivered blocks 4.33 +# the list is delivered as stack 4.34 +# aligned_blocks is a list of aligned blocks 4.35 +# 4.36 +# 4.37 +# vertexes_of_blocks # list; instead of tree_of_blocks 4.38 +# 4.39 +# 4.40 +# Methods 4.41 +# self.show_blocks 4.42 +# self.add_aligned block 4.43 +# """ 4.44 +# 4.45 +# 4.46 +# def __init__(self,alignment=None,list_of_blocks=None,stack_of_blocks=None,aligned_blocks=None,tree_of_blocks=None): 4.47 +# self.alignment=alignment 4.48 +# self.list_of_blocks=list_of_blocks 4.49 +# self.tree_of_blocks=tree_of_blocks 4.50 +# self.stack_of_blocks=stack_of_blocks 4.51 +# self.aligned_blocks=aligned_blocks 4.52 +# 4.53 +# if self.list_of_blocks==None: 4.54 +# self.list_of_blocks=[] 4.55 +# if self.tree_of_blocks==None: 4.56 +# self.tree_of_blocks=Rooted_tree() 4.57 +# if self.stack_of_blocks==None: 4.58 +# self.stack_of_blocks=[] 4.59 +# if self.aligned_blocks==None: 4.60 +# self.aligned_blocks=None 4.61 +# 4.62 +# 4.63 +# 4.64 +# 4.65 +# def add_aligned_block(self, parent_block, aligned_block): 4.66 +# """ 4.67 +# Author: AAl 4.68 +# Status: in progress 4.69 +# 4.70 +# 4.71 +# """ 4.72 +# 4.73 +##Left new block to the list of blocks and stack of blocks 4.74 +# left_block_width= aligned_block.position_indexes_a - parent_block.position_indexes_a 4.75 +# if left_block_width < configure.minimal_block_width: 4.76 +# no_left_block=1 4.77 +# else: 4.78 +# no_left_block=0 4.79 +# left_block=Block( 4.80 +# parent_block.alignment, 4.81 +# parent_block.sequence_indexes_a, 4.82 +# parent_block.position_indexes_a[0:left_block_width] 4.83 +# ) 4.84 +# left_block.state="processing" 4.85 +# self.list_of_blocks.append(left_block) 4.86 +# self.stack_of_blocks.append(left_block) 4.87 +# 4.88 +# self.tree_of_blocks.add.... 4.89 +# 4.90 +# 4.91 +#************* 4.92 +# 4.93 +#class Blocks(object): 4.94 +# """ 4.95 +# Author: AAl 4.96 +# Status: designed 4.97 +# 4.98 +# Data: 4.99 +# list_of_blocks is a list of all Block objects of an alignment 4.100 +# a block is identified by its index in the list_of_blocks 4.101 +# tree_of_blocks is a rooted_tree object for all blocks of an alignment 4.102 +# tree_of_blocks.content = [block,...] 4.103 +# stack_of_blocks is a list of undelivered blocks 4.104 +# the list is delivered as stack 4.105 +# aligned_blocks is a list of aligned blocks 4.106 +# 4.107 +# 4.108 +# vertexes_of_blocks # list; instead of tree_of_blocks 4.109 +# 4.110 +# 4.111 +# Methods 4.112 +# self.show_blocks 4.113 +# self.add_aligned block 4.114 +# """ 4.115 +# 4.116 +# 4.117 +# def __init__(self,alignment=None,list_of_blocks=None,stack_of_blocks=None,aligned_blocks=None,tree_of_blocks=None): 4.118 +# self.alignment=alignment 4.119 +# self.list_of_blocks=list_of_blocks 4.120 +# self.tree_of_blocks=tree_of_blocks 4.121 +# self.stack_of_blocks=stack_of_blocks 4.122 +# self.aligned_blocks=aligned_blocks 4.123 +# 4.124 +# if self.list_of_blocks==None: 4.125 +# self.list_of_blocks=[] 4.126 +# if self.tree_of_blocks==None: 4.127 +# self.tree_of_blocks=Rooted_tree() 4.128 +# if self.stack_of_blocks==None: 4.129 +# self.stack_of_blocks=[] 4.130 +# if self.aligned_blocks==None: 4.131 +# self.aligned_blocks=None 4.132 +# 4.133 +# 4.134 +# 4.135 +# def add_aligned_block(self, parent_block, aligned_block): 4.136 +# """ 4.137 +# Should be written 4.138 +# self.add_aligned_block(aligned_block) 4.139 +# 4.140 +# """ 4.141 +# 4.142 +##Left new block to the list of blocks and stack of blocks 4.143 +# left_block_width= aligned_block.position_indexes_a - parent_block.position_indexes_a 4.144 +# if left_block_width < configure.minimal_block_width: 4.145 +# no_left_block=1 4.146 +# else: 4.147 +# no_left_block=0 4.148 +# left_block=Block( 4.149 +# parent_block.alignment, 4.150 +# parent_block.sequence_indexes_a, 4.151 +# parent_block.position_indexes_a[0:left_block_width] 4.152 +# ) 4.153 +# left_block.state="processing" 4.154 +# self.list_of_blocks.append(left_block) 4.155 +# self.stack_of_blocks.append(left_block) 4.156 +# 4.157 +# self.tree_of_blocks.add.... 4.158 +# 4.159 +# 4.160 +#************* 4.161 +import blocks 4.162 + 4.163 +class plug(object): 4.164 + def __init__(self): 4.165 + self.state="initial" 4.166 + 4.167 +class Block(object): 4.168 + """ 4.169 + Author: AAl 4.170 + Status: designed 4.171 + 4.172 + Data 4.173 + MAIN BODY 4.174 + self.alignment # Alignment object 4.175 + self.sequence_indexes_a # list of sequence indexes; 4.176 + each index is the index in alignment.sequence_indexes_a. Be cureful!!! 4.177 + self.position_indexes_a # list of position indexes; 4.178 + each index is the index in alignment.position_indexes_a. Be cureful!!! 4.179 + self.position_indexes_p # 4.180 + List of indexes; 4.181 + Each index is an index of a position in the parent block positions 4.182 + self.state # one of strings: 4.183 + "initial" !!! Move to Blocks - May be not? (Burkov) 4.184 + "aligned" 4.185 + "no_alignment" 4.186 + "pending" 4.187 + "in process" 4.188 + "processed" 4.189 + 4.190 + BLOCK EQUIPMENT 4.191 + self.rooted_tree # each vertex's content, for instance rooted_tree.root.content is a list[_^M_][_$_] 4.192 + # Vertex.content[0] is the respective sequence's name (None for nodes)[_^M_][_$_] 4.193 + # Vertex.content[1] is its <sum of subtree lengthes + length of the way from root to Vertex>[_^M_][_$ 4.194 + 4.195 + self.gapped_positions # sorted list of position indexes 4.196 + self.unalignable_regions # unalignable_regions 4.197 + sorted list of lists [from,to,"why",shortest,longest] 4.198 + from and to are indexes of positions 4.199 + "why" is a string either "gapped" or "ss_contradiction" 4.200 + may be, something else 4.201 + shortest is the length of the shortest insertion in unalignable region 4.202 + longest is the length of the longest insertion in unalignable region 4.203 + self.alignable_regions #[from,to] 4.204 + self.conserved_positions # sorted list of (position_index, position_consensus, position_percent) 4.205 + 4.206 + OPTIONAL 4.207 + self.consensus_ss # ss is for secondary structure 4.208 + consensus is created by comparison of ss of all available 3D data 4.209 + self.important_aas # list of tuples (position_index,sequence_indexes?,type of activity 4.210 + 4.211 + 4.212 + METHODS: 4.213 + 4.214 + EXTERNAL 4.215 + self.show() # Display block in parent block in alignment 4.216 + +self.get_aligned_block # 4.217 + 4.218 + INTERNAL 4.219 + +self._delete_empty_positions() # Delete all empty positions 4.220 + self.detect_gapped_positions() # 4.221 + For each position 4.222 + - if there is a gap: push position_index to self.gapped_positions 4.223 + - detect unalignable_regions : [from,to,"gapped",shortest,longest] , i.e. 4.224 + maximal segments having gapped positions at the ends such that 4.225 + an interval between any neighbor gapped positions inside is less than configure.minimal_block_width 4.226 + "from" and "to" are exactly the marginal gapped positions of the region 4.227 + shortest is the length of the shortest insertion in unalignable region 4.228 + longest is the length of the longest insertion in unalignable region 4.229 + - retuns non_gapped_positions, which DON'T BELONG TO UNALIGNED REGIONS 4.230 + 4.231 + self.detect_conserved_positions(non_gapped_positions) 4.232 + - takes non_gapped_positions, which DON'T BELONG TO UNALIGNED REGIONS on input 4.233 + - if position is conserved (and isn't in unalignable region) then add 4.234 + (position_index, position_group, position_fraction) to self.conserved_positions 4.235 + 4.236 + +self.to_fasta() # Creates temporfary file tmp_block.fasta 4.237 + # sequence names are nums made from alignment sequence_indexes_a 4.238 + 4.239 + +self.create_rooted_tree() # Result is rooted_tree object self.rooted_tree 4.240 + self.get_vertical_block(self) 4.241 + # return block or None 4.242 + if None change self.state either to 4.243 + "aligned" or to 4.244 + "no_vertical_blocks" 4.245 + self.create_parenthetic_structure_with_EMBOSS_Kimura_UPGMA(self,block) 4.246 + # Use external programs 4.247 + # return brackets 4.248 + self.get_subset_from_rooted_tree(candidate_nodes) 4.249 + #returns list [[list_of_sequence_indexes_a][list_of_candidate_nodes]] 4.250 + self.add_aligned_block(aligned_block) 4.251 + 4.252 + self.downsize(aligned_block) 4.253 + self.upside(aligned_block) 4.254 + 4.255 + To get an aa, the following command is used: 4.256 + self.alignment.get_aa(sequense_index_a,position_index_a) 4.257 + 4.258 + POSTPONED 4.259 + self.get_subsets_from_unrooted_tree() 4.260 + 4.261 + """ 4.262 + 4.263 + 4.264 + def __init__(self , alignment, sequence_indexes_a, position_indexes_a, is_gapped=None): 4.265 + self.alignment=alignment 4.266 + self.sequence_indexes_a=sequence_indexes_a 4.267 + self.position_indexes_a=position_indexes_a 4.268 + if is_gapped==None: self.block_of_gaps=self._delete_empty_positions() 4.269 + else: self.block_of_gaps=None 4.270 +#************* 4.271 +# self.position_indexes_p=self.delete_empty_positions() 4.272 + def _delete_empty_positions(self): 4.273 +#************* 4.274 + """ 4.275 + Author: AAl 4.276 + State: seems to work 4.277 + 4.278 + Delete all empty positions in the block. 4.279 + Return list of position indexes in initial - with empty positions - state 4.280 + Empty positions appear in horizontal sub-block. 4.281 + Therefore that list contains indexes of all positions in parent block 4.282 + """ 4.283 + new_position_indexes=[] # indexes in the alignment are shown 4.284 + number_of_empty_positions=0 4.285 + position_indexes_in_parent=[] # for each new+position its index before deletion is shown# 4.286 + positions_in_gap_block=[] 4.287 + for index in range(len(self.position_indexes_a)): 4.288 + position_index=self.position_indexes_a[index] 4.289 + flag=0 # if empty" 4.290 + for sequence_index in self.sequence_indexes_a: 4.291 + if self.alignment.get_aa(sequence_index,position_index) <> None: 4.292 + flag=1 4.293 + break 4.294 + if flag <> 0 : 4.295 + new_position_indexes.append(position_index) 4.296 + position_indexes_in_parent.append(index) 4.297 + else: 4.298 + positions_in_gap_block.append(position_index) 4.299 + self.position_indexes_a = new_position_indexes 4.300 + if len(positions_in_gap_block)!=0: 4.301 + block_of_gaps=Block(self.alignment, self.sequence_indexes_a, positions_in_gap_block, 1) 4.302 + block_of_gaps.state="no_alignment" 4.303 + return block_of_gaps 4.304 + else: 4.305 + return None 4.306 + # Seems that number_of_empty_positions is useless; what for do we store position_indexes_in_parent if we alter the position_indexes_a? I've killed it - Burkov 4.307 + 4.308 + def to_fasta(self,f): 4.309 + """ 4.310 + Author: AAl 4.311 + State: 4.312 + Creates temporary file "temp_block.fasta" in current directory 4.313 + Sequence names are sequence num's (sequence_index+1) 4.314 + """ 4.315 + for i in range(len(self.sequence_indexes_a)): 4.316 + sequence="" 4.317 + f.write(">"+str(self.sequence_indexes_a[i]+1)+"\n") 4.318 + for j in self.position_indexes_a: 4.319 + if self.alignment.get_aa(self.sequence_indexes_a[i],j): 4.320 + sequence=sequence+self.alignment.get_aa(self.sequence_indexes_a[i],j) 4.321 + else: 4.322 + sequence=sequence+"-" 4.323 + for k in range( (len(sequence)-1)/60+1 ): 4.324 + sequence_line=sequence[k*60:k*60+60] 4.325 + f.write(sequence_line+"\n") 4.326 + 4.327 + return [len(self.sequence_indexes_a),len(self.position_indexes_a)] 4.328 + 4.329 + 4.330 + def create_distance_matrix(self, method="simple"): 4.331 + ''' 4.332 + create_distance_matrix 4.333 + return object of class Phy_matrix containing distances between sequences of the block 4.334 + Author: S.A.S. 4.335 + ''' 4.336 + import phylip 4.337 + from phylip import Phy_matrix 4.338 + mat=Phy_matrix(()) 4.339 + for i1 in range(len(self.sequence_indexes_a)): 4.340 + mat=mat.addobject(str(self.sequence_indexes_a[i1]+1)) 4.341 + for i2 in range(i1): 4.342 + npositions=0 4.343 + distance=0.0 4.344 + for j in self.position_indexes_a: 4.345 + #aa1 = self.alignment.get_aa(i1,j) 4.346 + #aa2 = self.alignment.get_aa(i2,j) 4.347 + aa1 = self.alignment.get_aa(self.sequence_indexes_a[i1],j) 4.348 + aa2 = self.alignment.get_aa(self.sequence_indexes_a[i2],j) 4.349 + if aa1 and aa2: 4.350 + npositions += 1 4.351 + if aa1.upper() != aa2.upper(): 4.352 + distance += 1.0 4.353 + if npositions == 0: 4.354 + mat=mat.setdist(str(self.sequence_indexes_a[i1]+1), str(self.sequence_indexes_a[i2]+1), 1.0) 4.355 + else: 4.356 + mat=mat.setdist(str(self.sequence_indexes_a[i1]+1), str(self.sequence_indexes_a[i2]+1), distance/npositions) 4.357 + return mat 4.358 + 4.359 + def create_rooted_tree(self, mode="external"): 4.360 + """ 4.361 + create_rooted_tree 4.362 + if mode = "external" (default) create parenthetic structure by external programs fprotdist and fneighbor 4.363 + if mode = "semiexternal" create matrix by internal method create_distance_matrix(), then parenthetic structure by fneighbor 4.364 + create rooted_tree object 4.365 + parenthetic_structure to tree 4.366 + """ 4.367 + if mode == "external": 4.368 + f=open("./jobs/temptree.fasta","w") 4.369 + self.to_fasta(f) 4.370 + f.close() 4.371 + 4.372 + os.system("fprotdist ./jobs/temptree.fasta -out ./jobs/temptree.dist -method d 2>>./jobs/temptree.log >>./jobs/temptree.log ") 4.373 + os.system("fneighbor ./jobs/temptree.dist -outfile ./jobs/temptree.nj -outtreefile ./jobs/temptree.tre -treetype u 2>>./jobs/temptree.log >>./jobs/temptree.log") 4.374 + f=open("./jobs/temptree.tre", "r") 4.375 + tree=f.readlines() 4.376 + f.close() 4.377 + line="" 4.378 + parenthetic_structure=line.join(tree).replace("\n","").replace("\r","").replace("\t","").replace(" ","") 4.379 + elif mode == "semiexternal": 4.380 + mat = self.create_distance_matrix() 4.381 + #if mat.max() == 0: return False 4.382 + f=open("./jobs/temptree.dist","w") 4.383 + f.write(str(mat)) 4.384 + f.close() 4.385 + os.system("fneighbor ./jobs/temptree.dist -outfile ./jobs/temptree.nj -outtreefile ./jobs/temptree.tre -treetype u 2>>./jobs/temptree.log >>./jobs/temptree.log") 4.386 + f=open("./jobs/temptree.tre", "r") 4.387 + tree=f.readlines() 4.388 + f.close() 4.389 + line="" 4.390 + parenthetic_structure=line.join(tree).replace("\n","").replace("\r","").replace("\t","").replace(" ","") 4.391 + else: 4.392 + raise "Method "+mode+" unknown" 4.393 + # end if - elif 4.394 + self.rooted_tree=Rooted_tree() 4.395 + self.rooted_tree.parenthetic_structure_to_tree(parenthetic_structure) 4.396 + os.system("rm ./jobs/temptree.*") 4.397 + return True 4.398 + #sys.exit() 4.399 + 4.400 + def detect_gapped_positions(self): 4.401 + """ 4.402 + Author: Burkov 4.403 + Status: one test passed; 4.404 + 4.405 + constructs: 4.406 + self.gapped_positions 4.407 + self.unalignable_regions 4.408 + 4.409 + returns: 4.410 + non_gapped_positions (list of positions OUTSIDE unalignable_regions, that are non-gapped) 4.411 + 4.412 + >>> import block 4.413 + >>> test_alignment=alignment.Alignment([]) 4.414 + >>> test_alignment.get_from_file("test_1.msf") 4.415 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.416 + >>> block1.detect_gapped_positions() 4.417 + [0, 1, 2, 3, 4, 5, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 32, 33, 34, 35, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] 4.418 + """ 4.419 + self.unalignable_regions=[] 4.420 + self.gapped_positions=[] 4.421 + non_gapped_positions=[] 4.422 + #search for gapped postitions 4.423 + for i in self.position_indexes_a: 4.424 + is_gapped=0 4.425 + for j in self.sequence_indexes_a: 4.426 + if self.alignment.get_aa(j,i)==None: 4.427 + is_gapped=1 4.428 + break 4.429 + if is_gapped==1: 4.430 + self.gapped_positions.append(i) 4.431 + #search for unalignable regions 4.432 + #print "gapped_positions = "+str(self.gapped_positions) 4.433 + #first, consider the beginning of the analyzed block 4.434 + for i in self.position_indexes_a: 4.435 + if len(self.gapped_positions)>0:#proove that the accepted block's beginning is longer than configure.minimal_block_width 4.436 + if self.position_indexes_a.index(self.gapped_positions[0]) - self.position_indexes_a[0]+1 >= configure.minimal_block_width: 4.437 + if i>=self.gapped_positions[0]: break 4.438 + else: non_gapped_positions.append(i) 4.439 + else:#if no gaps at all, the block should be longer than configure.minimal_block_width 4.440 + if len(self.position_indexes_a) >= configure.minimal_block_width: 4.441 + non_gapped_positions.append(i) 4.442 + #consider all the intervals between gaps: they should be greater than 4.443 + flag=0 4.444 + for i in self.gapped_positions: 4.445 + if flag==0: 4.446 + left_margin=i 4.447 + right_margin=i 4.448 + flag=1 4.449 + elif flag==1: 4.450 + if self.position_indexes_a.index(i) - self.position_indexes_a.index(right_margin)-1<configure.minimal_block_width: 4.451 + right_margin=i 4.452 + else: 4.453 + self.unalignable_regions.append([left_margin,right_margin,"gapped",None,None]) 4.454 + for x in range(self.position_indexes_a.index(right_margin)+1, self.position_indexes_a.index(i)): 4.455 + non_gapped_positions.append(self.position_indexes_a[x]) 4.456 + left_margin=i 4.457 + right_margin=i 4.458 + #consider the ending of the analyzed block 4.459 + if flag==1: 4.460 + if len(self.position_indexes_a)-1+1-self.position_indexes_a.index(right_margin)-1<configure.minimal_block_width: 4.461 + right_margin=len(self.position_indexes_a)-1 4.462 + self.unalignable_regions.append([left_margin,right_margin,"gapped",None,None]) 4.463 + else: 4.464 + self.unalignable_regions.append([left_margin,right_margin,"gapped",None,None]) 4.465 + for x in range(self.position_indexes_a.index(right_margin)+1, len(self.position_indexes_a)-1+1): 4.466 + non_gapped_positions.append(self.position_indexes_a[x]) 4.467 + #fill in the shortest and longest 4.468 + for x in self.unalignable_regions: 4.469 + longest=0 4.470 + shortest=x[1]-x[0]+1 4.471 + for j in self.sequence_indexes_a: 4.472 + current=0 4.473 + for i in range(x[0],x[1]+1): 4.474 + if self.alignment.get_aa(j,i)!=None: current=current+1 4.475 + if current>longest: longest=current 4.476 + if current<shortest: shortest=current 4.477 + x[3]=shortest 4.478 + x[4]=longest 4.479 + 4.480 + return non_gapped_positions 4.481 + 4.482 + 4.483 + def detect_conserved_positions(self, non_gapped_positions): 4.484 + """ 4.485 + Author: Burkov 4.486 + Status: in progress 4.487 + 4.488 + requires self.rooted_tree 4.489 + constucts self.conserved_positions 4.490 + ???should return list of alignable regions??? 4.491 + 4.492 + >>> import block 4.493 + >>> test_alignment=alignment.Alignment([]) 4.494 + >>> test_alignment.get_from_file("test_1.msf") 4.495 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.496 + >>> block1.create_rooted_tree() 4.497 + >>> block1.detect_conserved_positions(block1.detect_gapped_positions()) 4.498 + """ 4.499 + 4.500 + #search for conserved_positions (position_index, position_consensus, position_percent) 4.501 + self.conserved_positions=[] 4.502 + #self.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.503 + total_sum_of_lengthes=self.rooted_tree.root.content[1] 4.504 + if total_sum_of_lengthes!=0.0: 4.505 + for i in non_gapped_positions: 4.506 + current_leader=None 4.507 + for k,k_ in configure.groups_of_aa_1.items(): 4.508 + #search for the sequences_which_contain_this_group 4.509 + sequences_which_contain_this_group=[] 4.510 + for j in self.sequence_indexes_a: 4.511 + if k_[0].lower().rfind(self.alignment.get_aa(j,i))!=-1: 4.512 + sequences_which_contain_this_group.append(j) 4.513 + #search for the leaves corresponding to sequences_which_contain_this_group 4.514 + set_of_leaves=[] 4.515 + for j in sequences_which_contain_this_group: 4.516 + for l in self.rooted_tree.leaves: 4.517 + if str(j+1)==l.content[0]: 4.518 + set_of_leaves.append(l) 4.519 + break 4.520 + if self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)/total_sum_of_lengthes>=configure.conservation_threshold: 4.521 + #if i==372: 4.522 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" set_of_leaves = "+str(set_of_leaves)+", k = "+str(k)+", total_sum_of_lengthes = "+str(total_sum_of_lengthes)+", self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves) = "+str(self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves))+", sequences_which_contain_this_group = "+str(sequences_which_contain_this_group) 4.523 + #for a in self.rooted_tree.depth_first_traversal(): 4.524 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" nesting depth = "+str(a[1])+", parental_edge = "+str(a[0].parental_edge)+", content = "+str(a[0].content) 4.525 + if current_leader==None: 4.526 + current_leader = (i,k,self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)/total_sum_of_lengthes) 4.527 + else: 4.528 + if configure.groups_of_aa_1[current_leader[1]][2]>configure.groups_of_aa_1[k][2]: 4.529 + current_leader = (i,k,self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)/total_sum_of_lengthes) 4.530 + if current_leader!=None: 4.531 + self.conserved_positions.append(current_leader) 4.532 + else: 4.533 + for i in non_gapped_positions: 4.534 + for k,k_ in configure.groups_of_aa_1.items(): 4.535 + if len(k_[0])==1 and k_[0].lower().rfind(self.alignment.get_aa(self.sequence_indexes_a[0],i))!=-1: 4.536 + self.conserved_positions.append((i,k,1.0)) 4.537 + 4.538 + 4.539 + def check_significance(self, subblock1_conserved_positions, subblock2_conserved_positions, formula=1): 4.540 + """ 4.541 + Author: Burkov 4.542 + State: scheduled 4.543 + 4.544 + Assumes, that rooted_trees for subblocks are already constructed 4.545 + """ 4.546 + if formula==1: 4.547 + if self._formula_1(subblock1_conserved_positions, subblock2_conserved_positions): return True 4.548 + else: return False 4.549 + elif formula==2: 4.550 + pass 4.551 + elif formula==3: 4.552 + pass 4.553 + 4.554 + 4.555 + def _formula_1(self, subblock1_conserved_positions, subblock2_conserved_positions): 4.556 + """ 4.557 + This is the implementation of the most primitive formula 4.558 + """ 4.559 + import math 4.560 + #calculate the sum of logprobabilities of groups in actual block 4.561 + actual_logprobability=0 4.562 + for i in range(len(self.conserved_positions)): 4.563 + actual_logprobability=actual_logprobability+configure.groups_of_aa_1[self.conserved_positions[i][1]][2] 4.564 + #calculate the length and maximal_number_of_conserved_positions 4.565 + if len(subblock1_conserved_positions)>len(subblock2_conserved_positions): maximal_possible_number_of_conserved_positions=len(subblock2_conserved_positions) 4.566 + else: maximal_possible_number_of_conserved_positions=len(subblock1_conserved_positions) 4.567 + length=0 4.568 + for i in self.position_indexes_a: 4.569 + length=length+1 4.570 + if (len(subblock1_conserved_positions)+len(subblock2_conserved_positions))<length: minimal_possible_number_of_conserved_positions=0 4.571 + else: minimal_possible_number_of_conserved_positions=len(subblock1_conserved_positions)+len(subblock2_conserved_positions)-length 4.572 + if minimal_possible_number_of_conserved_positions<len(self.conserved_positions): minimal_possible_number_of_conserved_positions=len(self.conserved_positions) 4.573 + #denominator and numerator outcomes counts 4.574 + denominator = round(math.exp(Lgamma(length+1)-Lgamma(len(subblock1_conserved_positions)+1)-Lgamma(length-len(subblock1_conserved_positions)+1)+Lgamma(length+1)-Lgamma(len(subblock2_conserved_positions)+1)-Lgamma(length-len(subblock2_conserved_positions)+1))) 4.575 + numerator=0; 4.576 + for a in range(minimal_possible_number_of_conserved_positions, maximal_possible_number_of_conserved_positions+1): 4.577 + numerator=numerator+round(math.exp(Lgamma(length+1)-Lgamma(len(subblock1_conserved_positions)-a+1)-Lgamma(length-len(subblock1_conserved_positions)-len(subblock2_conserved_positions)+a+1)-Lgamma(len(subblock2_conserved_positions)-a+1)-Lgamma(a+1))) 4.578 + #a rough heuristic formula is implemented here 4.579 + probability = numerator / denominator * math.pow(2,actual_logprobability) 4.580 + if probability>=configure.probability_threshold: 4.581 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" SPLIT CONDEMNED" 4.582 + return False 4.583 + else: 4.584 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" SPLIT APPROVED" 4.585 + return True 4.586 + #MAY BE I SHOULD MULTIPLY BY C_a^self.conserved_positions, but there's a risk of probability exceeding 1 4.587 + 4.588 + def traverse_block_splits(self): 4.589 + r""" 4.590 + Author: Burkov 4.591 + State: scheduled 4.592 + 4.593 + Consequently traverses the nodes and vertices of self.rooted_tree, using rooted_tree.next_vertex, 4.594 + at each step splits the block onto two subblocks, finds out, whether their alignment is relevant 4.595 + or not and if it is, goes to the next step. If the alignment is irrelevant at least at one step, 4.596 + the block is discarded. Otherwise, the block is considered relevant. 4.597 + 4.598 + Requires self.create_rooted_tree and self.rooted_tree.calculate_root_subtree_sum_of_lengthes 4.599 + to be called first. 4.600 + 4.601 + >>> import block 4.602 + >>> import alignment 4.603 + >>> test_alignment=alignment.Alignment([]) 4.604 + >>> test_alignment.get_from_file("test_1.msf") 4.605 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.606 + >>> block1.create_rooted_tree() 4.607 + >>> block1.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.608 + >>> block1.traverse_block_splits() 4.609 + """ 4.610 + candidate_vertices=[] 4.611 + for i in self.rooted_tree.root.child_vertices: 4.612 + candidate_vertices.append(i) 4.613 + 4.614 + self.detect_conserved_positions(self.detect_gapped_positions()) 4.615 + #iterations 4.616 + flag=1 4.617 + while candidate_vertices!=[]: 4.618 + current_vertex,candidate_vertices=self.rooted_tree.next_vertex(candidate_vertices) 4.619 + if current_vertex.child_vertices==[]: 4.620 + #if leaf 4.621 + sequence_indexes_a1=[int(current_vertex.content[0])-1] 4.622 + complement_set_of_leaves=self.rooted_tree.complement_set_of_leaves([current_vertex]) 4.623 + sequence_indexes_a2=[] 4.624 + for i in complement_set_of_leaves: 4.625 + sequence_indexes_a2.append(int(i.content[0])-1) 4.626 + subblock1=Block(self.alignment, sequence_indexes_a1, self.position_indexes_a) 4.627 + subblock2=Block(self.alignment, sequence_indexes_a2, self.position_indexes_a) 4.628 + #subblock1.rooted_tree=self.rooted_tree.chop_the_tree(current_vertex) 4.629 + subblock2.rooted_tree=self.rooted_tree.chop_the_tree(complement_set_of_leaves) 4.630 + #print "mark 1" 4.631 + #print "current_vertex_content = "+str(current_vertex.content) 4.632 + #print "subblock1_sequence_indexes_a = "+str(subblock1.sequence_indexes_a) 4.633 + subblock1._initialization_of_conserved_positions_for_single_sequences() 4.634 + #print "mark 2" 4.635 + #print "subblock1.sequence_indexes_a = "+str(subblock1.sequence_indexes_a) 4.636 + #print "subblock2.sequence_indexes_a = "+str(subblock2.sequence_indexes_a) 4.637 + if len(subblock2.sequence_indexes_a)==1: 4.638 + subblock2._initialization_of_conserved_positions_for_single_sequences() 4.639 + else: 4.640 + subblock2.detect_conserved_positions(subblock2.detect_gapped_positions()) 4.641 + if self.check_significance(subblock1.conserved_positions,subblock2.conserved_positions,1): 4.642 + pass 4.643 + else: 4.644 + flag=0 4.645 + break 4.646 + else: 4.647 + #if node 4.648 + set_of_leaves=self.rooted_tree.node_to_set_of_leaves(current_vertex) 4.649 + sequence_indexes_a1=[] 4.650 + for i in set_of_leaves: 4.651 + sequence_indexes_a1.append(int(i.content[0])-1) 4.652 + complement_set_of_leaves=self.rooted_tree.complement_set_of_leaves(set_of_leaves) 4.653 + sequence_indexes_a2=[] 4.654 + for i in complement_set_of_leaves: 4.655 + sequence_indexes_a2.append(int(i.content[0])-1) 4.656 + subblock1=Block(self.alignment, sequence_indexes_a1, self.position_indexes_a) 4.657 + subblock2=Block(self.alignment, sequence_indexes_a2, self.position_indexes_a) 4.658 + subblock1.rooted_tree=self.rooted_tree.chop_the_tree(set_of_leaves) 4.659 + subblock2.rooted_tree=self.rooted_tree.chop_the_tree(complement_set_of_leaves) 4.660 + #print "mark 3" 4.661 + #print "subblock1_sequence_indexes_a = "+str(subblock1.sequence_indexes_a) 4.662 + if len(subblock1.sequence_indexes_a)==1: 4.663 + subblock1._initialization_of_conserved_positions_for_single_sequences() 4.664 + else: 4.665 + subblock1.detect_conserved_positions(subblock1.detect_gapped_positions()) 4.666 + #print "mark 4" 4.667 + if len(subblock2.sequence_indexes_a)==1: 4.668 + subblock2._initialization_of_conserved_positions_for_single_sequences() 4.669 + else: 4.670 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" subblock2.__dict__ = "+str(subblock2.__dict__) 4.671 + subblock2.detect_conserved_positions(subblock2.detect_gapped_positions()) 4.672 + if self.check_significance(subblock1.conserved_positions,subblock2.conserved_positions,1): 4.673 + pass 4.674 + else: 4.675 + flag=0 4.676 + break 4.677 + if flag==1: 4.678 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" SUCCESS" 4.679 + return True 4.680 + else: 4.681 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" FAILURE" 4.682 + return False 4.683 + 4.684 + 4.685 + def _initialization_of_conserved_positions_for_single_sequences(self): 4.686 + """ 4.687 + A function for internal usage only 4.688 + """ 4.689 + 4.690 + self.conserved_positions=[] 4.691 + for i in self.position_indexes_a: 4.692 + if self.alignment.get_aa(0,i)!=None: 4.693 + self.conserved_positions.append((i,self.alignment.get_aa(0,i),1.0)) 4.694 + 4.695 + def get_vertical_block(self): 4.696 + """ 4.697 + alignable_regions # sorted list of tuples: 4.698 + (from, to,from_index_in_conserved,to_index_in_conserved) 4.699 + from, to are indexes of positions, positions must be conserved 4.700 + from_index_conserved, to_index_conserved are 4.701 + indexes in self.conserved_positions 4.702 + returns next alignable region 4.703 + 4.704 + >>> import block 4.705 + >>> import alignment 4.706 + >>> test_alignment=alignment.Alignment([]) 4.707 + >>> test_alignment.get_from_file("test_3.msf") 4.708 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.709 + >>> block1.create_rooted_tree() 4.710 + >>> block1.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.711 + >>> block1.get_vertical_block() 4.712 + """ 4.713 + import copy 4.714 + self.detect_conserved_positions(self.detect_gapped_positions()) 4.715 + self.detect_alignable_regions() 4.716 + #print "self.alignable_regions = "+str(self.alignable_regions) 4.717 + for i in self.alignable_regions: 4.718 + if len(i)>0: 4.719 + position_indexes_of_current_subblock=[] 4.720 + for j in self.position_indexes_a[self.position_indexes_a.index(i[0][0]):self.position_indexes_a.index(i[0][1])+1]: 4.721 + position_indexes_of_current_subblock.append(j) 4.722 + current_subblock=Block(self.alignment, self._leaves_to_indexes_of_sequences(), position_indexes_of_current_subblock) 4.723 + current_subblock.rooted_tree=copy.deepcopy(self.rooted_tree)#this is a plug 4.724 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" current_subblock.__dict__ = "+str(current_subblock.__dict__) 4.725 + if current_subblock.traverse_block_splits(): 4.726 + current_subblock.state="aligned" 4.727 + return current_subblock 4.728 + return None 4.729 + 4.730 + 4.731 + def detect_alignable_regions(self): 4.732 + """ 4.733 + Should be run after detect_gapped_positions and detect_conserved_positions 4.734 + Creates: 4.735 + self.alignable_regions=[(from,to), (from1,to1), (from2, to2)] 4.736 + """ 4.737 + #!!! SHOULD CONSIDER THE CASES, WHEN THERE ARE NO GAPS OR NO CONSERVED POSITIONS !!! 4.738 + self.alignable_regions=[] 4.739 + #first special iteration 4.740 + left_margin=self.position_indexes_a[0]-1 4.741 + if len(self.gapped_positions) == 0: 4.742 + right_margin=self.position_indexes_a[len(self.position_indexes_a)-1]+1 4.743 + conserved_positions_on_current_interval=[] 4.744 + for j in self.conserved_positions: 4.745 + if j[0]>left_margin and j[0]<right_margin: 4.746 + conserved_positions_on_current_interval.append(j[0]) 4.747 + self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval)) 4.748 + else: 4.749 + right_margin=self.gapped_positions[0] 4.750 + conserved_positions_on_current_interval=[] 4.751 + for j in self.conserved_positions: 4.752 + if j[0]>left_margin and j[0]<right_margin: 4.753 + conserved_positions_on_current_interval.append(j[0]) 4.754 + self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval)) 4.755 + #normal iterations 4.756 + for i in range(1,len(self.gapped_positions)-1): 4.757 + left_margin=self.gapped_positions[i] 4.758 + right_margin=self.gapped_positions[i+1] 4.759 + conserved_positions_on_current_interval=[] 4.760 + for j in self.conserved_positions: 4.761 + if j[0]>left_margin and j[0]<right_margin: 4.762 + conserved_positions_on_current_interval.append(j[0]) 4.763 + self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval)) 4.764 + #last special iteration 4.765 + conserved_positions_on_current_interval=[] 4.766 + left_margin=self.gapped_positions[-1] 4.767 + right_margin=self.position_indexes_a[-1]+1 4.768 + for j in self.conserved_positions: 4.769 + if j[0]>left_margin and j[0]<right_margin: 4.770 + conserved_positions_on_current_interval.append(j[0]) 4.771 + self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval)) 4.772 + 4.773 + 4.774 + def _find_pairs(self, conserved_positions_on_current_interval): 4.775 + """ 4.776 + Internal method 4.777 + The output is sorted so, that the length of the output element decreases 4.778 + 4.779 + >>> import block 4.780 + >>> import alignment 4.781 + >>> test_alignment=alignment.Alignment([]) 4.782 + >>> test_alignment.get_from_file("test_3.msf") 4.783 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.784 + >>> block1.create_rooted_tree() 4.785 + >>> block1.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.786 + >>> block1.detect_conserved_positions(block1.detect_gapped_positions()) 4.787 + >>> block1._find_pairs([4,5,20,22]) 4.788 + [(4, 22), (5, 22), (5, 20)] 4.789 + """ 4.790 + output=[] 4.791 + if len(conserved_positions_on_current_interval)>1: 4.792 + flag=0 4.793 + i=0 4.794 + j=(len(conserved_positions_on_current_interval))-1 4.795 + while (i<j): 4.796 + output.append((conserved_positions_on_current_interval[i],conserved_positions_on_current_interval[j])) 4.797 + length1=0 4.798 + length2=0 4.799 + for a in range(self.position_indexes_a.index(conserved_positions_on_current_interval[i+1]), self.position_indexes_a.index(conserved_positions_on_current_interval[j])+1): 4.800 + length1+=1 4.801 + for a in range(self.position_indexes_a.index(conserved_positions_on_current_interval[i]), self.position_indexes_a.index(conserved_positions_on_current_interval[j-1])+1): 4.802 + length2+=1 4.803 + if length1>length2: i+=1 4.804 + else: j-=1 4.805 + return output 4.806 + 4.807 + def downsize(self,aligned_block): 4.808 + """ 4.809 + Should be written 4.810 + """ 4.811 + pass 4.812 + def upsize(self,aligned_block): 4.813 + """ 4.814 + Once tested 4.815 + 4.816 + >>> import alignment 4.817 + >>> import block 4.818 + >>> test_alignment = alignment.Alignment([]) 4.819 + >>> test_alignment.get_from_file("test_3.msf") 4.820 + >>> block1=block.Block(test_alignment, [2,3,4,5], [14,15,16,17,18,19]) 4.821 + >>> aligned_block = block.Block(test_alignment, [2,3,4], [14,15,16,17,18]) 4.822 + >>> aligned_block.create_rooted_tree() 4.823 + >>> aligned_block.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.824 + >>> aligned_block.detect_conserved_positions(aligned_block.detect_gapped_positions()) 4.825 + >>> output_block=block1.upsize(aligned_block) 4.826 + >>> print output_block.__dict__ 4.827 + """ 4.828 + added_sequences=[] 4.829 + for i in self.sequence_indexes_a: 4.830 + if i in aligned_block.sequence_indexes_a: 4.831 + pass 4.832 + else: 4.833 + flag=1; 4.834 + for j in range(aligned_block.position_indexes_a[0], aligned_block.position_indexes_a[-1]+1): 4.835 + if j in self.position_indexes_a: 4.836 + if j in aligned_block.position_indexes_a: 4.837 + if self.alignment.get_aa(i, j)==None: 4.838 + flag=0; 4.839 + else: 4.840 + if self.alignment.get_aa(i, j)!=None: 4.841 + flag=0; 4.842 + if flag==1: 4.843 + one_sequence_block=Block(self.alignment, [i], aligned_block.position_indexes_a) 4.844 + one_sequence_block._initialization_of_conserved_positions_for_single_sequences() 4.845 + united_block_sequence_indexes_a=aligned_block.sequence_indexes_a[:] 4.846 + united_block_sequence_indexes_a.append(i) 4.847 + united_block_sequence_indexes_a.sort() 4.848 + united_block=Block(self.alignment, united_block_sequence_indexes_a, aligned_block.position_indexes_a) 4.849 + united_block.create_rooted_tree("semiexternal") 4.850 + united_block.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.851 + united_block.detect_conserved_positions(self.detect_gapped_positions()) 4.852 + if united_block.check_significance(one_sequence_block.conserved_positions, aligned_block.conserved_positions,1): 4.853 + added_sequences.append(i); 4.854 + output_block_sequence_indexes_a=aligned_block.sequence_indexes_a[:] 4.855 + output_block_sequence_indexes_a.extend(added_sequences) 4.856 + output_block=Block(self.alignment, output_block_sequence_indexes_a, aligned_block.position_indexes_a) 4.857 + #!!!Bugfix: aligned_block positions_indexes_a should be changed to something, probably t the span between indexes of the first and last found conserved positions!!! 4.858 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" output_block.__dict__ = "+str(output_block.__dict__) 4.859 + output_block.create_rooted_tree("semiexternal") 4.860 + output_block.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.861 + output_block.detect_conserved_positions(output_block.detect_gapped_positions()) 4.862 + output_block.state="aligned" 4.863 + return output_block 4.864 + 4.865 + 4.866 + def show(self): 4.867 + """ 4.868 + Should be designed 4.869 + """ 4.870 + 4.871 + def _leaves_to_indexes_of_sequences(self): 4.872 + """ 4.873 + This is an internal method 4.874 + """ 4.875 + indexes_of_sequences=[] 4.876 + for i in self.rooted_tree.leaves: 4.877 + indexes_of_sequences.append(int(i.content[0])-1) 4.878 + indexes_of_sequences.sort() 4.879 + return indexes_of_sequences 4.880 + 4.881 + def get_aligned_block(self): 4.882 + """ 4.883 + Author: AAl (Beware, I've changed some parts of the method, even including the interface - Burkov) 4.884 + State: 4.885 + 4.886 + Returns None or aligned block and several new blocks in stack. 4.887 + If None, the state of current block is set "no_alignment" 4.888 + 4.889 + >>> import alignment 4.890 + >>> import block 4.891 + >>> test_alignment=alignment.Alignment([]) 4.892 + >>> test_alignment.get_from_file("test_3.msf") 4.893 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.894 + >>> block1.get_aligned_block() 4.895 + """ 4.896 +# Create tree 4.897 + if self.create_rooted_tree("semiexternal") == False: 4.898 + print "Content-type: text/plain\n" 4.899 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" Tree construction failed!!!" 4.900 + sys.exit() 4.901 + else: 4.902 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" self.__dict__ = "+str(self.__dict__) 4.903 + self.rooted_tree.calculate_root_subtree_sum_of_lengthes() 4.904 + candidate_nodes=[self.rooted_tree.root] 4.905 + while len(candidate_nodes) > 0: 4.906 + # Get current_node and new candidate_nodes 4.907 + current_node, candidate_nodes = self.rooted_tree.next_node(candidate_nodes) 4.908 + # Define leaves_indexes for current_node 4.909 + set_of_leaves=self.rooted_tree.node_to_set_of_leaves(current_node) 4.910 + leaves_indexes=[] 4.911 + for leaf in set_of_leaves: 4.912 + leaves_indexes.append(int(leaf.content[0])-1) 4.913 + leaves_indexes.sort() 4.914 + 4.915 + if len(set_of_leaves) >= 2: # 2=minimal number of sequences in aligned block 4.916 + # Define horizontal_block 4.917 + horizontal_block=Block(self.alignment, leaves_indexes, self.position_indexes_a ) 4.918 + #for a in self.rooted_tree.depth_first_traversal(): 4.919 + # print "Original rooted tree::nesting depth = "+str(a[1])+", parental_edge = "+str(a[0].parental_edge)+", content = "+str(a[0].content) 4.920 + if horizontal_block.position_indexes_a!=[]: horizontal_block.rooted_tree=self.rooted_tree.chop_the_tree(set_of_leaves) 4.921 + else: continue 4.922 + # Get vertical block in horizontal block 4.923 + #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" horizontal_block.__dict__ = "+str(horizontal_block.__dict__) 4.924 + aligned_block=horizontal_block.get_vertical_block() 4.925 + if aligned_block <> None: 4.926 + 4.927 + # Refine aligned_block 4.928 + #self.upsize(aligned_block) 4.929 + # Reorganize blocks appropriately 4.930 + 4.931 + return aligned_block 4.932 + self.state = "no_alignment" 4.933 + return None 4.934 + 4.935 + 4.936 + 4.937 + def find_complementary_blocks(self, aligned_block_position_indexes_a, aligned_block_sequence_indexes_a): 4.938 + """ 4.939 + Author: Burkov 4.940 + State: in progress 4.941 + 4.942 + Given the aligned_block, this method returns 0, 1, 2, 3, 4, 5 or 6 nonempty blocks, 4.943 + which compose the "self" block along with the aligned_block 4.944 + 4.945 + >>> import alignment 4.946 + >>> import block 4.947 + >>> test_alignment=alignment.Alignment([]) 4.948 + >>> test_alignment.get_from_file("test_3.msf") 4.949 + >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a) 4.950 + >>> block2=block1.get_aligned_block() 4.951 + >>> block1.find_complementary_blocks(block2.position_indexes_a, block2.sequence_indexes_a) 4.952 + """ 4.953 + #A problem: we will never detect the unalignable regions 4.954 + #(SHOULD I REMEMBER THEM, WHEN I DELETE_EMPTY_POSITIONS AND RETURN THEM, WHEN I FIND THAT THE BLOCK IS ALIGNED?) 4.955 + 4.956 + output=[] 4.957 + import copy 4.958 + left_neighbor_position_indexes_a=[] 4.959 + right_neighbor_position_indexes_a=[] 4.960 + for i in self.position_indexes_a: 4.961 + if i<aligned_block_position_indexes_a[0]: 4.962 + left_neighbor_position_indexes_a.append(i) 4.963 + elif i>aligned_block_position_indexes_a[len(aligned_block_position_indexes_a)-1]: 4.964 + right_neighbor_position_indexes_a.append(i) 4.965 + vertical_neighbor_sequence_indexes_a=copy.deepcopy(self.sequence_indexes_a) 4.966 + for i in aligned_block_sequence_indexes_a: 4.967 + vertical_neighbor_sequence_indexes_a.remove(i) 4.968 + if left_neighbor_position_indexes_a != []: 4.969 + left_neighbor=Block(self.alignment, self.sequence_indexes_a, left_neighbor_position_indexes_a) 4.970 + left_neighbor.state="pending" 4.971 + if left_neighbor.position_indexes_a!=[]: output.append(left_neighbor) 4.972 + if left_neighbor.block_of_gaps!=None: output.append(left_neighbor.block_of_gaps) 4.973 + if vertical_neighbor_sequence_indexes_a != [] :#if no sequences or 1 sequence 4.974 + #remember to include the gapped positions into the vertical neighbor 4.975 + list_of_positions_in_vertical_neighbor=[] 4.976 + for i in range(aligned_block_position_indexes_a[0], aligned_block_position_indexes_a[-1]+1): 4.977 + list_of_positions_in_vertical_neighbor.append(i) 4.978 + vertical_neighbor=Block(self.alignment, vertical_neighbor_sequence_indexes_a, list_of_positions_in_vertical_neighbor) 4.979 + if len(vertical_neighbor_sequence_indexes_a) != 1: vertical_neighbor.state="pending" 4.980 + else: vertical_neighbor.state="no_alignment" 4.981 + if vertical_neighbor.position_indexes_a!=[]: output.append(vertical_neighbor) 4.982 + if vertical_neighbor.block_of_gaps!=None: output.append(vertical_neighbor.block_of_gaps) 4.983 + if right_neighbor_position_indexes_a != []: 4.984 + right_neighbor=Block(self.alignment, self.sequence_indexes_a, right_neighbor_position_indexes_a) 4.985 + right_neighbor.state="pending" 4.986 + if right_neighbor.position_indexes_a!=[]: output.append(right_neighbor) 4.987 + if right_neighbor.block_of_gaps!=None: output.append(right_neighbor.block_of_gaps) 4.988 + return output 4.989 + 4.990 + 4.991 +###### OPTIONAL 4.992 + def get_ss(self): 4.993 + """ 4.994 + Construct self.consensus_ss 4.995 + """ 4.996 + def get_important_aa(self): 4.997 + 4.998 + """ 4.999 + Construct self.important_aas 4.1000 + """ 4.1001 + def get_subsets_from_unrooted_tree(self): 4.1002 + """ 4.1003 + - Creats tree object 4.1004 + - Convert tree to list subsets of subset objects 4.1005 + - Sort subsets 4.1006 + Result is a list "subsets" 4.1007 + """ 4.1008 + subsets=[] 4.1009 + return subsets 4.1010 + 4.1011 + 4.1012 + 4.1013 + 4.1014 +############################################################################################################### 4.1015 + 4.1016 +if __name__ == "__main__": 4.1017 + print "Hello!" 4.1018 + import doctest 4.1019 + doctest.testmod()
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/sbbs_2009/modules/blocks.py Sat Jul 09 16:30:28 2011 +0400 5.3 @@ -0,0 +1,43 @@ 5.4 +import rooted_tree 5.5 + 5.6 +class Blocks(object): 5.7 + """ 5.8 + Author: AAl 5.9 + Status: designed 5.10 + 5.11 + Data: 5.12 + list_of_blocks is a list of all Block objects of an alignment 5.13 + a block is identified by its index in the list_of_blocks 5.14 + tree_of_blocks is a rooted_tree object for all blocks of an alignment 5.15 + tree_of_blocks.content = [block,...] 5.16 + -stack_of_blocks is a list of undelivered blocks 5.17 + the list is delivered as stack (I've killed it - no need in it - (Burkov)) 5.18 + aligned_blocks is a list of aligned blocks 5.19 + 5.20 + Methods 5.21 + self.show_blocks 5.22 + self.add_aligned block 5.23 + """ 5.24 + 5.25 + def __init__(self,alignment=None,list_of_blocks=None,aligned_blocks=None,tree_of_blocks=None): 5.26 + self.alignment=alignment 5.27 + self.list_of_blocks=list_of_blocks 5.28 + self.tree_of_blocks=tree_of_blocks 5.29 + self.aligned_blocks=aligned_blocks 5.30 + 5.31 + if self.list_of_blocks==None: 5.32 + self.list_of_blocks=[] 5.33 + if self.tree_of_blocks==None: 5.34 + self.tree_of_blocks=rooted_tree.Rooted_tree() 5.35 + if self.aligned_blocks==None: 5.36 + self.aligned_blocks=[] 5.37 + 5.38 + 5.39 + def add_aligned_block(self, parent_block, aligned_block): 5.40 + """ 5.41 + Should be written 5.42 + self.add_aligned_block(aligned_block) 5.43 + 5.44 + """ 5.45 + 5.46 +
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/sbbs_2009/modules/blocks_io.py Sat Jul 09 16:30:28 2011 +0400 6.3 @@ -0,0 +1,279 @@ 6.4 +#! /usr/bin/python 6.5 + 6.6 +import configure 6.7 +import block 6.8 +import blocks 6.9 +import alignment 6.10 +import pickle 6.11 + 6.12 +def tree_of_blocks_to_plain_file(tree_of_blocks, file_name, input_file_name): 6.13 + """ 6.14 + """ 6.15 + blocks=tree_of_blocks.depth_first_traversal() 6.16 + file_content="Alignment_id\tblock_#\tsequence_name\tpositions_indexes\n" 6.17 + for i, i_ in enumerate(blocks): 6.18 + if i_[0].content[0].state=="aligned": 6.19 + for j in i_[0].content[0].sequence_indexes_a: 6.20 + file_content+=str(input_file_name)+"\t"+str(i)+"\t"+str(i_[0].content[0].alignment.sequences[j].name)+"\t" 6.21 + buffer="" 6.22 + for k in i_[0].content[0].position_indexes_a: 6.23 + buffer+=str(k)+" " 6.24 + if (len(buffer)!=0): buffer=buffer[:-1] 6.25 + file_content+=buffer+"\n" 6.26 + file=open(file_name,"w") 6.27 + file.write(file_content) 6.28 + file.close() 6.29 + 6.30 + 6.31 +def tree_of_blocks_to_html(input_alignment_file_name, tree_of_blocks, alignment, main_file_name="main.html", directry_file_name="directry.html", blocks_file_name="blocks.html"): 6.32 + """ 6.33 + This function shall create 3 html documents - frameholder and 2 frames: 6.34 + directry.html - contains tree of blocks 6.35 + blocks.html - contains tables of alignment with marked blocks and conserved positions 6.36 + """ 6.37 + #main frame creation 6.38 + blocks=tree_of_blocks.depth_first_traversal() 6.39 + main_file=open(main_file_name,"w") 6.40 + main_content="" 6.41 + main_content+="<Html>\n<Head>\n<Title>Blocks found in alignment "+str(input_alignment_file_name)+"</Title>\n</Head>\n" 6.42 + main_content+="<script>\nvar body=new Array();\n" 6.43 + for i, i_ in enumerate(alignment.sequence_indexes_s): 6.44 + main_content+="body["+str(i)+"] = new Array(" 6.45 + buffer="" 6.46 + for j, j_ in enumerate(alignment.body[i_]): 6.47 + if alignment.get_aa(i_,j)!=None: 6.48 + buffer+="\""+str(alignment.get_aa(i_,j)).upper()+"\"," 6.49 + else: 6.50 + buffer+="\"-\"," 6.51 + if len(buffer)!=0: buffer=buffer[:-1] 6.52 + main_content+=buffer 6.53 + main_content+=");\n" 6.54 + main_content+="var sequences_names = new Array(" 6.55 + buffer="" 6.56 + for i in alignment.sequences: 6.57 + buffer+="\""+str(i.name)+"\"," 6.58 + if len(buffer)!=0: buffer=buffer[:-1] 6.59 + main_content+=buffer 6.60 + main_content+=");\n" 6.61 + 6.62 + main_content+="var groups_names = new Array(" 6.63 + buffer="" 6.64 + for i, i_ in configure.groups_of_aa_1.items(): 6.65 + buffer+="\""+str(i)+"\"," 6.66 + if (len(buffer)!=0): buffer=buffer[:-1] 6.67 + main_content+=buffer 6.68 + main_content+=");\n" 6.69 + 6.70 + main_content+="var groups_content = new Array();\n" 6.71 + counter=0 6.72 + for i, i_ in configure.groups_of_aa_1.items(): 6.73 + main_content+="groups_content["+str(counter)+"] = new Array(" 6.74 + buffer="" 6.75 + for j in range(len(i_[0])): 6.76 + buffer+="\""+str(i_[0][j:j+1])+"\"," 6.77 + if (len(buffer)!=0): buffer=buffer[:-1] 6.78 + main_content+=buffer 6.79 + main_content+=");\n" 6.80 + counter+=1 6.81 + 6.82 + main_content+="</script>\n" 6.83 + main_content+=" <Frameset cols=\"200,*\">\n <Frame src=\""+str(input_alignment_file_name)+".directry.html\" name=\"directry\">\n <Frame src=\""+str(input_alignment_file_name)+".blocks.html\" name=\"blocks\">\n </Frameset>\n</Html>" 6.84 + main_file.write(main_content) 6.85 + main_file.close() 6.86 + 6.87 + #directry frame creation 6.88 + directry_content="" 6.89 + directry_content+="<Html>\n<Head>\n<Title>Tree of blocks</Title>\n</Head>\n<Body bgcolor=\"#FFFFFF\">\n<script>\n" 6.90 + #function, that checks, whether current aminoacid in conserved position is in conserved group or not 6.91 + directry_content+="function check_conservation(block_groups_in_conserved_positions, groups_names, groups_content, block_conserved_position, body_element){\n" 6.92 + directry_content+=" var i;\n var j;\n" 6.93 + directry_content+=" for (i=0;i<groups_names.length;i++){\n" 6.94 + directry_content+=" if (groups_names[i]==block_groups_in_conserved_positions[block_conserved_position]){\n" 6.95 + directry_content+=" var flag=0;\n" 6.96 + directry_content+=" for (j=0;j<groups_content[i].length;j++){\n" 6.97 + directry_content+=" if (body_element==groups_content[i][j]){\n" 6.98 + directry_content+=" flag=1;break;\n" 6.99 + directry_content+=" }\n" 6.100 + directry_content+=" }\n" 6.101 + directry_content+=" if (flag==1) {return true;}\n" 6.102 + directry_content+=" else {return false;}\n" 6.103 + directry_content+=" }\n" 6.104 + directry_content+=" }\n" 6.105 + directry_content+="}\n" 6.106 + #block-drawing function 6.107 + directry_content+="function draw_a_block(block_sequences, block_positions, block_parental_block_remaining_sequences, block_parental_block_positions, block_state, block_conserved_positions, block_groups_in_conserved_positions, block_name){\n" 6.108 + directry_content+=" parent.blocks.document.getElementsByTagName('P')[2].innerHTML=\"block_state = \"+block_state;\n" 6.109 + directry_content+=" parent.blocks.document.getElementsByTagName('P')[1].innerHTML=block_name;\n" 6.110 + directry_content+=" var i;\n var j;\n var k;\n var l;\n" 6.111 + directry_content+=" for (i=1;i<parent.body.length+1;i++){\n" 6.112 + directry_content+=" for (j=1;j<parent.body[0].length+1;j++){\n" 6.113 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#FFFFFF';\n" 6.114 + directry_content+=" }\n" 6.115 + directry_content+=" }\n" 6.116 + directry_content+=" for (i=1;i<block_sequences.length+1;i++){\n" 6.117 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[i].cells[0].innerText=parent.sequences_names[block_sequences[i-1]];\n" 6.118 + directry_content+=" for (j=1;j<parent.body[0].length+1;j++){\n" 6.119 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].innerText=parent.body[block_sequences[i-1]][j-1];\n" 6.120 + directry_content+=" var flag=0;\n" 6.121 + directry_content+=" for (k=0;k<block_positions.length;k++){\n" 6.122 + directry_content+=" if (block_positions[k]==j-1) {flag=1;break;}\n" 6.123 + directry_content+=" }\n" 6.124 + directry_content+=" if (flag==1){\n" 6.125 + directry_content+=" if (block_state==\"aligned\"){\n" 6.126 + directry_content+=" var innerflag=0;\n" 6.127 + directry_content+=" for (k=0;k<block_conserved_positions.length;k++){\n" 6.128 + directry_content+=" if (block_conserved_positions[k]==j-1){innerflag=1;break;}\n" 6.129 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';\n" 6.130 + directry_content+=" }\n" 6.131 + directry_content+=" if (innerflag==1){\n" 6.132 + directry_content+=" if (check_conservation(block_groups_in_conserved_positions, parent.groups_names, parent.groups_content, k, parent.body[block_sequences[i-1]][j-1])==true){parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#7777FF';}\n" 6.133 + directry_content+=" else{\n" 6.134 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';\n" 6.135 + directry_content+=" }\n" 6.136 + directry_content+=" }\n" 6.137 + directry_content+=" else {parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';}\n" 6.138 + directry_content+=" }\n" 6.139 + directry_content+=" else{parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';}\n" 6.140 + directry_content+=" }\n" 6.141 + directry_content+=" else{\n" 6.142 + directry_content+=" for (l=0;l<block_parental_block_positions.length;l++){\n" 6.143 + directry_content+=" if (block_parental_block_positions[l]==j-1){parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAFFAA';break;}\n" 6.144 + directry_content+=" }\n" 6.145 + directry_content+=" }\n" 6.146 + directry_content+=" }\n" 6.147 + directry_content+=" }\n" 6.148 + directry_content+=" var counter=block_sequences.length+1;\n" 6.149 + directry_content+=" for (i=1;i<block_parental_block_remaining_sequences.length+1;i++){\n" 6.150 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[0].innerText=parent.sequences_names[block_parental_block_remaining_sequences[i-1]];\n" 6.151 + directry_content+=" for (j=1;j<parent.body[0].length+1;j++){\n" 6.152 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[j].innerText=parent.body[block_parental_block_remaining_sequences[i-1]][j-1];\n" 6.153 + directry_content+=" var flag=0;\n" 6.154 + directry_content+=" for (k=0;k<block_parental_block_positions.length;k++){\n" 6.155 + directry_content+=" if (block_parental_block_positions[k]==j-1) {flag=1;break;}\n" 6.156 + directry_content+=" }\n" 6.157 + directry_content+=" if (flag==1){\n" 6.158 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[j].bgColor='#AAFFAA';\n" 6.159 + directry_content+=" }\n" 6.160 + directry_content+=" }\n" 6.161 + directry_content+=" counter++;\n" 6.162 + directry_content+=" }\n" 6.163 + directry_content+=" for (i=1;i<parent.body.length+1;i++){\n" 6.164 + directry_content+=" var flag=0;\n" 6.165 + directry_content+=" for (j=1;j<block_sequences.length+1;j++){\n" 6.166 + directry_content+=" if (i-1==block_sequences[j-1]){flag=1;break;}\n" 6.167 + directry_content+=" }\n" 6.168 + directry_content+=" for (j=0;j<block_parental_block_remaining_sequences.length;j++){\n" 6.169 + directry_content+=" if (i-1==block_parental_block_remaining_sequences[j]){flag=1;break;}\n" 6.170 + directry_content+=" }\n" 6.171 + directry_content+=" if (flag==0) {\n" 6.172 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[0].innerText=parent.sequences_names[i-1];\n" 6.173 + directry_content+=" for (j=1;j<parent.body[0].length+1;j++){\n" 6.174 + directry_content+=" parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[j].innerText=parent.body[i-1][j-1];\n" 6.175 + directry_content+=" }\n" 6.176 + directry_content+=" counter++;\n" 6.177 + directry_content+=" }\n" 6.178 + directry_content+=" }\n" 6.179 + directry_content+="}\n" 6.180 + 6.181 + #traverse the tree_of_blocks depth-first 6.182 + for i,i_ in enumerate(blocks): 6.183 + if (i_[0]==tree_of_blocks.root): continue 6.184 + directry_content+="var block"+str(i)+"_sequences=new Array(" 6.185 + buffer="" 6.186 + for j in i_[0].content[0].sequence_indexes_a: 6.187 + buffer+="\""+str(j)+"\"," 6.188 + if len(buffer)!=0: buffer=buffer[:-1] 6.189 + directry_content+=buffer 6.190 + directry_content+=");\n" 6.191 + 6.192 + directry_content+="var block"+str(i)+"_positions=new Array(" 6.193 + buffer="" 6.194 + for j in i_[0].content[0].position_indexes_a: 6.195 + buffer+="\""+str(j)+"\"," 6.196 + if len(buffer)!=0: buffer=buffer[:-1] 6.197 + directry_content+=buffer 6.198 + directry_content+=");\n" 6.199 + 6.200 + directry_content+="var block"+str(i)+"_conserved_positions=new Array(" 6.201 + buffer="" 6.202 + if i_[0].content[0].state=="aligned": 6.203 + for j in i_[0].content[0].conserved_positions: 6.204 + buffer+="\""+str(j[0])+"\"," 6.205 + if len(buffer)!=0: buffer=buffer[:-1] 6.206 + directry_content+=buffer 6.207 + directry_content+=");\n" 6.208 + 6.209 + directry_content+="var block"+str(i)+"_groups_in_conserved_positions=new Array(" 6.210 + buffer="" 6.211 + if i_[0].content[0].state=="aligned": 6.212 + for j in i_[0].content[0].conserved_positions: 6.213 + buffer+="\""+str(j[1])+"\"," 6.214 + if len(buffer)!=0: buffer=buffer[:-1] 6.215 + directry_content+=buffer 6.216 + directry_content+=");\n" 6.217 + 6.218 + directry_content+="var block"+str(i)+"_state=\""+str(i_[0].content[0].state)+"\";\n" 6.219 + 6.220 + if i_[0].parental_vertex!=tree_of_blocks.root: 6.221 + 6.222 + directry_content+="var block"+str(i)+"_parental_block_remaining_sequences=new Array(" 6.223 + buffer="" 6.224 + for j in i_[0].parental_vertex.content[0].sequence_indexes_a: 6.225 + if (j in i_[0].content[0].sequence_indexes_a) == False : buffer+="\""+str(j)+"\"," 6.226 + if len(buffer)!=0: buffer=buffer[:-1] 6.227 + directry_content+=buffer 6.228 + directry_content+=");\n" 6.229 + 6.230 + directry_content+="var block"+str(i)+"_parental_block_positions=new Array(" 6.231 + buffer="" 6.232 + for j in i_[0].parental_vertex.content[0].position_indexes_a: 6.233 + buffer+="\""+str(j)+"\"," 6.234 + if len(buffer)!=0: buffer=buffer[:-1] 6.235 + directry_content+=buffer 6.236 + directry_content+=");\n" 6.237 + else: 6.238 + directry_content+="var block"+str(i)+"_parental_block_remaining_sequences=new Array();\n" 6.239 + directry_content+="var block"+str(i)+"_parental_block_positions=new Array();\n" 6.240 + 6.241 + directry_content+="</script>\n" 6.242 + for i, i_ in enumerate(blocks): 6.243 + directry_content+="<A href=\"javascript:void(draw_a_block(block"+str(i)+"_sequences, block"+str(i)+"_positions, block"+str(i)+"_parental_block_remaining_sequences, block"+str(i)+"_parental_block_positions, block"+str(i)+"_state, block"+str(i)+"_conserved_positions, block"+str(i)+"_groups_in_conserved_positions, \'block"+str(i)+"\'))\">" 6.244 + for j in range(i_[1]-1): 6.245 + directry_content+="  |" 6.246 + if i_[1]!=1:directry_content+="-" 6.247 + directry_content+="block"+str(i) 6.248 + if i_[0].content[0].state=="aligned": directry_content+=" +" 6.249 + elif i_[0].content[0].state=="no_alignment": directry_content+=" -" 6.250 + directry_content+="</A><Br>\n" 6.251 + directry_content+="</Body>\n</Html>" 6.252 + directry_file=open(directry_file_name,"w") 6.253 + directry_file.write(directry_content) 6.254 + directry_file.close() 6.255 + 6.256 + #blocks frame creation 6.257 + #create blocks_content 6.258 + blocks_content="" 6.259 + blocks_content+="<Html>\n<Head>\n<Title>Blocks found in alignment "+str(input_alignment_file_name)+"</Title>\n</Head>\n<Body bgcolor=\"#FFFFFF\">\n" 6.260 + blocks_content+="<P> Blue color in tables designates current block, green color designates current block's parental block.</P><Br><Br>\n" 6.261 + blocks_content+="<P></P>\n" 6.262 + blocks_content+="<P></P>\n" 6.263 + blocks_content+="<Table border=\"1\" id=\"alignment\">\n" 6.264 + blocks_content+=" <Tr>\n" 6.265 + blocks_content+=" <Th>Sequences names</Th>\n" 6.266 + for i in alignment.position_indexes_a: 6.267 + blocks_content+=" <Th>"+str(int(i)+1)+"</Th>\n" 6.268 + blocks_content+=" </Tr>\n" 6.269 + for i, i_ in enumerate(alignment.body): 6.270 + blocks_content+=" <Tr>\n" 6.271 + blocks_content+=" <Td>"+str(alignment.sequences[i].name)+"</Td>\n" 6.272 + for j,j_ in enumerate(i_): 6.273 + if alignment.get_aa(i,j)!=None: 6.274 + blocks_content+=" <Td>"+str(alignment.get_aa(i,j)).upper()+"</Td>\n" 6.275 + else: 6.276 + blocks_content+=" <Td>-</Td>\n" 6.277 + blocks_content+=" </Tr>\n" 6.278 + blocks_content+="</Body>\n</Html>" 6.279 + blocks_file=open(blocks_file_name, "w") 6.280 + blocks_file.write(blocks_content) 6.281 + blocks_file.close() 6.282 +
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/sbbs_2009/modules/configure.py Sat Jul 09 16:30:28 2011 +0400 7.3 @@ -0,0 +1,85 @@ 7.4 +#!/usr/bin/python 7.5 +""" 7.6 +Author: AAl 7.7 +Status: Format accepted Feb.6, 2009 7.8 + 7.9 +All parameters are contained here. 7.10 + 7.11 +To use parameters one need import configure and use names as follows: 7.12 +configure.minimal_block_size 7.13 + 7.14 + 7.15 +Vocabulary 7.16 + Suffixes 7.17 + _a # Alignment 7.18 + _b # Block 7.19 + _s # Sequence 7.20 + _p # Parent 7.21 + _aa # amino acid residue 7.22 + _num # number of an element; 1, 2, ... Typically num=index+1 7.23 + _ss # secondary structure 7.24 + 7.25 + Words 7.26 + position # position in alignment, syn. column 7.27 + alignment 7.28 + index # 0, 1, ... 7.29 + gap_position # a position containing at least one aa_symbol and at least one gap symbol "-" 7.30 + empty_position # contains gap symbols "-" only 7.31 + body #alignment markup 7.32 + 7.33 + 7.34 +""" 7.35 + 7.36 +missing_characters=set([" ",'\t','\n',"0","1","2","3","4","5","6","7","8","9",'\r']) 7.37 + 7.38 +gap_symbols=set([".","-","~"]) # all are immidiatly changed to "-" 7.39 + 7.40 +aa_characters=set(["a","c","d","e","f","g","h","i","k","l","m","n","p","q","r","s","t","v","w","y","x"]) 7.41 + 7.42 +special_sequence_names = set(["aligned", "ss", "sa", "domains", "contact"]) #to be compared with words with lowered letters 7.43 + 7.44 +alignment_filename = "my_alignment.fasta" 7.45 + 7.46 +minimal_block_width = 3 7.47 + 7.48 +# {group_id, (letters, weight, description)} 7.49 +# Weight is log_2 of group frequency 7.50 + 7.51 +groups_of_aa_1={ 7.52 + "b":("IVLMFA",None,-1.671163536,None), 7.53 + "t":("AGSTC", None,-1.625934282,None), 7.54 + "u":("HKR", None,-2.805912948,None), 7.55 + "l":("IVLM", None,-2.321928095,None), 7.56 + "c":("IV", None,-3.23786383 ,None), 7.57 + "a":("FWY", None,-3.53951953 ,None), 7.58 + "d":("DE", None,-3.095419565,None), 7.59 + "e":("KR", None,-3.13289427 ,None), 7.60 + "g":("DN", None,-3.279283757,None), 7.61 + "f":("EQ", None,-3.395928676,None), 7.62 + "i":("ST", None,-2.805912948,None), 7.63 + "h":("AG", None,-2.756330919,None), 7.64 + "A":("A", None,-3.756330919,None), 7.65 + "G":("G", None,-3.756330919,None), 7.66 + "S":("S", None,-3.625934282,None), 7.67 + "T":("T", None,-4.011587974,None), 7.68 + "C":("C", None,-4.921390165,None), 7.69 + "P":("P", None,-4.321928095,None), 7.70 + "N":("N", None,-4.506352666,None), 7.71 + "H":("H", None,-5.10780329 ,None), 7.72 + "K":("K", None,-3.795859283,None), 7.73 + "R":("R", None,-4.573466862,None), 7.74 + "D":("D", None,-4.083141235,None), 7.75 + "E":("E", None,-4.10780329 ,None), 7.76 + "Q":("Q", None,-4.756330919,None), 7.77 + "I":("I", None,-4.717856771,None), 7.78 + "L":("L", None,-3.717856771,None), 7.79 + "V":("V", None,-3.878321443,None), 7.80 + "F":("F", None,-4.64385619 ,None), 7.81 + "W":("W", None,-6.265344567,None), 7.82 + "Y":("Y", None,-4.921390165,None), 7.83 + "M":("M", None,-5.795859283,None) 7.84 + } 7.85 + 7.86 +conservation_threshold=0.95 7.87 + 7.88 +probability_threshold=1.0/800
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/sbbs_2009/modules/main.py Sat Jul 09 16:30:28 2011 +0400 8.3 @@ -0,0 +1,124 @@ 8.4 +#!usr/bin/python 8.5 + 8.6 +# Test changes 8.7 +pass 8.8 + 8.9 +import configure 8.10 +import alignment 8.11 +import block 8.12 +import blocks 8.13 +import rooted_tree 8.14 +from optparse import OptionParser 8.15 +import blocks_io 8.16 + 8.17 +import time 8.18 +import inspect 8.19 + 8.20 +def _find_next_current_vertex(current_vertex): 8.21 + """ 8.22 + This is an internal routine 8.23 + """ 8.24 + flag=0 8.25 + while flag!=1: 8.26 + if current_vertex.content[0].state=="in process": 8.27 + second_flag=0 8.28 + for i in current_vertex.child_vertices: 8.29 + if i.content[0].state=="pending": 8.30 + current_vertex=i 8.31 + return current_vertex 8.32 + elif i.content[0].state=="in process": 8.33 + current_vertex=i 8.34 + second_flag=1 8.35 + if second_flag==0: 8.36 + current_vertex.content[0].state="processed" 8.37 + current_vertex=current_vertex.parental_vertex 8.38 + elif current_vertex.content[0].state=="processed": 8.39 + current_vertex=current_vertex.parental_vertex 8.40 + elif current_vertex.content[0].state=="no_alignment": 8.41 + current_vertex=current_vertex.parental_vertex 8.42 + elif current_vertex.content[0].state=="initial": 8.43 + return current_vertex 8.44 + elif current_vertex.content[0].state=="pending": 8.45 + return current_vertex 8.46 + 8.47 +#class plug(object): 8.48 +# def __init__(self): 8.49 +# self.state="initial" 8.50 + 8.51 + 8.52 + 8.53 + 8.54 +""" 8.55 +Borya Nagaev - BNagaev@gmail.com 8.56 + 8.57 +""" 8.58 +""" 8.59 +The main routine 8.60 + 8.61 +Author: Burkov 8.62 +State: in progress 8.63 + 8.64 +This is the main routine 8.65 +To run the program, type: 8.66 + 8.67 +python main.py test_3.msf > log.txt 8.68 +""" 8.69 + 8.70 +#parsing the arguments 8.71 +parser=OptionParser() 8.72 +options, args = parser.parse_args() 8.73 +alignment = alignment.Alignment([]) 8.74 + 8.75 +print args[0] 8.76 + 8.77 +alignment.get_from_file(args[0]) 8.78 +#initialization of blocks and initial_block 8.79 +blocks=blocks.Blocks(alignment, None, None, None) 8.80 +initial_block=block.Block(alignment, alignment.sequence_indexes_s, alignment.position_indexes_a) 8.81 +initial_block.state="pending" 8.82 +#initialization of rooted_tree of blocks 8.83 +tree_of_blocks = rooted_tree.Rooted_tree() 8.84 +tree_of_blocks.root.child_vertices.append(rooted_tree.Vertex([initial_block],tree_of_blocks.root,[],None)) 8.85 +tree_of_blocks.root.content[0]=block.plug() 8.86 +#there's no need in stack of blocks: to end the process we just need all the children of the initial_block to have status "processed", "aligned" or "no alignment" 8.87 +#traverse of the rooted_tree 8.88 + 8.89 +print ">"+str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" time ="+str(time.time()) 8.90 +#print "time at"+inspect.currentframe().f_back.f_lineno+" = "+str(time.time()) 8.91 +#all this info is available via "sys" module "_something" internal function or "traceback" module 8.92 +current_vertex=tree_of_blocks.root.child_vertices[0] 8.93 +while current_vertex!=tree_of_blocks.root: 8.94 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" current_vertex.content[0].__dict__ = "+str(current_vertex.content[0].__dict__) 8.95 + print ">"+str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" time ="+str(time.time()) 8.96 + aligned_block=current_vertex.content[0].get_aligned_block() 8.97 + if aligned_block != None: 8.98 + aligned_block=current_vertex.content[0].upsize(aligned_block) 8.99 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" aligned_block.__dict__ = "+str(aligned_block.__dict__) 8.100 + if aligned_block.block_of_gaps!=None: print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" aligned_block.block_of_gaps.__dict__ = "+str(aligned_block.block_of_gaps.__dict__) 8.101 + current_vertex.content[0].state="in process" 8.102 + blocks.aligned_blocks.append(aligned_block) 8.103 + current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block],current_vertex,[],None)) 8.104 + if aligned_block.block_of_gaps!=None: current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block.block_of_gaps],current_vertex,[],None)) 8.105 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+"before find_complementary_blocks" 8.106 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" time at mark 2 = "+str(time.time()) 8.107 + complementary_blocks=current_vertex.content[0].find_complementary_blocks(aligned_block.position_indexes_a, aligned_block.sequence_indexes_a) 8.108 + for i in complementary_blocks: 8.109 + current_vertex.child_vertices.append(rooted_tree.Vertex([i],current_vertex,[],None)) 8.110 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" i.__dict__ = "+str(i.__dict__) 8.111 + current_vertex=_find_next_current_vertex(current_vertex) 8.112 + else: 8.113 + current_vertex=_find_next_current_vertex(current_vertex) 8.114 + 8.115 +for i in blocks.aligned_blocks: 8.116 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" found_aligned_blocks.__dict__ = "+str(i.__dict__)+"\n\n" 8.117 + 8.118 +blocks_for_print=tree_of_blocks.depth_first_traversal() 8.119 +for i in blocks_for_print: 8.120 + print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" nesting_depth = "+str(i[1])+" current_block's dict = "+str(i[0].content[0].__dict__) 8.121 + 8.122 +import pickle 8.123 +storage=open(str(args[0])+'.pcl','wb') 8.124 +pickle.dump(tree_of_blocks, storage) 8.125 +storage.close() 8.126 + 8.127 +blocks_io.tree_of_blocks_to_html(args[0], tree_of_blocks, alignment, args[0]+".main.html", args[0]+".directry.html", args[0]+".blocks.html")
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/sbbs_2009/modules/phylip.py Sat Jul 09 16:30:28 2011 +0400 9.3 @@ -0,0 +1,275 @@ 9.4 +#!/usr/bin/python 9.5 +""" 9.6 +Contains classes and functions for information exchange with the "PHYLIP" package 9.7 +Classes: 9.8 + Phy_matrix: distance matrix 9.9 + WrongFileFormat: technical (exception for errors while reading file) 9.10 + 9.11 +Functions: 9.12 + readmatrix(filename): read file with distance matrix in PHYLIP 9.13 + format; return object of Phy_matrix class 9.14 + 9.15 +""" 9.16 +class Phy_matrix(object): 9.17 + """ 9.18 + Distance matrix 9.19 + 9.20 + Author: S.A.S. 9.21 + Status: in progress 9.22 + 9.23 + Data: 9.24 + self.number # number of objects (int) 9.25 + self.names # names of objects (tuple of strings) 9.26 + self.matrix # distances between objects (dictionary with keys = unordered pairs of names 9.27 + # and float values) 9.28 + 9.29 + Methods 9.30 + __init__(self, names=()) # initializing with zero distances 9.31 + __str__(self) # output in PHYLIP format 9.32 + __mul__(self,m) # multiply all distances by a coefficicent m 9.33 + __div__(self,d) # divide all distances by a divisor d 9.34 + max(self) # return the maximal value of distance 9.35 + hasneg(self) # if there is a negative distance (True or False)? 9.36 + index(self, name) # index of object (1 to self.number) 9.37 + name(self, index) # name of the object with the given index (1 to self.number) 9.38 + distance(self, name1, name2) # return the element of the matrix (the distance between two objects) 9.39 + distance_n(self, index1, index2) # the same, objects are given by their indices 9.40 + setdist(self, name1, name2, d) # return new Phy_matrix that differs from *self* by the distance between 9.41 + # two given objects that is set to d 9.42 + setdist_n(self, index1, index2, d) # the same, objects are given by their indices 9.43 + addobject(self, name) # return new Phy_matrix that differs from *self* by the added object having 9.44 + # the given name, the last possible index and zero distances to all others 9.45 + permutate(self, name1, name2) # return new Phy_matrix with two permutated objects 9.46 + permutate_n(self, index1, index2) # the same, objects are given by their indices 9.47 + 9.48 + Planned method 9.49 + wpgma(self) # rooted tree from distances (wpgma method) 9.50 + """ 9.51 + 9.52 + def __init__(self, names=()): 9.53 + self.number=len(names) 9.54 + self.names=names 9.55 + self.matrix=dict() 9.56 + for _i in range(1, self.number): 9.57 + for _j in range(0, _i): 9.58 + if names[_i] == names[_j]: 9.59 + raise "Duplicate name "+names[_i] 9.60 + else: 9.61 + self.matrix[frozenset((names[_i], names[_j]))] = 0.0 9.62 + 9.63 + def __str__(self): 9.64 + if (self.number) > 99999: 9.65 + _result="Too large matrix to print!!!" 9.66 + else: 9.67 + _result = "%5i" % self.number 9.68 + for _i in range(self.number): 9.69 + if len(self.names[_i]) < 11: 9.70 + _row = "\n" + (self.names[_i]).ljust(10) 9.71 + else: 9.72 + _row = "\n" + "Obj_" + str(_i).ljust(6) 9.73 + _counter = 1 9.74 + for _j in range(self.number): 9.75 + if _counter == 7: 9.76 + _row = _row + "\n" 9.77 + _counter = 0 9.78 + if _i == _j: 9.79 + _row = _row + " 0.000000" 9.80 + else: 9.81 + _row = _row + "%10.6f" % self.matrix[frozenset((self.names[_i], self.names[_j]))] 9.82 + _counter += 1 9.83 + _result = _result + _row 9.84 + return _result 9.85 + 9.86 + def __mul__(self, m): 9.87 + _result = Phy_matrix(self.names) 9.88 + for _pair in self.matrix.iterkeys(): 9.89 + _result.matrix[_pair] = self.matrix[_pair]*m 9.90 + return _result 9.91 + 9.92 + def __div__(self, d): 9.93 + _result = Phy_matrix(self.names) 9.94 + for _pair in self.matrix.iterkeys(): 9.95 + _result.matrix[_pair] = self.matrix[_pair]/d 9.96 + return _result 9.97 + 9.98 + def max(self): 9.99 + return max(self.matrix.values()) 9.100 + 9.101 + def hasneg(self): 9.102 + _result = False 9.103 + for dist in self.matrix.values(): 9.104 + if dist < 0: _result = True 9.105 + return _result 9.106 + 9.107 + def index(self, name): 9.108 + _result = 0 9.109 + for _i in range(self.number): 9.110 + if self.names[_i] == name: 9.111 + _result = _i+1 9.112 + return _result 9.113 + 9.114 + def name(self, index): 9.115 + _result = "" 9.116 + if index > self.number: 9.117 + print "Content-type=text/plain\n" 9.118 + print "Index %i is out of range!\n" % index 9.119 + elif index > 0: 9.120 + _result = self.names[index-1] 9.121 + return _result 9.122 + 9.123 + def distance(self, name1, name2): 9.124 + _result = 0.0 9.125 + _n1 = self.index(name1) 9.126 + if _n1 == 0: 9.127 + print "Content-type=text/plain\n" 9.128 + print "Name \""+name1+"\" does not exist!\n" 9.129 + _n2 = self.index(name2) 9.130 + if _n2 == 0: 9.131 + print "Content-type=text/plain\n" 9.132 + print "Name \""+name2+"\" does not exist!\n" 9.133 + if (_n1 > 0) and (_n2 > 0) and (_n1 != _n2): 9.134 + _result = self.matrix[frozenset((name1, name2))] 9.135 + return _result 9.136 + 9.137 + def distance_n(self, i, j): 9.138 + _result = 0.0 9.139 + if (i > 0) and (j > 0) and (i != j): 9.140 + _result = self.matrix[frozenset((self.name(i), self.name(j)))] 9.141 + return _result 9.142 + 9.143 + def setdist(self, name1, name2, d): 9.144 + if name1 == name2: 9.145 + raise "Impossible to set distance between two identical objects!\n" 9.146 + else: 9.147 + _result = Phy_matrix(self.names) 9.148 + _pair0 = frozenset((name1, name2)) 9.149 + for _pair in self.matrix.iterkeys(): 9.150 + if _pair0 == _pair: 9.151 + _result.matrix[_pair] = d 9.152 + else: 9.153 + _result.matrix[_pair] = self.matrix[_pair] 9.154 + return _result 9.155 + 9.156 + def setdist_n(self, i, j, d): 9.157 + if i == j: 9.158 + raise "Impossible to set distance between two identical objects!\n" 9.159 + else: 9.160 + _result = Phy_matrix(self.names) 9.161 + _pair0 = frozenset((self.name(i), self.name(j))) 9.162 + for _pair in self.matrix.iterkeys(): 9.163 + if _pair0 == _pair: 9.164 + _result.matrix[_pair] = d 9.165 + else: 9.166 + _result.matrix[_pair] = self.matrix[_pair] 9.167 + return _result 9.168 + 9.169 + def addobject(self, name): 9.170 + if self.matrix.has_key(name): 9.171 + _result = self.copy() 9.172 + else: 9.173 + _newnames = self.names + (name,) 9.174 + _result = Phy_matrix(_newnames) 9.175 + _result.matrix = self.matrix.copy() 9.176 + for _name in self.names: 9.177 + _result.matrix[frozenset((_name,name))] = 0.0 9.178 + return _result 9.179 + 9.180 + def permutate(self, name1, name2): 9.181 + if name1==name2: 9.182 + result = self.copy() 9.183 + else: 9.184 + _i = self.index(name1) 9.185 + _j = self.index(name2) 9.186 + if (_i==0) or (_j==0): 9.187 + result = self.copy() 9.188 + else: 9.189 + _min = min(_i-1,_j-1) 9.190 + _max = max(_i-1,_j-1) 9.191 + _newnames = self.names[:_min] + (self.names[_max],) + self.names[_min+1:_max] + (self.names[_min],) + self.names[_max+1:] 9.192 + _result = Phy_matrix(_newnames) 9.193 + _result.matrix = self.matrix.copy() 9.194 + return _result 9.195 + 9.196 + def permutate_n(self, i, j): 9.197 + if (i==j) or (i==0) or (j==0): 9.198 + result = self.copy() 9.199 + else: 9.200 + _min = min(i-1,j-1) 9.201 + _max = max(i-1,j-1) 9.202 + _newnames = self.names[:_min] + (self.names[_max],) + self.names[_min+1:_max] + (self.names[_min],) + self.names[_max+1:] 9.203 + _result = Phy_matrix(_newnames) 9.204 + _result.matrix = self.matrix.copy() 9.205 + return _result 9.206 + 9.207 + 9.208 +# class phy_matrix 9.209 + 9.210 +class WrongFileFormat(Exception): 9.211 + pass 9.212 + 9.213 +def readmatrix(filename): 9.214 + ''' 9.215 + Reading distance matrix from a file in PHYLIP format defined by its name 9.216 + Author: S.A.S. 9.217 + 9.218 + ''' 9.219 + #import sys # for debug purposes! 9.220 + infile = open(filename, "r") 9.221 + line = infile.readline() 9.222 + try: 9.223 + number = int(line) 9.224 + except ValueError: 9.225 + raise WrongFileFormat, "First line of \""+filename+"\" does not contain the number of objects" 9.226 + if number == 0: 9.227 + raise WrongFileFormat, "Zero number of objects in "+filename 9.228 + names = () 9.229 + counter = 0 9.230 + matrix = dict() 9.231 + tempmatrix = () 9.232 + row = () 9.233 + for line in infile.readlines(): 9.234 + if counter == 0: 9.235 + names = names + (line[0:10].strip(),) 9.236 + if (number < 6): 9.237 + numelements = number 9.238 + else: 9.239 + numelements = 6 9.240 + if len(line) < 10*(numelements+1) : 9.241 + raise WrongFileFormat, "Too short row in file " + filename 9.242 + 9.243 + for i in range(numelements): 9.244 + try: 9.245 + row = row + (float(line[10*(i+1):10*(i+2)]),) 9.246 + except ValueError: 9.247 + raise WrongFileFormat, "Non-numeric data in "+filename 9.248 + counter += numelements 9.249 + else: 9.250 + if (number - counter < 7): 9.251 + numelements = number - counter 9.252 + else: 9.253 + numelements = 7 9.254 + if len(line) < 10*numelements : 9.255 + raise WrongFileFormat, "Too short row in file" + filename 9.256 + 9.257 + for i in range(numelements): 9.258 + try: 9.259 + row = row + (float(line[10*i:10*(i+1)]),) 9.260 + except ValueError: 9.261 + raise WrongFileFormat, "Non-numeric data in "+filename 9.262 + counter += numelements 9.263 + if counter == number: 9.264 + counter = 0 9.265 + tempmatrix = tempmatrix + (row,) 9.266 + row = () 9.267 + # for all lines of the file 9.268 + infile.close() 9.269 + if (counter != 0) or (len(names) != number): 9.270 + raise WrongFileFormat, "Wrong number of objects in "+filename 9.271 + 9.272 + result = Phy_matrix(names) 9.273 + for i in range(1, result.number): 9.274 + for j in range(0, i): 9.275 + result.matrix[frozenset((names[i], names[j]))] = tempmatrix[i][j] 9.276 + return result 9.277 +# readmatrix 9.278 +
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/sbbs_2009/modules/rooted_tree.py Sat Jul 09 16:30:28 2011 +0400 10.3 @@ -0,0 +1,507 @@ 10.4 +""" 10.5 +The module comtains classes 10.6 + Rooted_tree 10.7 + Vertex 10.8 + 10.9 +Dictionary: 10.10 + vertex - any vertex of the tree, including nodes and leaves 10.11 + node - internal vertex 10.12 + leaf - terminal vertex 10.13 + edge 10.14 + root 10.15 + parenthetic structure - str without "\n" or "\r", which ends with ";", 10.16 + contains parentheses, corresponding to nodes e.g. "(a,b,c);" corresponds 10.17 + to a tree, with leaves a, b and c arise from the root; optionnaly may 10.18 + contain lengths (minus and dot symbols allowed) on edges: "(a:1.5,b:2,c:-3.5)" 10.19 + subtree, corresponding to a node (or lying under a node) 10.20 + root subtree, corresponding to the node - union of edges+nodes, leading 10.21 + from the root to that node and subtree under that node 10.22 + 10.23 + 10.24 +""" 10.25 + 10.26 + 10.27 +class Rooted_tree(object): 10.28 + 10.29 + """ 10.30 + Authors: Burkov 10.31 + Status: in progress (most methods seem to work) 10.32 + 10.33 + Data: 10.34 + self.root # contains Vertex object which is the root 10.35 + self.leaves # contains list of tree leaves as Vertex objects 10.36 + 10.37 + Methods 10.38 + +self.parenthetic_structure_to_tree # 10.39 + +self.next_node(self, candidate_nodes) # Return a list [next_node,[new_candidate_nodes]] 10.40 + +self.node_to_set_of_leaves(self, node) # Return a set of leaves under the node 10.41 + A leaf is a Vertex object 10.42 + +self.complement(self, set_of_leaves) # Return complement set of leaves 10.43 + +self.chop_the_tree(self, set_of_leaves) # Returns a susbtree of the original tree as a 10.44 + Rooted_tree object, that conatins only the 10.45 + leaves in set_of_leaves 10.46 + +self.set_of_leaves_to_sum_of_lengthes_of_root_subtree(self, set_of_leaves) # Returns sum_of_lengthes_of_root_subtree, 10.47 + which contains only lists in set_of_leaves 10.48 + NEW (AAl)!!! 10.49 + self.add_node(self,content, parental_vertex,parental_edge) 10.50 + # Add edges and a node; 10.51 + local tree rearrangement 10.52 + Return index of new node 10.53 + The method is needed for a tree of blocks 10.54 + # Parental_edge contains one of strings: 10.55 + # "left" means block is left complement in parent 10.56 + # "right" means block is right complement in parent 10.57 + # "bottom" means block is bottom complement in parent 10.58 + 10.59 + """ 10.60 + def __init__(self): 10.61 + self.root=Vertex(["root"],None,[],None) 10.62 + self.leaves=[] 10.63 + 10.64 + def add_node(self, vertex): 10.65 + 10.66 + vertex.parental_vertex.child_vertices.append(vertex) 10.67 + if vertex.parental_vertex in self.leaves: 10.68 + pass 10.69 +# ... add vertex to leaves 10.70 + 10.71 + def parenthetic_structure_to_tree(self,parenthetic_structure,testflag=0): 10.72 + r""" 10.73 + This function gets parenthetic structure of the tree (ending with ";" ; lenghts of tree edges may be 10.74 + stated or not; tree may be binary or not) 10.75 + by definition root's parent = None; 10.76 + The tree is traversed width-first upon construction; currently processed nodes are stored in queue; 10.77 + with their parental vertices and indexes of the respective wax and wane (indeed, wax and wane in the 10.78 + parenthetic structure correspond to a node). 10.79 + 10.80 + //Note that sequence names are nums made from alignment.sequence_indexes 10.81 + //Therefore, vertices reffer to that indexes 10.82 + 10.83 + >>> import rooted_tree 10.84 + >>> tree=rooted_tree.Rooted_tree() 10.85 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.86 + >>> print "tree.root.child_vertices[0].content = "+str(tree.root.child_vertices[0].content) 10.87 + tree.root.child_vertices[0].content = ['a'] 10.88 + >>> print "tree.root.child_vertices[0].parental_edge = "+str(tree.root.child_vertices[0].parental_edge) 10.89 + tree.root.child_vertices[0].parental_edge = 10 10.90 + >>> print "tree.root.child_vertices[1].content = "+str(tree.root.child_vertices[1].content) 10.91 + tree.root.child_vertices[1].content = [None] 10.92 + >>> print "tree.root.child_vertices[1].parental_edge = "+str(tree.root.child_vertices[1].parental_edge) 10.93 + tree.root.child_vertices[1].parental_edge = 2.0 10.94 + >>> print "tree.root.child_vertices[1].child_vertices[0].content = "+str(tree.root.child_vertices[1].child_vertices[0].content) 10.95 + tree.root.child_vertices[1].child_vertices[0].content = ['b'] 10.96 + >>> print "tree.root.child_vertices[1].child_vertices[0].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[0].parental_edge) 10.97 + tree.root.child_vertices[1].child_vertices[0].parental_edge = 8.0 10.98 + >>> print "tree.root.child_vertices[1].child_vertices[1].content = "+str(tree.root.child_vertices[1].child_vertices[1].content) 10.99 + tree.root.child_vertices[1].child_vertices[1].content = [None] 10.100 + >>> print "tree.root.child_vertices[1].child_vertices[1].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[1].parental_edge) 10.101 + tree.root.child_vertices[1].child_vertices[1].parental_edge = 5.0 10.102 + >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[0].content = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[0].content) 10.103 + tree.root.child_vertices[1].child_vertices[1].child_vertices[0].content = ['c'] 10.104 + >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[0].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[0].parental_edge) 10.105 + tree.root.child_vertices[1].child_vertices[1].child_vertices[0].parental_edge = 3.0 10.106 + >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[1].content = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[1].content) 10.107 + tree.root.child_vertices[1].child_vertices[1].child_vertices[1].content = ['d'] 10.108 + >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[1].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[1].parental_edge) 10.109 + tree.root.child_vertices[1].child_vertices[1].child_vertices[1].parental_edge = 3.0 10.110 + 10.111 + 10.112 + """ 10.113 + 10.114 + import re 10.115 + if testflag!=0: import sys 10.116 + queue=[(self.root,None,0,len(parenthetic_structure)-2)] #len(parenthetic_structure)-2 == index of last ')' 10.117 + while queue!=[]: 10.118 + #take the first element in the queue 10.119 + current_queue_element=queue[0] 10.120 + wax=0 10.121 + wane=0 10.122 + left_margin=current_queue_element[2]+1 10.123 + for i in xrange(current_queue_element[2],current_queue_element[3]+1,1): 10.124 + if parenthetic_structure[i]=="(": 10.125 + wax=wax+1 10.126 + elif parenthetic_structure[i]=="," or parenthetic_structure[i]==")":#child vertex found 10.127 + 10.128 + if wax==wane+1: 10.129 + #search for the branch length; if not length-oriented, length=None 10.130 + length_search_object=re.search('-?\d*\.?\d+$',parenthetic_structure[left_margin:i]) 10.131 + if length_search_object:#if length-oriented 10.132 + length=length_search_object.group() 10.133 + #add a new vertex to current_queue_element's children list 10.134 + current_queue_element[0].child_vertices.append(Vertex([],current_queue_element[0],[],length)) 10.135 + if testflag!=0: sys.stderr.write("left_margin = "+str(left_margin)+"| i = "+str(i)+"| length = " +str(length)+"\n") 10.136 + if testflag!=0: sys.stderr.write(str(queue)+"\n") 10.137 + if testflag!=0: sys.stderr.write(str(current_queue_element[0].content)+"\n") 10.138 + #check: node or leaf (are there parentheses inside) 10.139 + if None==re.search('[\(\)]',parenthetic_structure[left_margin:i]): 10.140 + if testflag!=0: sys.stderr.write("New leaf found!\n") 10.141 + #if leaf, find and fill in its name 10.142 + if length_search_object:#if length-oriented 10.143 + name=re.search('(.*):-?\d*\.?\d+$',parenthetic_structure[left_margin:i]).group(1) 10.144 + else:#if non-weight-oriented 10.145 + name=parenthetic_structure[left_margin:i] 10.146 + if testflag!=0: sys.stderr.write("name = "+str(name)+"\n") 10.147 + current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1].content.append(name) 10.148 + self.leaves.append(current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1]) 10.149 + else: 10.150 + if testflag!=0: sys.stderr.write("New node found:\n"+re.search('[\(\)]',parenthetic_structure[left_margin:i]).group()) 10.151 + #if node, append None insted of name and add it to queue 10.152 + current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1].content.append(None) 10.153 + queue.append((current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1],current_queue_element[0],left_margin,i)) 10.154 + left_margin=i+1 10.155 + if parenthetic_structure[i]==")": 10.156 + wane=wane+1 10.157 + queue.pop(0) 10.158 + 10.159 + 10.160 + def calculate_root_subtree_sum_of_lengthes(self, testflag=0): 10.161 + r""" 10.162 + The Block module will traverse the tree from the root downwards. But the calculation of any subtree's 10.163 + sum of lengthes traverses the tree from leaves upwards. So it makes sense to implement a method, that 10.164 + first calcualtes distnace from the root to each node, traversing the tree downwards, gets stuck on leaves 10.165 + and goes upwards, calculating subtree sum of lengthes for each node and adding it to the distance to that 10.166 + node from the root. 10.167 + Result for each node is stored in content[1]. 10.168 + For leaves distances to the root are stored in content[1] 10.169 + Subtree distances are stored in content[2] of each node 10.170 + 10.171 + >>> import rooted_tree 10.172 + >>> tree=rooted_tree.Rooted_tree() 10.173 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.174 + >>> tree.calculate_root_subtree_sum_of_lengthes() 10.175 + """ 10.176 + #creating a queue, contating the root 10.177 + if testflag!=0: import sys 10.178 + downward_queue=[] 10.179 + upward_queue=[] 10.180 + self.root.content.append(0) 10.181 + self.root.content.append(0) 10.182 + for i in self.root.child_vertices: 10.183 + downward_queue.append(i) 10.184 + #width-first traversing of the tree downwards 10.185 + if testflag!=0: sys.stderr.write("downward_queue = "+str(downward_queue)+"\n") 10.186 + while downward_queue!=[]: 10.187 + if testflag!=0: sys.stderr.write("downward_queue = "+str(downward_queue)+"\n") 10.188 + downward_queue[0].content.append(float(downward_queue[0].parental_edge) + downward_queue[0].parental_vertex.content[1]) 10.189 + if testflag!=0: sys.stderr.write("downward_queue[0].content = "+str(downward_queue[0].content)+"\n") 10.190 + downward_queue[0].content.append(0)#initialization of content[2] 10.191 + downward_queue[0].traversed=0#initialization of flag for upward traverse 10.192 + for i in downward_queue[0].child_vertices: 10.193 + downward_queue.append(i) 10.194 + if downward_queue[0].child_vertices==[]: 10.195 + upward_queue.append(downward_queue[0])#upward_queue intialized with leaves 10.196 + downward_queue.pop(0) 10.197 + #traversing the tree upwards 10.198 + while upward_queue!=[]: 10.199 + if testflag!=0: sys.stderr.write("upward_queue = "+str(upward_queue)+"\n") 10.200 + if upward_queue[0].parental_vertex!= None: 10.201 + upward_queue[0].parental_vertex.content[2]=upward_queue[0].parental_vertex.content[2]+upward_queue[0].content[2]+float(upward_queue[0].parental_edge) 10.202 + upward_queue[0].parental_vertex.content[1]=upward_queue[0].parental_vertex.content[1]+upward_queue[0].content[2]+float(upward_queue[0].parental_edge) 10.203 + upward_queue[0].traversed=1 10.204 + traverse_flag=1 10.205 + for i in upward_queue[0].parental_vertex.child_vertices: 10.206 + if i.traversed==0: traverse_flag=0 10.207 + if traverse_flag==1: 10.208 + upward_queue.append(upward_queue[0].parental_vertex) 10.209 + if testflag!=0: 10.210 + sys.stderr.write("upward_queue[0].content = "+str(upward_queue[0].content) +"\n") 10.211 + sys.stderr.write("upward_queue[0].parental_vertex.content = "+str(upward_queue[0].parental_vertex.content)+"\n") 10.212 + upward_queue.pop(0) 10.213 + #garbage collection 10.214 + for i in self.root.child_vertices: 10.215 + downward_queue.append(i) 10.216 + while downward_queue!=[]: 10.217 + del downward_queue[0].traversed 10.218 + for i in downward_queue[0].child_vertices: 10.219 + downward_queue.append(i) 10.220 + downward_queue.pop(0) 10.221 + 10.222 + 10.223 + def next_node(self,candidate_nodes): 10.224 + r""" 10.225 + Finds next node with the greatest root subrtee sum of lengthes among candidate_nodes, 10.226 + removes it from candidate_nodes and adds its child nodes (NOT vertices) to 10.227 + new_candidate_nodes 10.228 + 10.229 + Input: candidate_nodes 10.230 + Output: tuple (max, new_candidate_nodes) 10.231 + max - node with the gretest root subtree sum of lengthes among candidate_nodes 10.232 + new_candidate_nodes - array "candidate_nodes" without the "max" node, but with "max" 10.233 + children nodes instead (NOT nodes and leaves) 10.234 + 10.235 + >>> import rooted_tree 10.236 + >>> tree = rooted_tree.Rooted_tree() 10.237 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.238 + >>> tree.calculate_root_subtree_sum_of_lengthes() 10.239 + >>> candidate_nodes=[tree.root] 10.240 + >>> max, new_candidate_nodes = tree.next_node(candidate_nodes) 10.241 + """ 10.242 + 10.243 + #find a node among candidates with the greatest root subtree sum of lengthes 10.244 + new_candidate_nodes=candidate_nodes[:] 10.245 + if new_candidate_nodes!=[]: 10.246 + max_index=0 10.247 + for i_,i in enumerate(new_candidate_nodes): 10.248 + if i.content[1]>new_candidate_nodes[max_index].content[1]: 10.249 + max_index=i_ 10.250 + #update new_candidate_nodes 10.251 + max=new_candidate_nodes.pop(max_index) 10.252 + for i in max.child_vertices: 10.253 + if i.child_vertices!=[]: new_candidate_nodes.append(i) 10.254 + return (max,new_candidate_nodes) 10.255 + 10.256 + def next_vertex(self, candidate_vertices): 10.257 + r""" 10.258 + Finds next vertex with the greatest root subtree sum of lengthes among candidate_vertices, 10.259 + removes it from candidate_vertices and adds its child vertices to new_candidate_nodes 10.260 + 10.261 + Input: candidate_nodes 10.262 + Output: tuple (max, new_candidate_vertices) 10.263 + max - vertex with the gretest root subtree sum of lengthes among candidate_nodes 10.264 + new_candidate_vertices - array "candidate_vertices" without the "max" vertex, 10.265 + but with "max" children leaves instead 10.266 + 10.267 + >>> import rooted_tree 10.268 + >>> tree = rooted_tree.Rooted_tree() 10.269 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.270 + >>> tree.calculate_root_subtree_sum_of_lengthes() 10.271 + >>> candidate_vertices=[tree.root] 10.272 + >>> max, new_candidate_vertices=tree.next_vertex(candidate_vertices) 10.273 + 10.274 + """ 10.275 + #find a vertex among candidates with the greatest root subtree sum of lengthes 10.276 + new_candidate_vertices=candidate_vertices[:] 10.277 + if new_candidate_vertices!=[]: 10.278 + max_index=0 10.279 + for i_,i in enumerate(new_candidate_vertices): 10.280 + if i.content[1]>new_candidate_vertices[max_index].content[1]: 10.281 + max_index=i_ 10.282 + #update candidate_vertices 10.283 + max=new_candidate_vertices.pop(max_index) 10.284 + for i in max.child_vertices: 10.285 + new_candidate_vertices.append(i) 10.286 + return (max, new_candidate_vertices) 10.287 + 10.288 + def node_to_set_of_leaves(self, node): 10.289 + r""" 10.290 + Input: Vertex object 10.291 + Output: set_of_leaves list 10.292 + 10.293 + >>> import rooted_tree 10.294 + >>> tree=rooted_tree.Rooted_tree() 10.295 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.296 + >>> set_of_leaves=tree.node_to_set_of_leaves(tree.root.child_vertices[1]) 10.297 + >>> for i in set_of_leaves: 10.298 + ... print i.content 10.299 + ['b'] 10.300 + ['c'] 10.301 + ['d'] 10.302 + """ 10.303 + 10.304 + set_of_leaves=[] 10.305 + queue=[node] 10.306 + #traversing the tree downwards form the node 10.307 + while queue!=[]: 10.308 + current_vertex = queue.pop(0) 10.309 + for i in current_vertex.child_vertices: 10.310 + if i.child_vertices==[]: 10.311 + set_of_leaves.append(i) 10.312 + else: 10.313 + queue.append(i) 10.314 + return set_of_leaves 10.315 + 10.316 + 10.317 + def complement_set_of_leaves(self, set_of_leaves): 10.318 + r""" 10.319 + Input: set_of_leaves list of leaves as Vertex objects 10.320 + Output: complementary_leaves list of leaves as Vertex objects 10.321 + 10.322 + >>> import rooted_tree 10.323 + >>> tree=rooted_tree.Rooted_tree() 10.324 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.325 + >>> for i in tree.leaves: 10.326 + ... print i.content 10.327 + ['a'] 10.328 + ['b'] 10.329 + ['c'] 10.330 + ['d'] 10.331 + >>> set_of_leaves=[tree.leaves[0],tree.leaves[2]] 10.332 + >>> for i in tree.complement_set_of_leaves(set_of_leaves): 10.333 + ... print i.content 10.334 + ['b'] 10.335 + ['d'] 10.336 + """ 10.337 + complementary_leaves=[] 10.338 + #subtract the set_of_leaves from all_leaves to acieve complemenatry_leaves 10.339 + for i in self.leaves: 10.340 + if set_of_leaves.count(i)==0: 10.341 + complementary_leaves.append(i) 10.342 + return complementary_leaves 10.343 + 10.344 + 10.345 + def chop_the_tree(self,set_of_leaves): 10.346 + r""" 10.347 + !!!Returns a new tree object (makes copy somewhere in the beginning) 10.348 + Input: set_of_leaves as Vertex objects 10.349 + 10.350 + >>> import rooted_tree 10.351 + >>> tree=rooted_tree.Rooted_tree() 10.352 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.353 + >>> set_of_leaves=[tree.leaves[2],tree.leaves[3]] 10.354 + >>> tree1=tree.chop_the_tree(set_of_leaves) 10.355 + >>> print tree1.root.child_vertices[0].content 10.356 + ['c', 3.0, 0] 10.357 + >>> print tree1.root.child_vertices[1].content 10.358 + ['d', 3.0, 0] 10.359 + """ 10.360 + import sys 10.361 + import copy 10.362 + #temprorary variable for rset of leaves 10.363 + self.set_of_leaves=set_of_leaves 10.364 + tree=copy.deepcopy(self) 10.365 + del self.set_of_leaves 10.366 + #removing all outsider subtrees 10.367 + queue=tree.complement_set_of_leaves(tree.set_of_leaves) 10.368 + tree.leaves=tree.set_of_leaves 10.369 + del tree.set_of_leaves 10.370 + while queue!=[]: 10.371 + current=queue.pop(0) 10.372 + if current.child_vertices==[] and current!=tree.root: 10.373 + #sys.stderr.write("current = "+str(current)+"\n") 10.374 + #sys.stderr.write("current.parental_vertex.child_vertices = "+str(current.parental_vertex.child_vertices)+"\n") 10.375 + #sys.stderr.write("index of current = "+str(current.parental_vertex.child_vertices.index(current))+"\n") 10.376 + current.parental_vertex.child_vertices.pop(current.parental_vertex.child_vertices.index(current)) 10.377 + if current.parental_vertex.child_vertices==[] or (len(current.parental_vertex.child_vertices)==1 and current.parental_vertex==tree.root): 10.378 + queue.append(current.parental_vertex) 10.379 + elif len(current.child_vertices)==1 and current==tree.root: 10.380 + tree.root=current.child_vertices[0] 10.381 + current.child_vertices[0].content[0]="root" 10.382 + current.child_vertices[0].parental_vertex=current.parental_vertex 10.383 + queue.append(current.child_vertices[0]) 10.384 + 10.385 + queue=[tree.root] 10.386 + while queue!=[]: 10.387 + current=queue.pop(0) 10.388 + if len(current.child_vertices)==1: 10.389 + current.child_vertices[0].parental_vertex=current.parental_vertex 10.390 + #sys.stderr.write("current = "+str(current)+", current.content = "+str(current.content)+"\n") 10.391 + #sys.stderr.write("current.parental_vertex.child_vertices = "+str(current.parental_vertex.child_vertices)+"\n") 10.392 + #sys.stderr.write("index of current = "+str(current.parental_vertex.child_vertices.index(current))+"\n") 10.393 + current.parental_vertex.child_vertices.pop(current.parental_vertex.child_vertices.index(current)) 10.394 + current.parental_vertex.child_vertices.append(current.child_vertices[0]) 10.395 + current.child_vertices[0].parental_edge=float(current.child_vertices[0].parental_edge)+float(current.parental_edge) 10.396 + for i in current.child_vertices: 10.397 + queue.append(i) 10.398 + #REDO calculate_root_subtree_sum_of_lengthes 10.399 + if len(current.content)>1: 10.400 + for i in range(1,len(current.content)): 10.401 + del current.content[1] 10.402 + tree.calculate_root_subtree_sum_of_lengthes() 10.403 + return tree 10.404 + 10.405 + def set_of_leaves_to_sum_of_lengthes_of_root_subtree(self, set_of_leaves): 10.406 + r""" 10.407 + Input: set_of_leaves, which represent sequences with same groups in them in current position 10.408 + Output: sum_of_lengthes_of_root_subtree 10.409 + 10.410 + >>> import rooted_tree 10.411 + >>> tree=rooted_tree.Rooted_tree() 10.412 + >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);') 10.413 + >>> set_of_leaves=[tree.leaves[2],tree.leaves[3]] 10.414 + >>> print tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves) 10.415 + 13.0 10.416 + """ 10.417 + 10.418 + import copy 10.419 + self.set_of_leaves=set_of_leaves 10.420 + tree=copy.deepcopy(self) 10.421 + del self.set_of_leaves 10.422 + queue=tree.complement_set_of_leaves(tree.set_of_leaves) 10.423 + while queue!=[]: 10.424 + current=queue.pop(0) 10.425 + if current.child_vertices==[] and current!=tree.root: 10.426 + current.parental_vertex.child_vertices.pop(current.parental_vertex.child_vertices.index(current)) 10.427 + if current.parental_vertex.child_vertices==[]: 10.428 + queue.append(current.parental_vertex) 10.429 + #sum of lengthes count 10.430 + sum=0 10.431 + queue.append(tree.root) 10.432 + while queue!=[]: 10.433 + current=queue.pop(0) 10.434 + for i in current.child_vertices: 10.435 + queue.append(i) 10.436 + sum=sum+float(i.parental_edge) 10.437 + return sum 10.438 + 10.439 + def depth_first_traversal(self): 10.440 + r""" 10.441 + Returns list of tuples of vertices and their nesting depthes. Root has nesting depth 0. 10.442 + Creates child_vertices_copy for each vertex of the tree 10.443 + 10.444 + """ 10.445 + #width-first traversal in order to create child_vertices_copy arrays 10.446 + for i in self.width_first_traversal(): 10.447 + i.child_vertices_copy=i.child_vertices[:] 10.448 + #depth-first traversal itself 10.449 + output=[] 10.450 + stack=[self.root] 10.451 + while stack!=[]: 10.452 + if stack[-1].child_vertices_copy!=[]: 10.453 + stack.append(stack[-1].child_vertices_copy.pop()) 10.454 + #calculate the nesting depth of stack[-1] 10.455 + counter=0 10.456 + current_vertex=stack[-1] 10.457 + while current_vertex!=self.root: 10.458 + counter+=1 10.459 + current_vertex=current_vertex.parental_vertex 10.460 + output.append((stack[-1],counter)) 10.461 + else: 10.462 + stack.pop() 10.463 + return output 10.464 + 10.465 + def width_first_traversal(self): 10.466 + """ 10.467 + Returns list of all vertices in width-first order 10.468 + """ 10.469 + output=[] 10.470 + queue=[self.root] 10.471 + while queue!=[]: 10.472 + current_vertex=queue.pop(0) 10.473 + output.append(current_vertex) 10.474 + for i in current_vertex.child_vertices: 10.475 + queue.append(i) 10.476 + return output 10.477 + 10.478 +class Vertex(object): 10.479 + r""" 10.480 + Represents a single vertex of the tree 10.481 + Data: 10.482 + content - the Vertex's content list (actually, anything we want to store in each particular case) 10.483 + 10.484 + parental_vertex - The parental Vertex object 10.485 + child_vertices - list of Vertex's children (as Vertex objects) 10.486 + 10.487 + parental_edge - any information concerning parental edge, we want to store 10.488 + """ 10.489 + 10.490 + def __init__(self, content=None, parental_vertex=None, child_vertices=None, parental_edge=None): 10.491 + self.content=content 10.492 + self.parental_vertex=parental_vertex 10.493 + self.child_vertices=child_vertices 10.494 + self.parental_edge=parental_edge 10.495 + if self.content==None: 10.496 + self.content=[] 10.497 + if self.child_vertices==None: 10.498 + self.child_vertices=[] 10.499 + 10.500 + 10.501 +if __name__=="__main__": 10.502 + print "Test"; 10.503 + import doctest; 10.504 + doctest.testmod(extraglobs={'this_is_a_test':True}) 10.505 + 10.506 + 10.507 + 10.508 + 10.509 + 10.510 +
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/sbbs_2009/modules/sequence.py Sat Jul 09 16:30:28 2011 +0400 11.3 @@ -0,0 +1,124 @@ 11.4 +#!/usr/bin/python 11.5 +""" 11.6 +Contain classes: 11.7 + Sequence 11.8 + PDB_to_sequence 11.9 + and other concerning 3D 11.10 +""" 11.11 +import configure 11.12 + 11.13 +class Sequence(object): 11.14 + """ 11.15 + Author: AAl 11.16 + 11.17 + Sequence is a string of aa characters and additinal data 11.18 + DATA: 11.19 + self.sequences # list of sequence objects 11.20 + self.string # string of aa characters; no gap symbols allowed 11.21 + self.name # default name for data export 11.22 + self.description # string 11.23 + 11.24 + Optional 11.25 + self.synonims # dictionary source:name; examples: gi:12345678 AC:P23456 11.26 + self.pdbs # list of pdb_to_sequence objects 11.27 + consensus_ss # 11.28 + important_aa # 11.29 + """ 11.30 +#####!!!!!! def __init__(self, sequences, name=None, description=None, string =None): ##### CHANGED TO NEXT LINE! 11.31 + def __init__(self, sequences, name=None, description=None, string=None): 11.32 + self.sequences=sequences 11.33 + self.name=name 11.34 + self.description=description 11.35 + self.string=string 11.36 + 11.37 + def add_gapped_string(self, sequence_name, sequence_description, sequence_string): 11.38 + r""" 11.39 + Degap sequence string 11.40 + Create sequence object, 11.41 + add it to sequences 11.42 + Return sequence index (in sequences) and string of aa_indexes 11.43 + >>> sequence_name="seq" 11.44 + >>> sequence_description="test sequence" 11.45 + >>> sequence_string="-a--cd--" 11.46 + >>> sequences=[] 11.47 + >>> aaa=Sequence(sequences) 11.48 + >>> print aaa.add_gapped_string(sequence_name,sequence_description,sequence_string) 11.49 + [0, [None, 0, None, None, 1, 2, None, None]] 11.50 + >>> print aaa.sequences[0].string 11.51 + acd 11.52 + 11.53 + """ 11.54 + self.name=sequence_name 11.55 + if self.name in configure.special_sequence_names: 11.56 + return None 11.57 + else: 11.58 + self_description=sequence_description 11.59 + self.string="" 11.60 + string_indexes=[] 11.61 + aa_index=0 11.62 + for x in sequence_string: 11.63 + if x=="-": 11.64 + string_indexes.append(None) 11.65 + else: 11.66 + self.string=self.string + x 11.67 + string_indexes.append(aa_index) 11.68 + aa_index=aa_index+1 11.69 + sequence_index=len(self.sequences) 11.70 + self.sequences.append(self) 11.71 + return [sequence_index, string_indexes] 11.72 + 11.73 + 11.74 + 11.75 +class Pdb_to_sequence(object): 11.76 + """" 11.77 + pdb_to_sequence data: 11.78 + pdb_code 11.79 + sequence_3D 11.80 + ss # ordered list of ss_elements 11.81 + ss_element=(from, to, type) 11.82 + from, to are indexes in sequence 11.83 + type is either "helix" or "strand" 11.84 + helixes 11.85 + strands # ordered list of strands 11.86 + strand=(from,to) 11.87 + sheets # list of beta_sheet objects 11.88 + """ 11.89 + def __init__(self, sequence): 11.90 + self.sequence=sequence 11.91 + 11.92 +class Beta_sheet(object): 11.93 + """ 11.94 + beta_sheet data: 11.95 + self.sequence # 11.96 + self.meridians # soretd list of sheet_meridian objects 11.97 + self.parallels # sorted list of sheet_parallel objects 11.98 + self.marginals # list of marginal aa residues in a sheet 11.99 + """ 11.100 + def __init__(self,sequence): 11.101 + self.sequence=sequence 11.102 +class Sheet_meridian(object): 11.103 + """ 11.104 + sheet_meridian data: 11.105 + strands # list of strands; typically, consists of one strand 11.106 + 11.107 + """ 11.108 +class Sheet_parallel(object): 11.109 + """ 11.110 + sheet_parallel data: 11.111 + self.num # number of the parellel in beta_sheet ? 11.112 + self.cross_rows # list of cross_rows; typically, consists of one cross_row 11.113 + cross_row=[aas, side] 11.114 + aas is a list of aa indexes insequence 11.115 + side is either "in" or "out" 11.116 + 11.117 + """ 11.118 + 11.119 + 11.120 +if __name__ == "__main__": 11.121 + print "Hello!" 11.122 + import doctest 11.123 + doctest.testmod() 11.124 + 11.125 + 11.126 + 11.127 +