Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/ccee8a6023f4
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:10:50 2012
Кодировка:
allpy: ccee8a6023f4

allpy

changeset 737:ccee8a6023f4

sbbs_2009 created. test_homology.py contains function that writes classes of homology to file.
author Boris Burkov <BurkovBA@gmail.com>
date Sat, 09 Jul 2011 16:30:28 +0400
parents eb2aa74497d5
children f66135a68ae5
files sbbs_2009/blocks_finder.py sbbs_2009/modules/Lgamma.py sbbs_2009/modules/alignment.py sbbs_2009/modules/block.py sbbs_2009/modules/blocks.py sbbs_2009/modules/blocks_io.py sbbs_2009/modules/configure.py sbbs_2009/modules/main.py sbbs_2009/modules/phylip.py sbbs_2009/modules/rooted_tree.py sbbs_2009/modules/sequence.py sbbs_2009/test_homology.py
diffstat 12 files changed, 3193 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/sbbs_2009/blocks_finder.py	Sat Jul 09 16:30:28 2011 +0400
     1.3 @@ -0,0 +1,170 @@
     1.4 +#! /usr/bin/python
     1.5 +
     1.6 +import sys
     1.7 +import traceback
     1.8 +
     1.9 +sys.path.append("./modules")
    1.10 +import block
    1.11 +import blocks
    1.12 +import alignment
    1.13 +import rooted_tree
    1.14 +
    1.15 +
    1.16 +#def main(current_time):
    1.17 +#    import cgi
    1.18 +#    import os
    1.19 +#    form = cgi.FieldStorage()
    1.20 +#    if form.has_key("Alignment") and form["Alignment"].value!="":
    1.21 +#        temp_file=open("../jobs/"+str(current_time)+".job",'w')
    1.22 +#        temp_file.write(str(form["Alignment"].value))
    1.23 +#        temp_file.close()
    1.24 +#        _generate_pending_page(str(current_time))
    1.25 +#        if not os.fork():
    1.26 +#            _main("../jobs/"+str(current_time)+".job")
    1.27 +#            logfile=open("../jobs/"+str(current_time)+".log",'w')
    1.28 +#            logfile.write("Done\n")
    1.29 +#            logfile.close()
    1.30 +#    else:
    1.31 +#        if form.has_key("uploaded_file") and form["uploaded_file"].value!="":
    1.32 +#            temp_file=open("../jobs/"+str(current_time)+".job",'w')
    1.33 +#            temp_file.write(form["uploaded_file"].value)
    1.34 +#            temp_file.close()
    1.35 +#            _generate_pending_page(str(current_time))
    1.36 +#            if not os.fork():
    1.37 +#                _main("../jobs/"+str(current_time)+".job")
    1.38 +#                logfile=open("../jobs/"+str(current_time)+".log",'w')
    1.39 +#                logfile.write("Done\n")
    1.40 +#                logfile.close()
    1.41 +#        else:
    1.42 +#            print "Content-Type: text/plain\n"
    1.43 +#            print "I can't see any input. :) Please, submit your alignment."
    1.44 +
    1.45 +
    1.46 +def main(file_name):
    1.47 +    alignment1 = alignment.Alignment([])
    1.48 +    alignment1.get_from_fasta(open(file_name))
    1.49 +    #initialization of blocks and initial_block
    1.50 +    blocks1=blocks.Blocks(alignment, None, None, None)
    1.51 +    initial_block=block.Block(alignment1, alignment1.sequence_indexes_s, alignment1.position_indexes_a)
    1.52 +    initial_block.state="pending"
    1.53 +    #initialization of rooted_tree of blocks
    1.54 +    tree_of_blocks = rooted_tree.Rooted_tree()
    1.55 +    tree_of_blocks.root.child_vertices.append(rooted_tree.Vertex([initial_block],tree_of_blocks.root,[],None))
    1.56 +    tree_of_blocks.root.content[0]=block.plug()
    1.57 +    #there's no need in stack of blocks: to end the process we just need all the children of the initial_block to have status "processed", "aligned" or "no alignment"
    1.58 +    #traverse of the rooted_tree
    1.59 +    #all this info is available via "sys" module "_something" internal function or "traceback" module
    1.60 +    current_vertex=tree_of_blocks.root.child_vertices[0]
    1.61 +
    1.62 +    while current_vertex!=tree_of_blocks.root:
    1.63 +        aligned_block=current_vertex.content[0].get_aligned_block()
    1.64 +
    1.65 +        if aligned_block != None:
    1.66 +            #This is upsize, I've found yet another glitch in it. So, I've turned it off as a whole for now:
    1.67 +            #aligned_block=current_vertex.content[0].upsize(aligned_block)
    1.68 +            current_vertex.content[0].state="in process"
    1.69 +            blocks1.aligned_blocks.append(aligned_block)
    1.70 +            current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block],current_vertex,[],None))
    1.71 +            if aligned_block.block_of_gaps!=None: current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block.block_of_gaps],current_vertex,[],None))
    1.72 +            complementary_blocks=current_vertex.content[0].find_complementary_blocks(aligned_block.position_indexes_a, aligned_block.sequence_indexes_a)
    1.73 +            for i in complementary_blocks:
    1.74 +                current_vertex.child_vertices.append(rooted_tree.Vertex([i],current_vertex,[],None))
    1.75 +            current_vertex=_find_next_current_vertex(current_vertex)
    1.76 +        else:
    1.77 +            current_vertex=_find_next_current_vertex(current_vertex)
    1.78 +
    1.79 +    blocks_for_print=tree_of_blocks.depth_first_traversal()
    1.80 +    return blocks_for_print
    1.81 +
    1.82 +
    1.83 +def blocks2homology_file(blocks, homology_file):
    1.84 +    sys.path.append("../allpy/")
    1.85 +    from homology import MonomerHomology
    1.86 +    class_counter = 0
    1.87 +    for b in blocks:
    1.88 +        if b[0].content[0].state == "aligned":
    1.89 +            for sa in b[0].content[0].sequence_indexes_a:
    1.90 +                sequence_name = b[0].content[0].alignment.sequences[b[0].content[0].alignment.sequence_indexes_s[sa]].name
    1.91 +                for pa in b[0].content[0].position_indexes_a:
    1.92 +                    if b[0].content[0].alignment.get_aa(sa,pa)!=None:
    1.93 +                        monomer_id = (sequence_name, pa+1)
    1.94 +                        MonomerHomology.write_monomer(homology_file, monomer_id, class_counter)
    1.95 +            class_counter+=1
    1.96 +        else:
    1.97 +            if b[0].content[0].state == "no_alignment":
    1.98 +                #print each monomer as a separate class
    1.99 +                for sa in b[0].content[0].sequence_indexes_a:
   1.100 +                    sequence_name = b[0].content[0].alignment.sequences[b[0].content[0].alignment.sequence_indexes_s[sa]].name
   1.101 +                    for pa in b[0].content[0].position_indexes_a:
   1.102 +                        if b[0].content[0].alignment.get_aa(sa, pa) != None:
   1.103 +                            monomer_id = (sequence_name, pa+1)
   1.104 +                            MonomerHomology.write_monomer(homology_file, monomer_id, class_counter)
   1.105 +                            class_counter+=1
   1.106 +
   1.107 +
   1.108 +
   1.109 +def _find_next_current_vertex(current_vertex):
   1.110 +    """
   1.111 +    This is an internal routine
   1.112 +    """
   1.113 +    flag=0
   1.114 +    while flag!=1:
   1.115 +        if current_vertex.content[0].state=="in process":
   1.116 +            second_flag=0
   1.117 +            for i in current_vertex.child_vertices:
   1.118 +                if i.content[0].state=="pending":
   1.119 +                    current_vertex=i
   1.120 +                    return current_vertex
   1.121 +                elif i.content[0].state=="in process":
   1.122 +                    current_vertex=i
   1.123 +                    second_flag=1
   1.124 +            if second_flag==0:
   1.125 +                current_vertex.content[0].state="processed"
   1.126 +                current_vertex=current_vertex.parental_vertex
   1.127 +        elif current_vertex.content[0].state=="processed":
   1.128 +            current_vertex=current_vertex.parental_vertex
   1.129 +        elif current_vertex.content[0].state=="no_alignment":
   1.130 +            current_vertex=current_vertex.parental_vertex
   1.131 +        elif current_vertex.content[0].state=="initial":
   1.132 +            return current_vertex
   1.133 +        elif current_vertex.content[0].state=="pending":
   1.134 +            return current_vertex
   1.135 +
   1.136 +def _generate_next_page(current_time):
   1.137 +
   1.138 +    print "Content-Type: text/html\n"
   1.139 +    print "<html>"
   1.140 +    print "<head>"
   1.141 +    print "<title>Blocks finder - Please, select output format</title>"
   1.142 +    print "</head>"
   1.143 +    print "<body bgcolor=\"#FFFFFF\">"
   1.144 +    print "   <H1> Your job's name is "+str(current_time)+"</H1>"
   1.145 +    print "   <P><A href=\"exceller.py?"+current_time+".job\">Output as a plain file (suitable for converting it into Excel table)</A>"
   1.146 +    print "   <P><A href=\"viewer.py?"+current_time+".job\">Visualisation of output</A>"
   1.147 +    print "</body>"
   1.148 +    print "</html>"
   1.149 +
   1.150 +    sys.stdout.flush()
   1.151 +
   1.152 +
   1.153 +def _generate_pending_page(current_time):
   1.154 +
   1.155 +    print "Content-Type: text/html\n"
   1.156 +    print "<html>"
   1.157 +    print "<head>"
   1.158 +    print "<title>Blocks finder - Job's being processed</title>"
   1.159 +    print "<script language=\"JavaScript1.2\">"
   1.160 +    print "  function reLoad(){"
   1.161 +    print "  window.location=\'http://mouse.belozersky.msu.ru/~burkov/cgi-bin/whistler.py?"+str(current_time)+"\'"
   1.162 +    #print "  setTimeout(\"reLoad()\",10000);"
   1.163 +    print "  }"
   1.164 +    print "</script>"
   1.165 +    print "</head>"
   1.166 +    print "<body bgcolor=\"#FFFFFF\" onload=setTimeout(\'reLoad()\',10000)>"
   1.167 +    print "   <H1> Your job\'s name is "+str(current_time)+"</H1>"
   1.168 +    print "   <div>When it\'s done, you'll be redirected to the results page.</div>"
   1.169 +    print "</body>"
   1.170 +    print "</html>"
   1.171 +
   1.172 +    sys.stdout.flush()
   1.173 +
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/sbbs_2009/modules/Lgamma.py	Sat Jul 09 16:30:28 2011 +0400
     2.3 @@ -0,0 +1,234 @@
     2.4 +from math import *
     2.5 +
     2.6 +two52=  4.50359962737049600000e+15
     2.7 +half=  5.00000000000000000000e-01
     2.8 +one =  1.00000000000000000000e+00
     2.9 +pi  =  3.14159265358979311600e+00
    2.10 +a0  =  7.72156649015328655494e-02
    2.11 +a1  =  3.22467033424113591611e-01
    2.12 +a2  =  6.73523010531292681824e-02
    2.13 +a3  =  2.05808084325167332806e-02
    2.14 +a4  =  7.38555086081402883957e-03
    2.15 +a5  =  2.89051383673415629091e-03
    2.16 +a6  =  1.19270763183362067845e-03
    2.17 +a7  =  5.10069792153511336608e-04
    2.18 +a8  =  2.20862790713908385557e-04
    2.19 +a9  =  1.08011567247583939954e-04
    2.20 +a10 =  2.52144565451257326939e-05
    2.21 +a11 =  4.48640949618915160150e-05
    2.22 +tc  =  1.46163214496836224576e+00
    2.23 +tf  = -1.21486290535849611461e-01
    2.24 +##/* tt = -(tail of tf) */
    2.25 +tt  = -3.63867699703950536541e-18
    2.26 +t0  =  4.83836122723810047042e-01
    2.27 +t1  = -1.47587722994593911752e-01
    2.28 +t2  =  6.46249402391333854778e-02
    2.29 +t3  = -3.27885410759859649565e-02
    2.30 +t4  =  1.79706750811820387126e-02
    2.31 +t5  = -1.03142241298341437450e-02
    2.32 +t6  =  6.10053870246291332635e-03
    2.33 +t7  = -3.68452016781138256760e-03
    2.34 +t8  =  2.25964780900612472250e-03
    2.35 +t9  = -1.40346469989232843813e-03
    2.36 +t10 =  8.81081882437654011382e-04
    2.37 +t11 = -5.38595305356740546715e-04
    2.38 +t12 =  3.15632070903625950361e-04
    2.39 +t13 = -3.12754168375120860518e-04
    2.40 +t14 =  3.35529192635519073543e-04
    2.41 +u0  = -7.72156649015328655494e-02
    2.42 +u1  =  6.32827064025093366517e-01
    2.43 +u2  =  1.45492250137234768737e+00
    2.44 +u3  =  9.77717527963372745603e-01
    2.45 +u4  =  2.28963728064692451092e-01
    2.46 +u5  =  1.33810918536787660377e-02
    2.47 +v1  =  2.45597793713041134822e+00
    2.48 +v2  =  2.12848976379893395361e+00
    2.49 +v3  =  7.69285150456672783825e-01
    2.50 +v4  =  1.04222645593369134254e-01
    2.51 +v5  =  3.21709242282423911810e-03
    2.52 +s0  = -7.72156649015328655494e-02
    2.53 +s1  =  2.14982415960608852501e-01
    2.54 +s2  =  3.25778796408930981787e-01
    2.55 +s3  =  1.46350472652464452805e-01
    2.56 +s4  =  2.66422703033638609560e-02
    2.57 +s5  =  1.84028451407337715652e-03
    2.58 +s6  =  3.19475326584100867617e-05
    2.59 +r1  =  1.39200533467621045958e+00
    2.60 +r2  =  7.21935547567138069525e-01
    2.61 +r3  =  1.71933865632803078993e-01
    2.62 +r4  =  1.86459191715652901344e-02
    2.63 +r5  =  7.77942496381893596434e-04
    2.64 +r6  =  7.32668430744625636189e-06
    2.65 +w0  =  4.18938533204672725052e-01
    2.66 +w1  =  8.33333333333329678849e-02
    2.67 +w2  = -2.77777777728775536470e-03
    2.68 +w3  =  7.93650558643019558500e-04
    2.69 +w4  = -5.95187557450339963135e-04
    2.70 +w5  =  8.36339918996282139126e-04
    2.71 +w6  = -1.63092934096575273989e-03
    2.72 +zero=  0.00000000000000000000e+00
    2.73 +
    2.74 +##inf = float('inf')
    2.75 +##nan = float('nan')
    2.76 +inf = float(9e999)
    2.77 +
    2.78 +def sin_pi(x):
    2.79 +    x = float(x)
    2.80 +    e,ix = frexp(x)
    2.81 +    if(abs(x)<0.25):
    2.82 +        return -sin(pi*x)
    2.83 +    y = -x  ##/* x is assume negative */
    2.84 +
    2.85 +## * argument reduction, make sure inexact flag not raised if input
    2.86 +## * is an integer
    2.87 +    z = floor(y)
    2.88 +    if(z!=y):
    2.89 +        y *= 0.5
    2.90 +        y = 2.0*(y - floor(y))  ##/* y = |x| mod 2.0 */
    2.91 +        n = int(y*4.0)
    2.92 +    else:
    2.93 +        if(abs(ix)>=53):
    2.94 +            y = zero
    2.95 +            n = 0  ##/* y must be even */
    2.96 +        else:
    2.97 +            if(abs(ix)<52):
    2.98 +                z = y+two52 ##/* exact */
    2.99 +            e,n=frexp(z)
   2.100 +            n &= 1
   2.101 +            y  = n
   2.102 +            n<<= 2
   2.103 +
   2.104 +    if n == 0:
   2.105 +        y = sin(pi*y)
   2.106 +    elif (n == 1 or n == 2):
   2.107 +        y = cos(pi*(0.5-y))
   2.108 +    elif (n == 3 or n == 4):
   2.109 +        y = sin(pi*(one-y))
   2.110 +    elif (n == 5 or n == 6):
   2.111 +        y = -cos(pi*(y-1.5))
   2.112 +    else:
   2.113 +        y = sin(pi*(y-2.0))
   2.114 +
   2.115 +    z = cos(pi*(z+1.0));
   2.116 +    return -y*z
   2.117 +
   2.118 +def Lgamma(x):
   2.119 +    '''return natural logarithm of gamma function of x
   2.120 +    raise ValueError if x is negative integer'''
   2.121 +    x = float(x)
   2.122 +
   2.123 +    ##/* purge off +-inf, NaN, +-0, and negative arguments */
   2.124 +    if ((x == inf) or (x == -inf)):
   2.125 +        return inf
   2.126 +##    if (x is nan):
   2.127 +##        return nan
   2.128 +
   2.129 +    e,ix = frexp(x)
   2.130 +    nadj = 0
   2.131 +    signgamp = 1
   2.132 +
   2.133 +    if ((e==0.0) and (ix==0)):
   2.134 +        return inf
   2.135 +
   2.136 +    if (ix>1020):
   2.137 +        return inf
   2.138 +
   2.139 +    if ((e != 0.0) and (ix<-71)):
   2.140 +        if (x<0):
   2.141 +            return -log(-x)
   2.142 +        else:
   2.143 +            return -log(x)
   2.144 +
   2.145 +    if (e<0):
   2.146 +        if (ix>52):
   2.147 +            return inf ##one/zero
   2.148 +        t = sin_pi(x)
   2.149 +        if (t==zero):
   2.150 +            ##return inf
   2.151 +            raise ValueError('gamma not defined for negative integer')
   2.152 +        nadj = log(pi/fabs(t*x))
   2.153 +        if (t<zero):
   2.154 +            signgamp = -1
   2.155 +        x = -x
   2.156 +
   2.157 +    ##/* purge off 1 and 2 */
   2.158 +    if ((x==2.0) or (x==1.0)):
   2.159 +        r = 0.0
   2.160 +
   2.161 +    ##/* for x < 2.0 */
   2.162 +    elif (ix<2):
   2.163 +        if(x<=0.9): ##/* lgamma(x) = lgamma(x+1)-log(x) */
   2.164 +            r = -log(x)
   2.165 +            if (x>=0.7316):
   2.166 +                y = one-x
   2.167 +                z = y*y
   2.168 +                p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))))
   2.169 +                p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))))
   2.170 +                p  = y*p1+p2
   2.171 +                r  += (p-0.5*y)
   2.172 +            elif (x>=0.23164):
   2.173 +                y= x-(tc-one)
   2.174 +                z = y*y
   2.175 +                w = z*y
   2.176 +                p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))) ##	/* parallel comp */
   2.177 +                p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)))
   2.178 +                p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)))
   2.179 +                p  = z*p1-(tt-w*(p2+y*p3))
   2.180 +                r += (tf + p)
   2.181 +            else:
   2.182 +                y = x
   2.183 +                p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))))
   2.184 +                p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))))
   2.185 +                r += (-0.5*y + p1/p2)
   2.186 +        else:
   2.187 +            r = zero
   2.188 +            if(x>=1.7316):
   2.189 +                y=2.0-x ##/* [1.7316,2] */
   2.190 +                z = y*y
   2.191 +                p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))))
   2.192 +                p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))))
   2.193 +                p  = y*p1+p2
   2.194 +                r  += (p-0.5*y)
   2.195 +            elif(x>=1.23164):
   2.196 +                y=x-tc ##/* [1.23,1.73] */
   2.197 +                z = y*y
   2.198 +                w = z*y
   2.199 +                p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))) ##	/* parallel comp */
   2.200 +                p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)))
   2.201 +                p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)))
   2.202 +                p  = z*p1-(tt-w*(p2+y*p3))
   2.203 +                r += (tf + p)
   2.204 +            else:
   2.205 +                y=x-one
   2.206 +                p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))))
   2.207 +                p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))))
   2.208 +                r += (-0.5*y + p1/p2)
   2.209 +    ##/* x < 8.0 */
   2.210 +    elif(ix<4):
   2.211 +        i = int(x)
   2.212 +        t = zero
   2.213 +        y = x-i
   2.214 +        p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))))
   2.215 +        q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))
   2.216 +        r = half*y+p/q
   2.217 +        z = one ##/* lgamma(1+s) = log(s) + lgamma(s) */
   2.218 +        while (i>2):
   2.219 +            i -=1
   2.220 +            z *= (y+i)
   2.221 +        r += log(z)
   2.222 +
   2.223 +    ##/* 8.0 <= x < 2**58 */
   2.224 +    elif (ix<58):
   2.225 +        t = log(x)
   2.226 +        z = one/x
   2.227 +        y = z*z
   2.228 +        w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))))
   2.229 +        r = (x-half)*(t-one)+w
   2.230 +
   2.231 +    ##/* 2**58 <= x <= inf */
   2.232 +    else:
   2.233 +        r =  x*(log(x)-one)
   2.234 +
   2.235 +    if (e<0):
   2.236 +        r = nadj - r
   2.237 +    return signgamp*r
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/sbbs_2009/modules/alignment.py	Sat Jul 09 16:30:28 2011 +0400
     3.3 @@ -0,0 +1,331 @@
     3.4 +#!/usr/bin/python
     3.5 +"""
     3.6 +Contain classes:
     3.7 +    Alignment
     3.8 +    AlignmentExceptions
     3.9 +
    3.10 +    A bit of phylosophy. Class Alignment actually represents relation between
    3.11 +    Blocks, i.e. a system of blocks in the alignment, and  Sequences, a list of Sequence objects
    3.12 +
    3.13 +"""
    3.14 +import configure
    3.15 +import os
    3.16 +from sequence import Sequence
    3.17 +
    3.18 +class AlignmentExceptions(Exception):
    3.19 +    pass
    3.20 +
    3.21 +
    3.22 +class Alignment(object):
    3.23 +    """
    3.24 +    Alignment consists of a body and additional data
    3.25 +    Body rows are in 1-1 correspondence with subset of Sequence objects in the list Sequences
    3.26 +        No assuptions about the location and the order of that subset in the list are assumed
    3.27 +    Body columns are alignment positions
    3.28 +    Body elements are indexes of Sequence amino acids in Sequence object
    3.29 +
    3.30 +    Data:
    3.31 +        MAIN BODY
    3.32 +            self.sequences           # list of sequences, each member is a sequence object
    3.33 +            self.sequence_indexes_s  # list of indexes in the list of sequences
    3.34 +            self.position_indexes_a  # list of position indexes in the alignment; actually, 0, 1, .., Len(alignment)-1
    3.35 +            self.body                # matrix  of indexes;
    3.36 +                                         an index is the index  of an aa in the sequence   or None instead of gap symbols
    3.37 +                 NEW!!!
    3.38 +                 # aligned_blocks_in_sequence_indexes_a is an element of the list self.aligned_blocks_in_sequences below
    3.39 +                 # list of tripples
    3.40 +                 #  [aligned_block_index,from_position_index_a,to_position_index_a],
    3.41 +                 #  decribing intersection (if exists) of an aligned block with a sequence
    3.42 +                 # Those lists are stored in the list
    3.43 +            self.aligned_blocks_in_sequences
    3.44 +
    3.45 +    Methods:
    3.46 +        +self.get_from_fasta                # fasta file expected
    3.47 +        +self.add_aligned_sequence          # It is assumed that string contains a sequence aligned to other sequences of the alignment
    3.48 +        +self.get_from_file                 # first, convert to fasta by seqret
    3.49 +        +self.get_aa                        # returns aa or None
    3.50 +         self.initiate_blocks
    3.51 +         self.add_aligned_block_to_sequence # Modifing self.aligned_blocks_in_sequences
    3.52 +
    3.53 +    """
    3.54 +
    3.55 +    def __init__(self, sequences, sequence_indexes_s=None, position_indexes_a=None, body=None):
    3.56 +        self.sequences=sequences       # a list of sequence objects
    3.57 +        self.sequence_indexes_s=sequence_indexes_s
    3.58 +        self.position_indexes_a=position_indexes_a
    3.59 +        self.body=body
    3.60 +        if self.sequence_indexes_s==None:
    3.61 +            self.sequence_indexes_s=[]
    3.62 +        if self.position_indexes_a==None:
    3.63 +            self.position_indexes_a=[]
    3.64 +        if self.body==None:
    3.65 +            self.body=[]
    3.66 +
    3.67 +    def get_aa(self,sequence_index_a, position_index_a):
    3.68 +        """
    3.69 +        Author: AAl
    3.70 +        Status: tested successfully in the frame of get_from_fasta
    3.71 +
    3.72 +        sequence_index_a is an index in self.sequence_indexes_s, NOT IN the list seequences! BE CUREFULL!!!
    3.73 +        """
    3.74 +        try:
    3.75 +            body_element=self.body[sequence_index_a][position_index_a]
    3.76 +        except IndexError, errmessage:
    3.77 +            raise AlignmentExceptions("AlignmentError, body_element: " + str(errmessage))
    3.78 +        if body_element == None:
    3.79 +            return None
    3.80 +        else:
    3.81 +            try:
    3.82 +                sequence_index_s=self.sequence_indexes_s[sequence_index_a]
    3.83 +            except IndexError, errmessage:
    3.84 +                raise AlignmentException("AlignmentError, contradiction in the object:\
    3.85 +                                          len(sequence_indexes_s) < len(body). " + + str(errmessage))
    3.86 +            try:
    3.87 +                aa=self.sequences[sequence_index_s].string[body_element]
    3.88 +            except IndexError, errmessage:
    3.89 +                raise AlignmentException("AlignmentError, contradiction in the object:\
    3.90 +                                          sequence_indexe_s from body is out of sequences. " + + str(errmessage))
    3.91 +            return aa
    3.92 +
    3.93 +    def get_from_file(self, file_name):
    3.94 +        """
    3.95 +        Get alignment from a file with given filename
    3.96 +        ttt.msf:
    3.97 +        !!AA_MULTIPLE_ALIGNMENT 1.0
    3.98 +
    3.99 +          ttt.msf MSF:  7 Type: P 06/02/09 CompCheck: 6874 ..
   3.100 +
   3.101 +          Name: seq_1      Len: 7  Check: 2819 Weight: 1.00
   3.102 +          Name: seq_2      Len: 7  Check: 1818 Weight: 1.00
   3.103 +          Name: seq_3      Len: 7  Check: 2237 Weight: 1.00
   3.104 +
   3.105 +        //
   3.106 +
   3.107 +                   1     7
   3.108 +        seq_1      ~~acd~~
   3.109 +        seq_2      aaa.def
   3.110 +        seq_3      aa.cde~
   3.111 +
   3.112 +        >>> sequences=[]
   3.113 +        >>> test_alignment=Alignment(sequences)
   3.114 +        >>> test_alignment.get_from_file("test.msf")
   3.115 +        >>> print len(sequences)
   3.116 +        3
   3.117 +        >>> print test_alignment.sequences[0].string
   3.118 +        acd
   3.119 +        >>> print test_alignment.sequences[1].string
   3.120 +        aaadef
   3.121 +        >>> print test_alignment.sequences[2].string
   3.122 +        aacde
   3.123 +        >>> print test_alignment.body[0]
   3.124 +        [None, None, 0, 1, 2, None, None]
   3.125 +        >>> print test_alignment.body[1]
   3.126 +        [0, 1, 2, None, 3, 4, 5]
   3.127 +        >>> print test_alignment.body[2]
   3.128 +        [0, 1, None, 2, 3, 4, None]
   3.129 +        >>> print test_alignment.position_indexes_a
   3.130 +        [0, 1, 2, 3, 4, 5, 6]
   3.131 +        >>> print test_alignment.sequence_indexes_s
   3.132 +        [0, 1, 2]
   3.133 +        >>> print test_alignment.get_aa(2, 5)
   3.134 +        e
   3.135 +        >>> print test_alignment.get_aa( 0, 1)
   3.136 +        None
   3.137 +        """
   3.138 +        import sys
   3.139 +        os.system("seqret " + file_name + " ../jobs/temp_alignment.fasta")
   3.140 +        try:
   3.141 +            f=open("../jobs/temp_alignment.fasta")
   3.142 +            self.get_from_fasta(f)
   3.143 +            f.close()
   3.144 +        except IOError, errmessage:
   3.145 +            #print errmessage
   3.146 +            #print "file_name = "+str(file_name)
   3.147 +            raise "Can not read alignment!!!"
   3.148 +        #os.system("rm ../jobs/temp_alignment.fasta")
   3.149 +
   3.150 +
   3.151 +    def get_from_fasta(self,file):
   3.152 +        r"""
   3.153 +        Author: AAl
   3.154 +        Status: tested successfully
   3.155 +
   3.156 +        - Reads file with alignment in fasta format
   3.157 +             Sequence names are recognized according to fasta format (between ">" and space character)
   3.158 +                The rest of 1st line is considered as description
   3.159 +             Sequences with special names ("aligned" etc.) are ignored
   3.160 +             Duplications of sequence names are ALLOWED
   3.161 +             Whitespace symbols and numbers are ignored; see missing_characters in configure.py
   3.162 +             Empty lines are ignored
   3.163 +             All gap symbols ".", "~", "-" are replased by "-"
   3.164 +        - Creates self.body
   3.165 +        - Creates sequence objects and include them in sequences list
   3.166 +        - Creates sequence_indexes_s
   3.167 +        - Creates position_indexes
   3.168 +
   3.169 +
   3.170 +        test.fasta:
   3.171 +        >seq_1  first seq
   3.172 +        .-acd-
   3.173 +
   3.174 +        >seq_2 second seq
   3.175 +        aaa-def
   3.176 +        >seq_3
   3.177 +        aa
   3.178 +        -cde.
   3.179 +
   3.180 +        >>> sequences=[]
   3.181 +        >>> name="test"
   3.182 +        >>> test_alignment=Alignment(sequences)
   3.183 +        >>> f=open("test.fasta")
   3.184 +        >>> test_alignment.get_from_fasta(f)
   3.185 +        >>> f.close()
   3.186 +        >>> print len(sequences)
   3.187 +        3
   3.188 +        >>> print test_alignment.sequences[0].string
   3.189 +        acd
   3.190 +        >>> print test_alignment.sequences[1].string
   3.191 +        aaadef
   3.192 +        >>> print test_alignment.sequences[2].string
   3.193 +        aacde
   3.194 +        >>> print test_alignment.body[0]
   3.195 +        [None, None, 0, 1, 2, None, None]
   3.196 +        >>> print test_alignment.body[1]
   3.197 +        [0, 1, 2, None, 3, 4, 5]
   3.198 +        >>> print test_alignment.body[2]
   3.199 +        [0, 1, None, 2, 3, 4, None]
   3.200 +        >>> print test_alignment.position_indexes_a
   3.201 +        [0, 1, 2, 3, 4, 5, 6]
   3.202 +        >>> print test_alignment.sequence_indexes_s
   3.203 +        [0, 1, 2]
   3.204 +        >>> print test_alignment.get_aa(2, 5)
   3.205 +        e
   3.206 +        >>> print test_alignment.get_aa( 0, 1)
   3.207 +        None
   3.208 +        """
   3.209 +#Read file
   3.210 +        file_lines=file.readlines()
   3.211 +#Initialise
   3.212 +        sequence_line_num=0     # number of file line describing a sequence;
   3.213 +        sequence_string=""      # sequence from file written in one string;
   3.214 +                                # gap sybbols changed to "-" ; length does not controlled
   3.215 +        sequence_name=""        # ">name description" is expected
   3.216 +        sequence_description=""
   3.217 +        self.sequence_indexes=[]
   3.218 +        self.position_indexes=[]
   3.219 +        self.body =[]
   3.220 +        num_of_ignored_strings=0
   3.221 +
   3.222 +        for line in file_lines:
   3.223 +
   3.224 +#Ignore empty line
   3.225 +            if len(line.strip())==0:
   3.226 +                continue
   3.227 +
   3.228 +#1st sequence line
   3.229 +            if line[0]==">":
   3.230 +                if sequence_line_num > 0 :
   3.231 +#ignore special names
   3.232 +                    if sequence_name.lower() in configure.special_sequence_names:
   3.233 +                        num_of_ignored_strings=num_of_ignored_strings+1
   3.234 +#Close previouse sequence
   3.235 +                    else:
   3.236 +                        new_sequence=self.add_aligned_sequence(sequence_name,sequence_description,sequence_string)
   3.237 +#Open new sequence
   3.238 +                sequence_line_num=1
   3.239 +                sequence_name = line.split()[0][1:].strip()
   3.240 +                if len(line.split()) > 1:
   3.241 +                    sequence_description=line.split()[1].strip()
   3.242 +                else:
   3.243 +                    sequence_description=""
   3.244 +                sequence_string = ""
   3.245 +            else:
   3.246 +#Extend sequence
   3.247 +                sequence_line_num=sequence_line_num + 1
   3.248 +                for y in line.strip():
   3.249 +                    if not y in configure.missing_characters:
   3.250 +                        if not y in configure.gap_symbols:
   3.251 +                            sequence_string = sequence_string + y.lower()
   3.252 +                        else:
   3.253 +                            sequence_string = sequence_string + "-"
   3.254 +#Close last sequence
   3.255 +        if sequence_line_num>1:
   3.256 +            new_sequence=self.add_aligned_sequence(sequence_name,sequence_description,sequence_string)
   3.257 +
   3.258 +#Compute alignment width and height
   3.259 +        self.length=len(self.sequence_indexes_s)
   3.260 +        self.width=0
   3.261 +        for i in range(len(self.sequence_indexes_s)):
   3.262 +            if len(self.body[i]) > self.width:
   3.263 +                self.width=len(self.body[i])
   3.264 +#Change absent end positions to None
   3.265 +        for i in range(len(self.sequence_indexes_s)):
   3.266 +            if len(self.body[i]) < self.width:
   3.267 +                self.body[i] = self.body[i]+ [None]*(self.width-len(self.body[i]))
   3.268 +
   3.269 +#Creat position_indexes_a
   3.270 +        self.position_indexes_a=range(self.width)
   3.271 +
   3.272 +
   3.273 +    def add_aligned_sequence(self, sequence_name, sequence_description, sequence_string):
   3.274 +         """
   3.275 +         Author: AAl
   3.276 +         Status: tested successfully in the frame of get_from_fasta
   3.277 +         """
   3.278 +#####!!!!!!   sequence = Sequence(self.sequences)  ##### CHANGED TO NEXT LINE!
   3.279 +         sequence = Sequence(self.sequences)
   3.280 +         result=sequence.add_gapped_string(sequence_name, sequence_description, sequence_string)
   3.281 +         if result == None:
   3.282 +             return None
   3.283 +         else:
   3.284 +             self.sequence_indexes_s.append(result[0])
   3.285 +             self.body.append(result[1])
   3.286 +
   3.287 +    def initiate_blocks(self,blocks):
   3.288 +         """
   3.289 +         Author: AAl
   3.290 +         Status: should be compared  with class Block definitions
   3.291 +
   3.292 +         Create block object from self alignment
   3.293 +         Return block
   3.294 +         """
   3.295 +# Create blocks object
   3.296 +         blocks.alignment=self
   3.297 +
   3.298 +# Create block object
   3.299 +         sequence_indexes_in_self = range[len(self.sequence_indexes_s)]
   3.300 +         position_indexes_in_self = range[len(self.position_indexes_a)]
   3.301 +         block=Block(self, sequence_indexes_in_self, position_indexes_in_self)
   3.302 +         block.state="initial"
   3.303 +# Include block in blocks
   3.304 +         blocks.list_of_blocks=[block]
   3.305 +         blocks.stack_of_blocks=[block]
   3.306 +         blocks.aligned_blocks=[]
   3.307 +         blocks.tree="..."
   3.308 +
   3.309 +# Create aligned_blocks_sequences relations
   3.310 +         self.aligned_blocks_sequences=[]
   3.311 +         for i in range(len(self.sequence_indexes_a)):
   3.312 +             self.aligned_blocks_sequences.append([])
   3.313 +
   3.314 +
   3.315 +         return block
   3.316 +
   3.317 +
   3.318 +if __name__ == "__main__":
   3.319 +    print "Hello!"
   3.320 +    import doctest
   3.321 +    doctest.testmod()
   3.322 +
   3.323 +
   3.324 +
   3.325 +
   3.326 +
   3.327 +
   3.328 +
   3.329 +
   3.330 +
   3.331 +
   3.332 +
   3.333 +
   3.334 +
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/sbbs_2009/modules/block.py	Sat Jul 09 16:30:28 2011 +0400
     4.3 @@ -0,0 +1,1016 @@
     4.4 +#!/usr/bin/python
     4.5 +"""
     4.6 +Contain classes:
     4.7 +    Block
     4.8 +    Blocks
     4.9 +Contains function:
    4.10 +    Lgamma
    4.11 +"""
    4.12 +import os
    4.13 +from rooted_tree import Rooted_tree
    4.14 +import configure
    4.15 +import alignment
    4.16 +from Lgamma import Lgamma
    4.17 +import inspect
    4.18 +import sys
    4.19 +import traceback
    4.20 +
    4.21 +#
    4.22 +#class Blocks(object):
    4.23 +#    """
    4.24 +#    Author: AAl
    4.25 +#    Status: designed
    4.26 +#
    4.27 +#    Data:
    4.28 +#        list_of_blocks  is a list of all Block objects of an alignment
    4.29 +#            a block is identified by its index in the list_of_blocks
    4.30 +#        tree_of_blocks  is a rooted_tree object for all blocks of an alignment
    4.31 +#            tree_of_blocks.content = [block,...]
    4.32 +#        stack_of_blocks is a list of undelivered blocks
    4.33 +#           the list is delivered  as stack
    4.34 +#        aligned_blocks is a list of aligned blocks
    4.35 +#
    4.36 +#
    4.37 +#        vertexes_of_blocks   # list; instead of tree_of_blocks
    4.38 +#
    4.39 +#
    4.40 +#    Methods
    4.41 +#        self.show_blocks
    4.42 +#        self.add_aligned block
    4.43 +#    """
    4.44 +#
    4.45 +#
    4.46 +#    def __init__(self,alignment=None,list_of_blocks=None,stack_of_blocks=None,aligned_blocks=None,tree_of_blocks=None):
    4.47 +#        self.alignment=alignment
    4.48 +#        self.list_of_blocks=list_of_blocks
    4.49 +#        self.tree_of_blocks=tree_of_blocks
    4.50 +#        self.stack_of_blocks=stack_of_blocks
    4.51 +#        self.aligned_blocks=aligned_blocks
    4.52 +#
    4.53 +#        if self.list_of_blocks==None:
    4.54 +#            self.list_of_blocks=[]
    4.55 +#        if self.tree_of_blocks==None:
    4.56 +#            self.tree_of_blocks=Rooted_tree()
    4.57 +#        if self.stack_of_blocks==None:
    4.58 +#            self.stack_of_blocks=[]
    4.59 +#        if self.aligned_blocks==None:
    4.60 +#            self.aligned_blocks=None
    4.61 +#
    4.62 +#
    4.63 +#
    4.64 +#
    4.65 +#    def add_aligned_block(self, parent_block, aligned_block):
    4.66 +#        """
    4.67 +#        Author: AAl
    4.68 +#        Status: in progress
    4.69 +#
    4.70 +#
    4.71 +#        """
    4.72 +#
    4.73 +##Left new block to the list of blocks and stack of blocks
    4.74 +#        left_block_width= aligned_block.position_indexes_a - parent_block.position_indexes_a
    4.75 +#        if left_block_width < configure.minimal_block_width:
    4.76 +#            no_left_block=1
    4.77 +#        else:
    4.78 +#            no_left_block=0
    4.79 +#            left_block=Block(
    4.80 +#                             parent_block.alignment,
    4.81 +#                             parent_block.sequence_indexes_a,
    4.82 +#                             parent_block.position_indexes_a[0:left_block_width]
    4.83 +#                            )
    4.84 +#            left_block.state="processing"
    4.85 +#            self.list_of_blocks.append(left_block)
    4.86 +#            self.stack_of_blocks.append(left_block)
    4.87 +#
    4.88 +#            self.tree_of_blocks.add....
    4.89 +#
    4.90 +#
    4.91 +#*************
    4.92 +#
    4.93 +#class Blocks(object):
    4.94 +#    """
    4.95 +#    Author: AAl
    4.96 +#    Status: designed
    4.97 +#
    4.98 +#    Data:
    4.99 +#        list_of_blocks  is a list of all Block objects of an alignment
   4.100 +#            a block is identified by its index in the list_of_blocks
   4.101 +#        tree_of_blocks  is a rooted_tree object for all blocks of an alignment
   4.102 +#            tree_of_blocks.content = [block,...]
   4.103 +#        stack_of_blocks is a list of undelivered blocks
   4.104 +#           the list is delivered  as stack
   4.105 +#        aligned_blocks is a list of aligned blocks
   4.106 +#
   4.107 +#
   4.108 +#        vertexes_of_blocks   # list; instead of tree_of_blocks
   4.109 +#
   4.110 +#
   4.111 +#    Methods
   4.112 +#        self.show_blocks
   4.113 +#        self.add_aligned block
   4.114 +#    """
   4.115 +#
   4.116 +#
   4.117 +#    def __init__(self,alignment=None,list_of_blocks=None,stack_of_blocks=None,aligned_blocks=None,tree_of_blocks=None):
   4.118 +#        self.alignment=alignment
   4.119 +#        self.list_of_blocks=list_of_blocks
   4.120 +#        self.tree_of_blocks=tree_of_blocks
   4.121 +#        self.stack_of_blocks=stack_of_blocks
   4.122 +#        self.aligned_blocks=aligned_blocks
   4.123 +#
   4.124 +#        if self.list_of_blocks==None:
   4.125 +#            self.list_of_blocks=[]
   4.126 +#        if self.tree_of_blocks==None:
   4.127 +#            self.tree_of_blocks=Rooted_tree()
   4.128 +#        if self.stack_of_blocks==None:
   4.129 +#            self.stack_of_blocks=[]
   4.130 +#        if self.aligned_blocks==None:
   4.131 +#            self.aligned_blocks=None
   4.132 +#
   4.133 +#
   4.134 +#
   4.135 +#    def add_aligned_block(self, parent_block, aligned_block):
   4.136 +#        """
   4.137 +#        Should be written
   4.138 +#                    self.add_aligned_block(aligned_block)
   4.139 +#
   4.140 +#        """
   4.141 +#
   4.142 +##Left new block to the list of blocks and stack of blocks
   4.143 +#        left_block_width= aligned_block.position_indexes_a - parent_block.position_indexes_a
   4.144 +#        if left_block_width < configure.minimal_block_width:
   4.145 +#            no_left_block=1
   4.146 +#        else:
   4.147 +#            no_left_block=0
   4.148 +#            left_block=Block(
   4.149 +#                             parent_block.alignment,
   4.150 +#                             parent_block.sequence_indexes_a,
   4.151 +#                             parent_block.position_indexes_a[0:left_block_width]
   4.152 +#                            )
   4.153 +#            left_block.state="processing"
   4.154 +#            self.list_of_blocks.append(left_block)
   4.155 +#            self.stack_of_blocks.append(left_block)
   4.156 +#
   4.157 +#            self.tree_of_blocks.add....
   4.158 +#
   4.159 +#
   4.160 +#*************
   4.161 +import blocks
   4.162 +
   4.163 +class plug(object):
   4.164 +    def __init__(self):
   4.165 +        self.state="initial"
   4.166 +
   4.167 +class Block(object):
   4.168 +    """
   4.169 +    Author: AAl
   4.170 +    Status: designed
   4.171 +
   4.172 +    Data
   4.173 +        MAIN BODY
   4.174 +            self.alignment           # Alignment object
   4.175 +            self.sequence_indexes_a    # list of sequence indexes;
   4.176 +                                         each index is the index in alignment.sequence_indexes_a. Be cureful!!!
   4.177 +            self.position_indexes_a    # list of position indexes;
   4.178 +                                         each index is the index in alignment.position_indexes_a. Be cureful!!!
   4.179 +            self.position_indexes_p   #
   4.180 +                                        List of indexes;
   4.181 +                                           Each index is an index of a position in the parent block positions
   4.182 +            self.state               # one of strings:
   4.183 +                                          "initial" !!! Move to Blocks - May be not? (Burkov)
   4.184 +                                          "aligned"
   4.185 +                                          "no_alignment"
   4.186 +                                          "pending"
   4.187 +                                          "in process"
   4.188 +                                          "processed"
   4.189 +
   4.190 +        BLOCK EQUIPMENT
   4.191 +            self.rooted_tree         # each vertex's content, for instance rooted_tree.root.content is a list[_^M_][_$_]
   4.192 +                                     # Vertex.content[0] is the respective sequence's name (None for nodes)[_^M_][_$_]
   4.193 +                                     # Vertex.content[1] is its <sum of subtree lengthes + length of the way from root to Vertex>[_^M_][_$
   4.194 +
   4.195 +            self.gapped_positions           # sorted list of position indexes
   4.196 +            self.unalignable_regions     # unalignable_regions
   4.197 +                                             sorted list of lists [from,to,"why",shortest,longest]
   4.198 +                                             from and to are indexes of positions
   4.199 +                                             "why" is a string either "gapped" or "ss_contradiction"
   4.200 +                                             may be, something else
   4.201 +                                             shortest is the length of the shortest insertion in unalignable region
   4.202 +                                             longest is the length of the longest insertion in unalignable region
   4.203 +            self.alignable_regions       #[from,to]
   4.204 +            self.conserved_positions     # sorted list of (position_index, position_consensus, position_percent)
   4.205 +
   4.206 +        OPTIONAL
   4.207 +            self.consensus_ss            # ss is for secondary structure
   4.208 +                                             consensus is created by comparison of ss of all available 3D data
   4.209 +            self.important_aas           # list of tuples (position_index,sequence_indexes?,type of activity
   4.210 +
   4.211 +
   4.212 +    METHODS:
   4.213 +
   4.214 +        EXTERNAL
   4.215 +        self.show()                     # Display block  in parent block in alignment
   4.216 +        +self.get_aligned_block         #
   4.217 +
   4.218 +        INTERNAL
   4.219 +        +self._delete_empty_positions()  # Delete all empty positions
   4.220 +        self.detect_gapped_positions()       #
   4.221 +                              For each position
   4.222 +                              - if there is a gap: push position_index to self.gapped_positions
   4.223 +                              - detect  unalignable_regions  : [from,to,"gapped",shortest,longest] , i.e.
   4.224 +                                 maximal segments having gapped positions at the ends such that
   4.225 +                                 an interval between any neighbor gapped positions inside is less than configure.minimal_block_width
   4.226 +                                 "from" and "to" are exactly the marginal gapped positions of the region
   4.227 +                                 shortest is the length of the shortest insertion in unalignable region
   4.228 +                                 longest is the length of the longest insertion in unalignable region
   4.229 +                              - retuns non_gapped_positions, which DON'T BELONG TO UNALIGNED REGIONS
   4.230 +
   4.231 +        self.detect_conserved_positions(non_gapped_positions)
   4.232 +                              - takes non_gapped_positions, which DON'T BELONG TO UNALIGNED REGIONS on input
   4.233 +                              - if position is conserved (and isn't in unalignable region) then add
   4.234 +                                  (position_index, position_group, position_fraction) to self.conserved_positions
   4.235 +
   4.236 +        +self.to_fasta()                # Creates temporfary file tmp_block.fasta
   4.237 +                                        #    sequence names are nums made  from alignment sequence_indexes_a
   4.238 +
   4.239 +        +self.create_rooted_tree()      # Result is rooted_tree object self.rooted_tree
   4.240 +        self.get_vertical_block(self)
   4.241 +                                        # return block or None
   4.242 +                                          if None change self.state either to
   4.243 +                                              "aligned" or to
   4.244 +                                              "no_vertical_blocks"
   4.245 +        self.create_parenthetic_structure_with_EMBOSS_Kimura_UPGMA(self,block)
   4.246 +                                        # Use external programs
   4.247 +                                        #     return brackets
   4.248 +        self.get_subset_from_rooted_tree(candidate_nodes)
   4.249 +                                        #returns list [[list_of_sequence_indexes_a][list_of_candidate_nodes]]
   4.250 +        self.add_aligned_block(aligned_block)
   4.251 +
   4.252 +        self.downsize(aligned_block)
   4.253 +        self.upside(aligned_block)
   4.254 +
   4.255 +             To get an aa, the following command is used:
   4.256 +              self.alignment.get_aa(sequense_index_a,position_index_a)
   4.257 +
   4.258 +        POSTPONED
   4.259 +        self.get_subsets_from_unrooted_tree()
   4.260 +
   4.261 +    """
   4.262 +
   4.263 +
   4.264 +    def __init__(self , alignment, sequence_indexes_a, position_indexes_a, is_gapped=None):
   4.265 +        self.alignment=alignment
   4.266 +        self.sequence_indexes_a=sequence_indexes_a
   4.267 +        self.position_indexes_a=position_indexes_a
   4.268 +        if is_gapped==None: self.block_of_gaps=self._delete_empty_positions()
   4.269 +        else: self.block_of_gaps=None
   4.270 +#*************
   4.271 +#        self.position_indexes_p=self.delete_empty_positions()
   4.272 +    def _delete_empty_positions(self):
   4.273 +#*************
   4.274 +        """
   4.275 +        Author: AAl
   4.276 +        State: seems to work
   4.277 +
   4.278 +        Delete all empty positions in the block.
   4.279 +        Return list of position indexes in initial - with empty positions - state
   4.280 +        Empty positions appear in horizontal sub-block.
   4.281 +        Therefore that list contains indexes  of all positions in parent block
   4.282 +        """
   4.283 +        new_position_indexes=[]        # indexes in the alignment are shown
   4.284 +        number_of_empty_positions=0
   4.285 +        position_indexes_in_parent=[]  # for each new+position its index before deletion is shown#
   4.286 +        positions_in_gap_block=[]
   4.287 +        for index in range(len(self.position_indexes_a)):
   4.288 +            position_index=self.position_indexes_a[index]
   4.289 +            flag=0   # if empty"
   4.290 +            for sequence_index in self.sequence_indexes_a:
   4.291 +                if self.alignment.get_aa(sequence_index,position_index) <> None:
   4.292 +                    flag=1
   4.293 +                    break
   4.294 +            if flag <> 0 :
   4.295 +                new_position_indexes.append(position_index)
   4.296 +                position_indexes_in_parent.append(index)
   4.297 +            else:
   4.298 +                positions_in_gap_block.append(position_index)
   4.299 +        self.position_indexes_a = new_position_indexes
   4.300 +        if len(positions_in_gap_block)!=0:
   4.301 +            block_of_gaps=Block(self.alignment, self.sequence_indexes_a, positions_in_gap_block, 1)
   4.302 +            block_of_gaps.state="no_alignment"
   4.303 +            return block_of_gaps
   4.304 +        else:
   4.305 +            return None
   4.306 +        # Seems that number_of_empty_positions is useless; what for do we store position_indexes_in_parent if we alter the position_indexes_a? I've killed it  - Burkov
   4.307 +
   4.308 +    def to_fasta(self,f):
   4.309 +        """
   4.310 +        Author: AAl
   4.311 +        State:
   4.312 +         Creates temporary file "temp_block.fasta" in current directory
   4.313 +           Sequence names are sequence num's (sequence_index+1)
   4.314 +        """
   4.315 +        for i in range(len(self.sequence_indexes_a)):
   4.316 +            sequence=""
   4.317 +            f.write(">"+str(self.sequence_indexes_a[i]+1)+"\n")
   4.318 +            for j in self.position_indexes_a:
   4.319 +                if self.alignment.get_aa(self.sequence_indexes_a[i],j):
   4.320 +                   sequence=sequence+self.alignment.get_aa(self.sequence_indexes_a[i],j)
   4.321 +                else:
   4.322 +                   sequence=sequence+"-"
   4.323 +            for k in range( (len(sequence)-1)/60+1 ):
   4.324 +                sequence_line=sequence[k*60:k*60+60]
   4.325 +                f.write(sequence_line+"\n")
   4.326 +
   4.327 +        return [len(self.sequence_indexes_a),len(self.position_indexes_a)]
   4.328 +
   4.329 +
   4.330 +    def create_distance_matrix(self, method="simple"):
   4.331 +        '''
   4.332 +        create_distance_matrix
   4.333 +        return object of class Phy_matrix containing distances between sequences of the block
   4.334 +        Author: S.A.S.
   4.335 +        '''
   4.336 +        import phylip
   4.337 +        from phylip import Phy_matrix
   4.338 +        mat=Phy_matrix(())
   4.339 +        for i1 in range(len(self.sequence_indexes_a)):
   4.340 +            mat=mat.addobject(str(self.sequence_indexes_a[i1]+1))
   4.341 +            for i2 in range(i1):
   4.342 +                npositions=0
   4.343 +                distance=0.0
   4.344 +                for j in self.position_indexes_a:
   4.345 +                    #aa1 = self.alignment.get_aa(i1,j)
   4.346 +                    #aa2 = self.alignment.get_aa(i2,j)
   4.347 +                    aa1 = self.alignment.get_aa(self.sequence_indexes_a[i1],j)
   4.348 +                    aa2 = self.alignment.get_aa(self.sequence_indexes_a[i2],j)
   4.349 +                    if aa1 and aa2:
   4.350 +                        npositions += 1
   4.351 +                        if aa1.upper() != aa2.upper():
   4.352 +                            distance += 1.0
   4.353 +                if npositions == 0:
   4.354 +                    mat=mat.setdist(str(self.sequence_indexes_a[i1]+1), str(self.sequence_indexes_a[i2]+1), 1.0)
   4.355 +                else:
   4.356 +                    mat=mat.setdist(str(self.sequence_indexes_a[i1]+1), str(self.sequence_indexes_a[i2]+1), distance/npositions)
   4.357 +        return mat
   4.358 +
   4.359 +    def create_rooted_tree(self, mode="external"):
   4.360 +        """
   4.361 +        create_rooted_tree
   4.362 +        if mode = "external" (default) create parenthetic structure by external programs fprotdist and fneighbor
   4.363 +        if mode = "semiexternal" create matrix by internal method create_distance_matrix(), then parenthetic structure by fneighbor
   4.364 +        create rooted_tree object
   4.365 +        parenthetic_structure to tree
   4.366 +        """
   4.367 +        if mode == "external":
   4.368 +            f=open("./jobs/temptree.fasta","w")
   4.369 +            self.to_fasta(f)
   4.370 +            f.close()
   4.371 +
   4.372 +            os.system("fprotdist ./jobs/temptree.fasta  -out ./jobs/temptree.dist -method d 2>>./jobs/temptree.log >>./jobs/temptree.log ")
   4.373 +            os.system("fneighbor ./jobs/temptree.dist -outfile ./jobs/temptree.nj -outtreefile ./jobs/temptree.tre -treetype u 2>>./jobs/temptree.log >>./jobs/temptree.log")
   4.374 +            f=open("./jobs/temptree.tre", "r")
   4.375 +            tree=f.readlines()
   4.376 +            f.close()
   4.377 +            line=""
   4.378 +            parenthetic_structure=line.join(tree).replace("\n","").replace("\r","").replace("\t","").replace(" ","")
   4.379 +        elif mode == "semiexternal":
   4.380 +            mat = self.create_distance_matrix()
   4.381 +            #if mat.max() == 0: return False
   4.382 +            f=open("./jobs/temptree.dist","w")
   4.383 +            f.write(str(mat))
   4.384 +            f.close()
   4.385 +            os.system("fneighbor ./jobs/temptree.dist -outfile ./jobs/temptree.nj -outtreefile ./jobs/temptree.tre -treetype u 2>>./jobs/temptree.log >>./jobs/temptree.log")
   4.386 +            f=open("./jobs/temptree.tre", "r")
   4.387 +            tree=f.readlines()
   4.388 +            f.close()
   4.389 +            line=""
   4.390 +            parenthetic_structure=line.join(tree).replace("\n","").replace("\r","").replace("\t","").replace(" ","")
   4.391 +        else:
   4.392 +            raise "Method "+mode+" unknown"
   4.393 +        # end if - elif
   4.394 +        self.rooted_tree=Rooted_tree()
   4.395 +        self.rooted_tree.parenthetic_structure_to_tree(parenthetic_structure)
   4.396 +        os.system("rm ./jobs/temptree.*")
   4.397 +        return True
   4.398 +        #sys.exit()
   4.399 +
   4.400 +    def detect_gapped_positions(self):
   4.401 +        """
   4.402 +        Author: Burkov
   4.403 +        Status: one test passed;
   4.404 +
   4.405 +        constructs:
   4.406 +                   self.gapped_positions
   4.407 +                   self.unalignable_regions
   4.408 +
   4.409 +        returns:
   4.410 +                   non_gapped_positions (list of positions OUTSIDE unalignable_regions, that are non-gapped)
   4.411 +
   4.412 +        >>> import block
   4.413 +        >>> test_alignment=alignment.Alignment([])
   4.414 +        >>> test_alignment.get_from_file("test_1.msf")
   4.415 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.416 +        >>> block1.detect_gapped_positions()
   4.417 +        [0, 1, 2, 3, 4, 5, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 32, 33, 34, 35, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
   4.418 +        """
   4.419 +        self.unalignable_regions=[]
   4.420 +        self.gapped_positions=[]
   4.421 +        non_gapped_positions=[]
   4.422 +        #search for gapped postitions
   4.423 +        for i in self.position_indexes_a:
   4.424 +            is_gapped=0
   4.425 +            for j in self.sequence_indexes_a:
   4.426 +                if self.alignment.get_aa(j,i)==None:
   4.427 +                    is_gapped=1
   4.428 +                    break
   4.429 +            if is_gapped==1:
   4.430 +                self.gapped_positions.append(i)
   4.431 +        #search for unalignable regions
   4.432 +        #print "gapped_positions = "+str(self.gapped_positions)
   4.433 +        #first, consider the beginning of the analyzed block
   4.434 +        for i in self.position_indexes_a:
   4.435 +            if len(self.gapped_positions)>0:#proove that the accepted block's beginning is longer than configure.minimal_block_width
   4.436 +                if self.position_indexes_a.index(self.gapped_positions[0]) - self.position_indexes_a[0]+1 >= configure.minimal_block_width:
   4.437 +                    if i>=self.gapped_positions[0]: break
   4.438 +                    else: non_gapped_positions.append(i)
   4.439 +            else:#if no gaps at all, the block should be longer than configure.minimal_block_width
   4.440 +                if len(self.position_indexes_a) >= configure.minimal_block_width:
   4.441 +                    non_gapped_positions.append(i)
   4.442 +        #consider all the intervals between gaps: they should be greater than
   4.443 +        flag=0
   4.444 +        for i in self.gapped_positions:
   4.445 +            if flag==0:
   4.446 +                left_margin=i
   4.447 +                right_margin=i
   4.448 +                flag=1
   4.449 +            elif flag==1:
   4.450 +                if self.position_indexes_a.index(i) - self.position_indexes_a.index(right_margin)-1<configure.minimal_block_width:
   4.451 +                    right_margin=i
   4.452 +                else:
   4.453 +                    self.unalignable_regions.append([left_margin,right_margin,"gapped",None,None])
   4.454 +                    for x in range(self.position_indexes_a.index(right_margin)+1, self.position_indexes_a.index(i)):
   4.455 +                        non_gapped_positions.append(self.position_indexes_a[x])
   4.456 +                    left_margin=i
   4.457 +                    right_margin=i
   4.458 +        #consider the ending of the analyzed block
   4.459 +        if flag==1:
   4.460 +            if len(self.position_indexes_a)-1+1-self.position_indexes_a.index(right_margin)-1<configure.minimal_block_width:
   4.461 +                right_margin=len(self.position_indexes_a)-1
   4.462 +                self.unalignable_regions.append([left_margin,right_margin,"gapped",None,None])
   4.463 +            else:
   4.464 +                self.unalignable_regions.append([left_margin,right_margin,"gapped",None,None])
   4.465 +                for x in range(self.position_indexes_a.index(right_margin)+1, len(self.position_indexes_a)-1+1):
   4.466 +                    non_gapped_positions.append(self.position_indexes_a[x])
   4.467 +        #fill in the shortest and longest
   4.468 +        for x in self.unalignable_regions:
   4.469 +            longest=0
   4.470 +            shortest=x[1]-x[0]+1
   4.471 +            for j in self.sequence_indexes_a:
   4.472 +                current=0
   4.473 +                for i in range(x[0],x[1]+1):
   4.474 +                    if self.alignment.get_aa(j,i)!=None: current=current+1
   4.475 +                if current>longest: longest=current
   4.476 +                if current<shortest: shortest=current
   4.477 +            x[3]=shortest
   4.478 +            x[4]=longest
   4.479 +
   4.480 +        return non_gapped_positions
   4.481 +
   4.482 +
   4.483 +    def detect_conserved_positions(self, non_gapped_positions):
   4.484 +        """
   4.485 +        Author: Burkov
   4.486 +        Status: in progress
   4.487 +
   4.488 +        requires self.rooted_tree
   4.489 +        constucts self.conserved_positions
   4.490 +        ???should return list of alignable regions???
   4.491 +
   4.492 +        >>> import block
   4.493 +        >>> test_alignment=alignment.Alignment([])
   4.494 +        >>> test_alignment.get_from_file("test_1.msf")
   4.495 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.496 +        >>> block1.create_rooted_tree()
   4.497 +        >>> block1.detect_conserved_positions(block1.detect_gapped_positions())
   4.498 +        """
   4.499 +
   4.500 +        #search for conserved_positions (position_index, position_consensus, position_percent)
   4.501 +        self.conserved_positions=[]
   4.502 +        #self.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.503 +        total_sum_of_lengthes=self.rooted_tree.root.content[1]
   4.504 +        if total_sum_of_lengthes!=0.0:
   4.505 +            for i in non_gapped_positions:
   4.506 +                current_leader=None
   4.507 +                for k,k_ in configure.groups_of_aa_1.items():
   4.508 +                    #search for the sequences_which_contain_this_group
   4.509 +                    sequences_which_contain_this_group=[]
   4.510 +                    for j in self.sequence_indexes_a:
   4.511 +                        if k_[0].lower().rfind(self.alignment.get_aa(j,i))!=-1:
   4.512 +                            sequences_which_contain_this_group.append(j)
   4.513 +                    #search for the leaves corresponding to sequences_which_contain_this_group
   4.514 +                    set_of_leaves=[]
   4.515 +                    for j in sequences_which_contain_this_group:
   4.516 +                        for l in self.rooted_tree.leaves:
   4.517 +                            if str(j+1)==l.content[0]:
   4.518 +                                set_of_leaves.append(l)
   4.519 +                                break
   4.520 +                    if self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)/total_sum_of_lengthes>=configure.conservation_threshold:
   4.521 +                        #if i==372:
   4.522 +                            #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" set_of_leaves = "+str(set_of_leaves)+", k = "+str(k)+", total_sum_of_lengthes = "+str(total_sum_of_lengthes)+", self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves) = "+str(self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves))+", sequences_which_contain_this_group = "+str(sequences_which_contain_this_group)
   4.523 +                            #for a in self.rooted_tree.depth_first_traversal():
   4.524 +                                #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" nesting depth = "+str(a[1])+", parental_edge = "+str(a[0].parental_edge)+", content = "+str(a[0].content)
   4.525 +                        if current_leader==None:
   4.526 +                            current_leader = (i,k,self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)/total_sum_of_lengthes)
   4.527 +                        else:
   4.528 +                            if configure.groups_of_aa_1[current_leader[1]][2]>configure.groups_of_aa_1[k][2]:
   4.529 +                                current_leader = (i,k,self.rooted_tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)/total_sum_of_lengthes)
   4.530 +                if current_leader!=None:
   4.531 +                    self.conserved_positions.append(current_leader)
   4.532 +        else:
   4.533 +            for i in non_gapped_positions:
   4.534 +                for k,k_ in configure.groups_of_aa_1.items():
   4.535 +                    if len(k_[0])==1 and k_[0].lower().rfind(self.alignment.get_aa(self.sequence_indexes_a[0],i))!=-1:
   4.536 +                        self.conserved_positions.append((i,k,1.0))
   4.537 +
   4.538 +
   4.539 +    def check_significance(self, subblock1_conserved_positions, subblock2_conserved_positions, formula=1):
   4.540 +        """
   4.541 +        Author: Burkov
   4.542 +        State: scheduled
   4.543 +
   4.544 +        Assumes, that rooted_trees for subblocks are already constructed
   4.545 +        """
   4.546 +        if formula==1:
   4.547 +            if self._formula_1(subblock1_conserved_positions, subblock2_conserved_positions): return True
   4.548 +            else: return False
   4.549 +        elif formula==2:
   4.550 +            pass
   4.551 +        elif formula==3:
   4.552 +            pass
   4.553 +
   4.554 +
   4.555 +    def _formula_1(self, subblock1_conserved_positions, subblock2_conserved_positions):
   4.556 +        """
   4.557 +        This is the implementation of the most primitive formula
   4.558 +        """
   4.559 +        import math
   4.560 +        #calculate the sum of logprobabilities of groups in actual block
   4.561 +        actual_logprobability=0
   4.562 +        for i in range(len(self.conserved_positions)):
   4.563 +            actual_logprobability=actual_logprobability+configure.groups_of_aa_1[self.conserved_positions[i][1]][2]
   4.564 +        #calculate the length and maximal_number_of_conserved_positions
   4.565 +        if len(subblock1_conserved_positions)>len(subblock2_conserved_positions): maximal_possible_number_of_conserved_positions=len(subblock2_conserved_positions)
   4.566 +        else: maximal_possible_number_of_conserved_positions=len(subblock1_conserved_positions)
   4.567 +        length=0
   4.568 +        for i in self.position_indexes_a:
   4.569 +            length=length+1
   4.570 +        if (len(subblock1_conserved_positions)+len(subblock2_conserved_positions))<length: minimal_possible_number_of_conserved_positions=0
   4.571 +        else: minimal_possible_number_of_conserved_positions=len(subblock1_conserved_positions)+len(subblock2_conserved_positions)-length
   4.572 +        if minimal_possible_number_of_conserved_positions<len(self.conserved_positions): minimal_possible_number_of_conserved_positions=len(self.conserved_positions)
   4.573 +        #denominator and numerator outcomes counts
   4.574 +        denominator = round(math.exp(Lgamma(length+1)-Lgamma(len(subblock1_conserved_positions)+1)-Lgamma(length-len(subblock1_conserved_positions)+1)+Lgamma(length+1)-Lgamma(len(subblock2_conserved_positions)+1)-Lgamma(length-len(subblock2_conserved_positions)+1)))
   4.575 +        numerator=0;
   4.576 +        for a in range(minimal_possible_number_of_conserved_positions, maximal_possible_number_of_conserved_positions+1):
   4.577 +            numerator=numerator+round(math.exp(Lgamma(length+1)-Lgamma(len(subblock1_conserved_positions)-a+1)-Lgamma(length-len(subblock1_conserved_positions)-len(subblock2_conserved_positions)+a+1)-Lgamma(len(subblock2_conserved_positions)-a+1)-Lgamma(a+1)))
   4.578 +        #a rough heuristic formula is implemented here
   4.579 +        probability = numerator / denominator * math.pow(2,actual_logprobability)
   4.580 +        if probability>=configure.probability_threshold:
   4.581 +            #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" SPLIT CONDEMNED"
   4.582 +            return False
   4.583 +        else:
   4.584 +            #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" SPLIT APPROVED"
   4.585 +            return True
   4.586 +        #MAY BE I SHOULD MULTIPLY BY C_a^self.conserved_positions, but there's a risk of probability exceeding 1
   4.587 +
   4.588 +    def traverse_block_splits(self):
   4.589 +        r"""
   4.590 +        Author: Burkov
   4.591 +        State: scheduled
   4.592 +
   4.593 +        Consequently traverses the nodes and vertices of self.rooted_tree, using rooted_tree.next_vertex,
   4.594 +        at each step splits the block onto two subblocks, finds out, whether their alignment is relevant
   4.595 +        or not and if it is, goes to the next step. If the alignment is irrelevant at least at one step,
   4.596 +        the block is discarded. Otherwise, the block is considered relevant.
   4.597 +
   4.598 +        Requires self.create_rooted_tree and self.rooted_tree.calculate_root_subtree_sum_of_lengthes
   4.599 +        to be called first.
   4.600 +
   4.601 +        >>> import block
   4.602 +        >>> import alignment
   4.603 +        >>> test_alignment=alignment.Alignment([])
   4.604 +        >>> test_alignment.get_from_file("test_1.msf")
   4.605 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.606 +        >>> block1.create_rooted_tree()
   4.607 +        >>> block1.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.608 +        >>> block1.traverse_block_splits()
   4.609 +        """
   4.610 +        candidate_vertices=[]
   4.611 +        for i in self.rooted_tree.root.child_vertices:
   4.612 +            candidate_vertices.append(i)
   4.613 +
   4.614 +        self.detect_conserved_positions(self.detect_gapped_positions())
   4.615 +        #iterations
   4.616 +        flag=1
   4.617 +        while candidate_vertices!=[]:
   4.618 +            current_vertex,candidate_vertices=self.rooted_tree.next_vertex(candidate_vertices)
   4.619 +            if current_vertex.child_vertices==[]:
   4.620 +                #if leaf
   4.621 +                sequence_indexes_a1=[int(current_vertex.content[0])-1]
   4.622 +                complement_set_of_leaves=self.rooted_tree.complement_set_of_leaves([current_vertex])
   4.623 +                sequence_indexes_a2=[]
   4.624 +                for i in complement_set_of_leaves:
   4.625 +                    sequence_indexes_a2.append(int(i.content[0])-1)
   4.626 +                subblock1=Block(self.alignment, sequence_indexes_a1, self.position_indexes_a)
   4.627 +                subblock2=Block(self.alignment, sequence_indexes_a2, self.position_indexes_a)
   4.628 +                #subblock1.rooted_tree=self.rooted_tree.chop_the_tree(current_vertex)
   4.629 +                subblock2.rooted_tree=self.rooted_tree.chop_the_tree(complement_set_of_leaves)
   4.630 +                #print "mark 1"
   4.631 +                #print "current_vertex_content = "+str(current_vertex.content)
   4.632 +                #print "subblock1_sequence_indexes_a = "+str(subblock1.sequence_indexes_a)
   4.633 +                subblock1._initialization_of_conserved_positions_for_single_sequences()
   4.634 +                #print "mark 2"
   4.635 +                #print "subblock1.sequence_indexes_a = "+str(subblock1.sequence_indexes_a)
   4.636 +                #print "subblock2.sequence_indexes_a = "+str(subblock2.sequence_indexes_a)
   4.637 +                if len(subblock2.sequence_indexes_a)==1:
   4.638 +                    subblock2._initialization_of_conserved_positions_for_single_sequences()
   4.639 +                else:
   4.640 +                    subblock2.detect_conserved_positions(subblock2.detect_gapped_positions())
   4.641 +                if self.check_significance(subblock1.conserved_positions,subblock2.conserved_positions,1):
   4.642 +                    pass
   4.643 +                else:
   4.644 +                    flag=0
   4.645 +                    break
   4.646 +            else:
   4.647 +                #if node
   4.648 +                set_of_leaves=self.rooted_tree.node_to_set_of_leaves(current_vertex)
   4.649 +                sequence_indexes_a1=[]
   4.650 +                for i in set_of_leaves:
   4.651 +                    sequence_indexes_a1.append(int(i.content[0])-1)
   4.652 +                complement_set_of_leaves=self.rooted_tree.complement_set_of_leaves(set_of_leaves)
   4.653 +                sequence_indexes_a2=[]
   4.654 +                for i in complement_set_of_leaves:
   4.655 +                    sequence_indexes_a2.append(int(i.content[0])-1)
   4.656 +                subblock1=Block(self.alignment, sequence_indexes_a1, self.position_indexes_a)
   4.657 +                subblock2=Block(self.alignment, sequence_indexes_a2, self.position_indexes_a)
   4.658 +                subblock1.rooted_tree=self.rooted_tree.chop_the_tree(set_of_leaves)
   4.659 +                subblock2.rooted_tree=self.rooted_tree.chop_the_tree(complement_set_of_leaves)
   4.660 +                #print "mark 3"
   4.661 +                #print "subblock1_sequence_indexes_a = "+str(subblock1.sequence_indexes_a)
   4.662 +                if len(subblock1.sequence_indexes_a)==1:
   4.663 +                    subblock1._initialization_of_conserved_positions_for_single_sequences()
   4.664 +                else:
   4.665 +                    subblock1.detect_conserved_positions(subblock1.detect_gapped_positions())
   4.666 +                #print "mark 4"
   4.667 +                if len(subblock2.sequence_indexes_a)==1:
   4.668 +                    subblock2._initialization_of_conserved_positions_for_single_sequences()
   4.669 +                else:
   4.670 +                    #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" subblock2.__dict__ = "+str(subblock2.__dict__)
   4.671 +                    subblock2.detect_conserved_positions(subblock2.detect_gapped_positions())
   4.672 +                if self.check_significance(subblock1.conserved_positions,subblock2.conserved_positions,1):
   4.673 +                    pass
   4.674 +                else:
   4.675 +                    flag=0
   4.676 +                    break
   4.677 +        if flag==1:
   4.678 +            #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" SUCCESS"
   4.679 +            return True
   4.680 +        else:
   4.681 +            #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" FAILURE"
   4.682 +            return False
   4.683 +
   4.684 +
   4.685 +    def _initialization_of_conserved_positions_for_single_sequences(self):
   4.686 +        """
   4.687 +        A function for internal usage only
   4.688 +        """
   4.689 +
   4.690 +        self.conserved_positions=[]
   4.691 +        for i in self.position_indexes_a:
   4.692 +            if self.alignment.get_aa(0,i)!=None:
   4.693 +                self.conserved_positions.append((i,self.alignment.get_aa(0,i),1.0))
   4.694 +
   4.695 +    def get_vertical_block(self):
   4.696 +        """
   4.697 +        alignable_regions       # sorted list of tuples:
   4.698 +                                             (from, to,from_index_in_conserved,to_index_in_conserved)
   4.699 +                                             from, to are indexes of positions, positions must be conserved
   4.700 +                                             from_index_conserved, to_index_conserved are
   4.701 +                                             indexes in self.conserved_positions
   4.702 +        returns next alignable region
   4.703 +
   4.704 +        >>> import block
   4.705 +        >>> import alignment
   4.706 +        >>> test_alignment=alignment.Alignment([])
   4.707 +        >>> test_alignment.get_from_file("test_3.msf")
   4.708 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.709 +        >>> block1.create_rooted_tree()
   4.710 +        >>> block1.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.711 +        >>> block1.get_vertical_block()
   4.712 +        """
   4.713 +        import copy
   4.714 +        self.detect_conserved_positions(self.detect_gapped_positions())
   4.715 +        self.detect_alignable_regions()
   4.716 +        #print "self.alignable_regions = "+str(self.alignable_regions)
   4.717 +        for i in self.alignable_regions:
   4.718 +            if len(i)>0:
   4.719 +                position_indexes_of_current_subblock=[]
   4.720 +                for j in self.position_indexes_a[self.position_indexes_a.index(i[0][0]):self.position_indexes_a.index(i[0][1])+1]:
   4.721 +                    position_indexes_of_current_subblock.append(j)
   4.722 +                current_subblock=Block(self.alignment, self._leaves_to_indexes_of_sequences(), position_indexes_of_current_subblock)
   4.723 +                current_subblock.rooted_tree=copy.deepcopy(self.rooted_tree)#this is a plug
   4.724 +                #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" current_subblock.__dict__ = "+str(current_subblock.__dict__)
   4.725 +                if current_subblock.traverse_block_splits():
   4.726 +                    current_subblock.state="aligned"
   4.727 +                    return current_subblock
   4.728 +        return None
   4.729 +
   4.730 +
   4.731 +    def detect_alignable_regions(self):
   4.732 +        """
   4.733 +        Should be run after detect_gapped_positions and detect_conserved_positions
   4.734 +        Creates:
   4.735 +            self.alignable_regions=[(from,to), (from1,to1), (from2, to2)]
   4.736 +        """
   4.737 +        #!!! SHOULD CONSIDER THE CASES, WHEN THERE ARE NO GAPS OR NO CONSERVED POSITIONS !!!
   4.738 +        self.alignable_regions=[]
   4.739 +        #first special iteration
   4.740 +        left_margin=self.position_indexes_a[0]-1
   4.741 +        if len(self.gapped_positions) == 0:
   4.742 +            right_margin=self.position_indexes_a[len(self.position_indexes_a)-1]+1
   4.743 +            conserved_positions_on_current_interval=[]
   4.744 +            for j in self.conserved_positions:
   4.745 +                if j[0]>left_margin and j[0]<right_margin:
   4.746 +                    conserved_positions_on_current_interval.append(j[0])
   4.747 +            self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval))
   4.748 +        else:
   4.749 +            right_margin=self.gapped_positions[0]
   4.750 +            conserved_positions_on_current_interval=[]
   4.751 +            for j in self.conserved_positions:
   4.752 +                if j[0]>left_margin and j[0]<right_margin:
   4.753 +                    conserved_positions_on_current_interval.append(j[0])
   4.754 +            self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval))
   4.755 +            #normal iterations
   4.756 +            for i in range(1,len(self.gapped_positions)-1):
   4.757 +                left_margin=self.gapped_positions[i]
   4.758 +                right_margin=self.gapped_positions[i+1]
   4.759 +                conserved_positions_on_current_interval=[]
   4.760 +                for j in self.conserved_positions:
   4.761 +                    if j[0]>left_margin and j[0]<right_margin:
   4.762 +                        conserved_positions_on_current_interval.append(j[0])
   4.763 +                self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval))
   4.764 +            #last special iteration
   4.765 +            conserved_positions_on_current_interval=[]
   4.766 +            left_margin=self.gapped_positions[-1]
   4.767 +            right_margin=self.position_indexes_a[-1]+1
   4.768 +            for j in self.conserved_positions:
   4.769 +                if j[0]>left_margin and j[0]<right_margin:
   4.770 +                    conserved_positions_on_current_interval.append(j[0])
   4.771 +            self.alignable_regions.append(self._find_pairs(conserved_positions_on_current_interval))
   4.772 +
   4.773 +
   4.774 +    def _find_pairs(self, conserved_positions_on_current_interval):
   4.775 +        """
   4.776 +        Internal method
   4.777 +        The output is sorted so, that the length of the output element decreases
   4.778 +
   4.779 +        >>> import block
   4.780 +        >>> import alignment
   4.781 +        >>> test_alignment=alignment.Alignment([])
   4.782 +        >>> test_alignment.get_from_file("test_3.msf")
   4.783 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.784 +        >>> block1.create_rooted_tree()
   4.785 +        >>> block1.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.786 +        >>> block1.detect_conserved_positions(block1.detect_gapped_positions())
   4.787 +        >>> block1._find_pairs([4,5,20,22])
   4.788 +        [(4, 22), (5, 22), (5, 20)]
   4.789 +        """
   4.790 +        output=[]
   4.791 +        if len(conserved_positions_on_current_interval)>1:
   4.792 +            flag=0
   4.793 +            i=0
   4.794 +            j=(len(conserved_positions_on_current_interval))-1
   4.795 +            while (i<j):
   4.796 +                output.append((conserved_positions_on_current_interval[i],conserved_positions_on_current_interval[j]))
   4.797 +                length1=0
   4.798 +                length2=0
   4.799 +                for a in range(self.position_indexes_a.index(conserved_positions_on_current_interval[i+1]), self.position_indexes_a.index(conserved_positions_on_current_interval[j])+1):
   4.800 +                    length1+=1
   4.801 +                for a in range(self.position_indexes_a.index(conserved_positions_on_current_interval[i]), self.position_indexes_a.index(conserved_positions_on_current_interval[j-1])+1):
   4.802 +                    length2+=1
   4.803 +                if length1>length2: i+=1
   4.804 +                else: j-=1
   4.805 +        return output
   4.806 +
   4.807 +    def downsize(self,aligned_block):
   4.808 +        """
   4.809 +        Should be written
   4.810 +        """
   4.811 +        pass
   4.812 +    def upsize(self,aligned_block):
   4.813 +        """
   4.814 +        Once tested
   4.815 +
   4.816 +        >>> import alignment
   4.817 +        >>> import block
   4.818 +        >>> test_alignment = alignment.Alignment([])
   4.819 +        >>> test_alignment.get_from_file("test_3.msf")
   4.820 +        >>> block1=block.Block(test_alignment, [2,3,4,5], [14,15,16,17,18,19])
   4.821 +        >>> aligned_block = block.Block(test_alignment, [2,3,4], [14,15,16,17,18])
   4.822 +        >>> aligned_block.create_rooted_tree()
   4.823 +        >>> aligned_block.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.824 +        >>> aligned_block.detect_conserved_positions(aligned_block.detect_gapped_positions())
   4.825 +        >>> output_block=block1.upsize(aligned_block)
   4.826 +        >>> print output_block.__dict__
   4.827 +        """
   4.828 +        added_sequences=[]
   4.829 +        for i in self.sequence_indexes_a:
   4.830 +            if i in aligned_block.sequence_indexes_a:
   4.831 +                pass
   4.832 +            else:
   4.833 +                flag=1;
   4.834 +                for j in range(aligned_block.position_indexes_a[0], aligned_block.position_indexes_a[-1]+1):
   4.835 +                    if j in self.position_indexes_a:
   4.836 +                        if j in aligned_block.position_indexes_a:
   4.837 +                            if self.alignment.get_aa(i, j)==None:
   4.838 +                               flag=0;
   4.839 +                        else:
   4.840 +                            if self.alignment.get_aa(i, j)!=None:
   4.841 +                               flag=0;
   4.842 +                if flag==1:
   4.843 +                    one_sequence_block=Block(self.alignment, [i], aligned_block.position_indexes_a)
   4.844 +                    one_sequence_block._initialization_of_conserved_positions_for_single_sequences()
   4.845 +                    united_block_sequence_indexes_a=aligned_block.sequence_indexes_a[:]
   4.846 +                    united_block_sequence_indexes_a.append(i)
   4.847 +                    united_block_sequence_indexes_a.sort()
   4.848 +                    united_block=Block(self.alignment, united_block_sequence_indexes_a, aligned_block.position_indexes_a)
   4.849 +                    united_block.create_rooted_tree("semiexternal")
   4.850 +                    united_block.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.851 +                    united_block.detect_conserved_positions(self.detect_gapped_positions())
   4.852 +                    if united_block.check_significance(one_sequence_block.conserved_positions, aligned_block.conserved_positions,1):
   4.853 +                        added_sequences.append(i);
   4.854 +        output_block_sequence_indexes_a=aligned_block.sequence_indexes_a[:]
   4.855 +        output_block_sequence_indexes_a.extend(added_sequences)
   4.856 +        output_block=Block(self.alignment, output_block_sequence_indexes_a, aligned_block.position_indexes_a)
   4.857 +        #!!!Bugfix: aligned_block positions_indexes_a should be changed to something, probably t the span between indexes of the first and last found conserved positions!!!
   4.858 +        #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" output_block.__dict__ = "+str(output_block.__dict__)
   4.859 +        output_block.create_rooted_tree("semiexternal")
   4.860 +        output_block.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.861 +        output_block.detect_conserved_positions(output_block.detect_gapped_positions())
   4.862 +        output_block.state="aligned"
   4.863 +        return output_block
   4.864 +
   4.865 +
   4.866 +    def show(self):
   4.867 +        """
   4.868 +        Should be designed
   4.869 +        """
   4.870 +
   4.871 +    def _leaves_to_indexes_of_sequences(self):
   4.872 +        """
   4.873 +        This is an internal method
   4.874 +        """
   4.875 +        indexes_of_sequences=[]
   4.876 +        for i in self.rooted_tree.leaves:
   4.877 +           indexes_of_sequences.append(int(i.content[0])-1)
   4.878 +        indexes_of_sequences.sort()
   4.879 +        return indexes_of_sequences
   4.880 +
   4.881 +    def get_aligned_block(self):
   4.882 +        """
   4.883 +        Author: AAl (Beware, I've changed some parts of the method, even including the interface - Burkov)
   4.884 +        State:
   4.885 +
   4.886 +        Returns None or aligned block and several new blocks in stack.
   4.887 +        If None, the state of current block is set "no_alignment"
   4.888 +
   4.889 +        >>> import alignment
   4.890 +        >>> import block
   4.891 +        >>> test_alignment=alignment.Alignment([])
   4.892 +        >>> test_alignment.get_from_file("test_3.msf")
   4.893 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.894 +        >>> block1.get_aligned_block()
   4.895 +        """
   4.896 +# Create tree
   4.897 +        if self.create_rooted_tree("semiexternal") == False:
   4.898 +            print "Content-type: text/plain\n"
   4.899 +            print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" Tree construction failed!!!"
   4.900 +            sys.exit()
   4.901 +        else:
   4.902 +            #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" self.__dict__ = "+str(self.__dict__)
   4.903 +            self.rooted_tree.calculate_root_subtree_sum_of_lengthes()
   4.904 +            candidate_nodes=[self.rooted_tree.root]
   4.905 +            while len(candidate_nodes) > 0:
   4.906 +    # Get current_node and new candidate_nodes
   4.907 +                current_node, candidate_nodes = self.rooted_tree.next_node(candidate_nodes)
   4.908 +    # Define leaves_indexes for current_node
   4.909 +                set_of_leaves=self.rooted_tree.node_to_set_of_leaves(current_node)
   4.910 +                leaves_indexes=[]
   4.911 +                for leaf in set_of_leaves:
   4.912 +                    leaves_indexes.append(int(leaf.content[0])-1)
   4.913 +                leaves_indexes.sort()
   4.914 +
   4.915 +                if len(set_of_leaves) >= 2:    # 2=minimal number of sequences in aligned block
   4.916 +    # Define horizontal_block
   4.917 +                    horizontal_block=Block(self.alignment, leaves_indexes, self.position_indexes_a )
   4.918 +                    #for a in self.rooted_tree.depth_first_traversal():
   4.919 +                    #    print "Original rooted tree::nesting depth = "+str(a[1])+", parental_edge = "+str(a[0].parental_edge)+", content = "+str(a[0].content)
   4.920 +                    if horizontal_block.position_indexes_a!=[]: horizontal_block.rooted_tree=self.rooted_tree.chop_the_tree(set_of_leaves)
   4.921 +                    else: continue
   4.922 +    # Get vertical block in horizontal block
   4.923 +                    #print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" horizontal_block.__dict__ = "+str(horizontal_block.__dict__)
   4.924 +                    aligned_block=horizontal_block.get_vertical_block()
   4.925 +                    if aligned_block <> None:
   4.926 +
   4.927 +    # Refine aligned_block
   4.928 +                        #self.upsize(aligned_block)
   4.929 +    # Reorganize blocks appropriately
   4.930 +
   4.931 +                        return aligned_block
   4.932 +            self.state = "no_alignment"
   4.933 +            return None
   4.934 +
   4.935 +
   4.936 +
   4.937 +    def find_complementary_blocks(self, aligned_block_position_indexes_a, aligned_block_sequence_indexes_a):
   4.938 +        """
   4.939 +        Author: Burkov
   4.940 +        State: in progress
   4.941 +
   4.942 +        Given the aligned_block, this method returns 0, 1, 2, 3, 4, 5 or 6 nonempty blocks,
   4.943 +        which compose the "self" block along with the aligned_block
   4.944 +
   4.945 +        >>> import alignment
   4.946 +        >>> import block
   4.947 +        >>> test_alignment=alignment.Alignment([])
   4.948 +        >>> test_alignment.get_from_file("test_3.msf")
   4.949 +        >>> block1=block.Block(test_alignment, test_alignment.sequence_indexes_s, test_alignment.position_indexes_a)
   4.950 +        >>> block2=block1.get_aligned_block()
   4.951 +        >>> block1.find_complementary_blocks(block2.position_indexes_a, block2.sequence_indexes_a)
   4.952 +        """
   4.953 +        #A problem: we will never detect the unalignable regions
   4.954 +        #(SHOULD I REMEMBER THEM, WHEN I DELETE_EMPTY_POSITIONS AND RETURN THEM, WHEN I FIND THAT THE BLOCK IS ALIGNED?)
   4.955 +
   4.956 +        output=[]
   4.957 +        import copy
   4.958 +        left_neighbor_position_indexes_a=[]
   4.959 +        right_neighbor_position_indexes_a=[]
   4.960 +        for i in self.position_indexes_a:
   4.961 +            if i<aligned_block_position_indexes_a[0]:
   4.962 +                left_neighbor_position_indexes_a.append(i)
   4.963 +            elif i>aligned_block_position_indexes_a[len(aligned_block_position_indexes_a)-1]:
   4.964 +                right_neighbor_position_indexes_a.append(i)
   4.965 +        vertical_neighbor_sequence_indexes_a=copy.deepcopy(self.sequence_indexes_a)
   4.966 +        for i in aligned_block_sequence_indexes_a:
   4.967 +            vertical_neighbor_sequence_indexes_a.remove(i)
   4.968 +        if left_neighbor_position_indexes_a != []:
   4.969 +            left_neighbor=Block(self.alignment, self.sequence_indexes_a, left_neighbor_position_indexes_a)
   4.970 +            left_neighbor.state="pending"
   4.971 +            if left_neighbor.position_indexes_a!=[]: output.append(left_neighbor)
   4.972 +            if left_neighbor.block_of_gaps!=None: output.append(left_neighbor.block_of_gaps)
   4.973 +        if vertical_neighbor_sequence_indexes_a != [] :#if no sequences or 1 sequence
   4.974 +            #remember to include the gapped positions into the vertical neighbor
   4.975 +            list_of_positions_in_vertical_neighbor=[]
   4.976 +            for i in range(aligned_block_position_indexes_a[0], aligned_block_position_indexes_a[-1]+1):
   4.977 +                list_of_positions_in_vertical_neighbor.append(i)
   4.978 +            vertical_neighbor=Block(self.alignment, vertical_neighbor_sequence_indexes_a, list_of_positions_in_vertical_neighbor)
   4.979 +            if len(vertical_neighbor_sequence_indexes_a) != 1: vertical_neighbor.state="pending"
   4.980 +            else: vertical_neighbor.state="no_alignment"
   4.981 +            if vertical_neighbor.position_indexes_a!=[]: output.append(vertical_neighbor)
   4.982 +            if vertical_neighbor.block_of_gaps!=None: output.append(vertical_neighbor.block_of_gaps)
   4.983 +        if right_neighbor_position_indexes_a != []:
   4.984 +            right_neighbor=Block(self.alignment, self.sequence_indexes_a, right_neighbor_position_indexes_a)
   4.985 +            right_neighbor.state="pending"
   4.986 +            if right_neighbor.position_indexes_a!=[]: output.append(right_neighbor)
   4.987 +            if right_neighbor.block_of_gaps!=None: output.append(right_neighbor.block_of_gaps)
   4.988 +        return output
   4.989 +
   4.990 +
   4.991 +###### OPTIONAL
   4.992 +    def get_ss(self):
   4.993 +        """
   4.994 +        Construct self.consensus_ss
   4.995 +        """
   4.996 +    def get_important_aa(self):
   4.997 +
   4.998 +        """
   4.999 +        Construct self.important_aas
  4.1000 +        """
  4.1001 +    def get_subsets_from_unrooted_tree(self):
  4.1002 +        """
  4.1003 +        - Creats tree object
  4.1004 +        - Convert tree to list subsets of subset objects
  4.1005 +        - Sort subsets
  4.1006 +        Result is a list "subsets"
  4.1007 +        """
  4.1008 +        subsets=[]
  4.1009 +        return subsets
  4.1010 +
  4.1011 +
  4.1012 +
  4.1013 +
  4.1014 +###############################################################################################################
  4.1015 +
  4.1016 +if __name__ == "__main__":
  4.1017 +    print "Hello!"
  4.1018 +    import doctest
  4.1019 +    doctest.testmod()
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/sbbs_2009/modules/blocks.py	Sat Jul 09 16:30:28 2011 +0400
     5.3 @@ -0,0 +1,43 @@
     5.4 +import rooted_tree
     5.5 +
     5.6 +class Blocks(object):
     5.7 +    """
     5.8 +    Author: AAl
     5.9 +    Status: designed
    5.10 +
    5.11 +    Data:
    5.12 +        list_of_blocks  is a list of all Block objects of an alignment
    5.13 +            a block is identified by its index in the list_of_blocks
    5.14 +        tree_of_blocks  is a rooted_tree object for all blocks of an alignment
    5.15 +            tree_of_blocks.content = [block,...]
    5.16 +        -stack_of_blocks is a list of undelivered blocks
    5.17 +           the list is delivered as stack (I've killed it - no need in it - (Burkov))
    5.18 +        aligned_blocks is a list of aligned blocks
    5.19 +
    5.20 +    Methods
    5.21 +        self.show_blocks
    5.22 +        self.add_aligned block
    5.23 +    """
    5.24 +
    5.25 +    def __init__(self,alignment=None,list_of_blocks=None,aligned_blocks=None,tree_of_blocks=None):
    5.26 +        self.alignment=alignment
    5.27 +        self.list_of_blocks=list_of_blocks
    5.28 +        self.tree_of_blocks=tree_of_blocks
    5.29 +        self.aligned_blocks=aligned_blocks
    5.30 +
    5.31 +        if self.list_of_blocks==None:
    5.32 +            self.list_of_blocks=[]
    5.33 +        if self.tree_of_blocks==None:
    5.34 +            self.tree_of_blocks=rooted_tree.Rooted_tree()
    5.35 +        if self.aligned_blocks==None:
    5.36 +            self.aligned_blocks=[]
    5.37 +
    5.38 +
    5.39 +    def add_aligned_block(self, parent_block, aligned_block):
    5.40 +        """
    5.41 +        Should be written
    5.42 +                    self.add_aligned_block(aligned_block)
    5.43 +
    5.44 +        """
    5.45 +
    5.46 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/sbbs_2009/modules/blocks_io.py	Sat Jul 09 16:30:28 2011 +0400
     6.3 @@ -0,0 +1,279 @@
     6.4 +#! /usr/bin/python
     6.5 +
     6.6 +import configure
     6.7 +import block
     6.8 +import blocks
     6.9 +import alignment
    6.10 +import pickle
    6.11 +
    6.12 +def tree_of_blocks_to_plain_file(tree_of_blocks, file_name, input_file_name):
    6.13 +    """
    6.14 +    """
    6.15 +    blocks=tree_of_blocks.depth_first_traversal()
    6.16 +    file_content="Alignment_id\tblock_#\tsequence_name\tpositions_indexes\n"
    6.17 +    for i, i_ in enumerate(blocks):
    6.18 +        if i_[0].content[0].state=="aligned":
    6.19 +            for j in i_[0].content[0].sequence_indexes_a:
    6.20 +                file_content+=str(input_file_name)+"\t"+str(i)+"\t"+str(i_[0].content[0].alignment.sequences[j].name)+"\t"
    6.21 +                buffer=""
    6.22 +                for k in i_[0].content[0].position_indexes_a:
    6.23 +                    buffer+=str(k)+" "
    6.24 +                if (len(buffer)!=0): buffer=buffer[:-1]
    6.25 +                file_content+=buffer+"\n"
    6.26 +    file=open(file_name,"w")
    6.27 +    file.write(file_content)
    6.28 +    file.close()
    6.29 +
    6.30 +
    6.31 +def tree_of_blocks_to_html(input_alignment_file_name, tree_of_blocks, alignment, main_file_name="main.html", directry_file_name="directry.html", blocks_file_name="blocks.html"):
    6.32 +    """
    6.33 +    This function shall create 3 html documents - frameholder and 2 frames:
    6.34 +    directry.html - contains tree of blocks
    6.35 +    blocks.html - contains tables of alignment with marked blocks and conserved positions
    6.36 +    """
    6.37 +    #main frame creation
    6.38 +    blocks=tree_of_blocks.depth_first_traversal()
    6.39 +    main_file=open(main_file_name,"w")
    6.40 +    main_content=""
    6.41 +    main_content+="<Html>\n<Head>\n<Title>Blocks found in alignment "+str(input_alignment_file_name)+"</Title>\n</Head>\n"
    6.42 +    main_content+="<script>\nvar body=new Array();\n"
    6.43 +    for i, i_ in enumerate(alignment.sequence_indexes_s):
    6.44 +        main_content+="body["+str(i)+"] = new Array("
    6.45 +        buffer=""
    6.46 +        for j, j_ in enumerate(alignment.body[i_]):
    6.47 +            if alignment.get_aa(i_,j)!=None:
    6.48 +                buffer+="\""+str(alignment.get_aa(i_,j)).upper()+"\","
    6.49 +            else:
    6.50 +                buffer+="\"-\","
    6.51 +        if len(buffer)!=0: buffer=buffer[:-1]
    6.52 +        main_content+=buffer
    6.53 +        main_content+=");\n"
    6.54 +    main_content+="var sequences_names = new Array("
    6.55 +    buffer=""
    6.56 +    for i in alignment.sequences:
    6.57 +        buffer+="\""+str(i.name)+"\","
    6.58 +    if len(buffer)!=0: buffer=buffer[:-1]
    6.59 +    main_content+=buffer
    6.60 +    main_content+=");\n"
    6.61 +
    6.62 +    main_content+="var groups_names = new Array("
    6.63 +    buffer=""
    6.64 +    for i, i_ in configure.groups_of_aa_1.items():
    6.65 +        buffer+="\""+str(i)+"\","
    6.66 +    if (len(buffer)!=0): buffer=buffer[:-1]
    6.67 +    main_content+=buffer
    6.68 +    main_content+=");\n"
    6.69 +
    6.70 +    main_content+="var groups_content = new Array();\n"
    6.71 +    counter=0
    6.72 +    for i, i_ in configure.groups_of_aa_1.items():
    6.73 +        main_content+="groups_content["+str(counter)+"] = new Array("
    6.74 +        buffer=""
    6.75 +        for j in range(len(i_[0])):
    6.76 +            buffer+="\""+str(i_[0][j:j+1])+"\","
    6.77 +        if (len(buffer)!=0): buffer=buffer[:-1]
    6.78 +        main_content+=buffer
    6.79 +        main_content+=");\n"
    6.80 +        counter+=1
    6.81 +
    6.82 +    main_content+="</script>\n"
    6.83 +    main_content+="    <Frameset cols=\"200,*\">\n        <Frame src=\""+str(input_alignment_file_name)+".directry.html\" name=\"directry\">\n        <Frame src=\""+str(input_alignment_file_name)+".blocks.html\" name=\"blocks\">\n    </Frameset>\n</Html>"
    6.84 +    main_file.write(main_content)
    6.85 +    main_file.close()
    6.86 +
    6.87 +    #directry frame creation
    6.88 +    directry_content=""
    6.89 +    directry_content+="<Html>\n<Head>\n<Title>Tree of blocks</Title>\n</Head>\n<Body bgcolor=\"#FFFFFF\">\n<script>\n"
    6.90 +    #function, that checks, whether current aminoacid in conserved position is in conserved group or not
    6.91 +    directry_content+="function check_conservation(block_groups_in_conserved_positions, groups_names, groups_content, block_conserved_position, body_element){\n"
    6.92 +    directry_content+="    var i;\n    var j;\n"
    6.93 +    directry_content+="    for (i=0;i<groups_names.length;i++){\n"
    6.94 +    directry_content+="        if (groups_names[i]==block_groups_in_conserved_positions[block_conserved_position]){\n"
    6.95 +    directry_content+="            var flag=0;\n"
    6.96 +    directry_content+="            for (j=0;j<groups_content[i].length;j++){\n"
    6.97 +    directry_content+="                if (body_element==groups_content[i][j]){\n"
    6.98 +    directry_content+="                    flag=1;break;\n"
    6.99 +    directry_content+="                }\n"
   6.100 +    directry_content+="            }\n"
   6.101 +    directry_content+="            if (flag==1) {return true;}\n"
   6.102 +    directry_content+="            else {return false;}\n"
   6.103 +    directry_content+="        }\n"
   6.104 +    directry_content+="    }\n"
   6.105 +    directry_content+="}\n"
   6.106 +    #block-drawing function
   6.107 +    directry_content+="function draw_a_block(block_sequences, block_positions, block_parental_block_remaining_sequences, block_parental_block_positions, block_state, block_conserved_positions, block_groups_in_conserved_positions, block_name){\n"
   6.108 +    directry_content+="    parent.blocks.document.getElementsByTagName('P')[2].innerHTML=\"block_state = \"+block_state;\n"
   6.109 +    directry_content+="    parent.blocks.document.getElementsByTagName('P')[1].innerHTML=block_name;\n"
   6.110 +    directry_content+="    var i;\n    var j;\n    var k;\n    var l;\n"
   6.111 +    directry_content+="    for (i=1;i<parent.body.length+1;i++){\n"
   6.112 +    directry_content+="        for (j=1;j<parent.body[0].length+1;j++){\n"
   6.113 +    directry_content+="            parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#FFFFFF';\n"
   6.114 +    directry_content+="        }\n"
   6.115 +    directry_content+="    }\n"
   6.116 +    directry_content+="    for (i=1;i<block_sequences.length+1;i++){\n"
   6.117 +    directry_content+="        parent.blocks.document.getElementById(\"alignment\").rows[i].cells[0].innerText=parent.sequences_names[block_sequences[i-1]];\n"
   6.118 +    directry_content+="        for (j=1;j<parent.body[0].length+1;j++){\n"
   6.119 +    directry_content+="            parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].innerText=parent.body[block_sequences[i-1]][j-1];\n"
   6.120 +    directry_content+="            var flag=0;\n"
   6.121 +    directry_content+="            for (k=0;k<block_positions.length;k++){\n"
   6.122 +    directry_content+="                if (block_positions[k]==j-1) {flag=1;break;}\n"
   6.123 +    directry_content+="            }\n"
   6.124 +    directry_content+="            if (flag==1){\n"
   6.125 +    directry_content+="                if (block_state==\"aligned\"){\n"
   6.126 +    directry_content+="                    var innerflag=0;\n"
   6.127 +    directry_content+="                    for (k=0;k<block_conserved_positions.length;k++){\n"
   6.128 +    directry_content+="                        if (block_conserved_positions[k]==j-1){innerflag=1;break;}\n"
   6.129 +    directry_content+="                        parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';\n"
   6.130 +    directry_content+="                    }\n"
   6.131 +    directry_content+="                    if (innerflag==1){\n"
   6.132 +    directry_content+="                        if (check_conservation(block_groups_in_conserved_positions, parent.groups_names, parent.groups_content, k, parent.body[block_sequences[i-1]][j-1])==true){parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#7777FF';}\n"
   6.133 +    directry_content+="                        else{\n"
   6.134 +    directry_content+="                            parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';\n"
   6.135 +    directry_content+="                        }\n"
   6.136 +    directry_content+="                    }\n"
   6.137 +    directry_content+="                    else {parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';}\n"
   6.138 +    directry_content+="                }\n"
   6.139 +    directry_content+="                else{parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAAAFF';}\n"
   6.140 +    directry_content+="            }\n"
   6.141 +    directry_content+="            else{\n"
   6.142 +    directry_content+="                for (l=0;l<block_parental_block_positions.length;l++){\n"
   6.143 +    directry_content+="                    if (block_parental_block_positions[l]==j-1){parent.blocks.document.getElementById(\"alignment\").rows[i].cells[j].bgColor='#AAFFAA';break;}\n"
   6.144 +    directry_content+="                }\n"
   6.145 +    directry_content+="            }\n"
   6.146 +    directry_content+="        }\n"
   6.147 +    directry_content+="    }\n"
   6.148 +    directry_content+="    var counter=block_sequences.length+1;\n"
   6.149 +    directry_content+="    for (i=1;i<block_parental_block_remaining_sequences.length+1;i++){\n"
   6.150 +    directry_content+="        parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[0].innerText=parent.sequences_names[block_parental_block_remaining_sequences[i-1]];\n"
   6.151 +    directry_content+="        for (j=1;j<parent.body[0].length+1;j++){\n"
   6.152 +    directry_content+="            parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[j].innerText=parent.body[block_parental_block_remaining_sequences[i-1]][j-1];\n"
   6.153 +    directry_content+="            var flag=0;\n"
   6.154 +    directry_content+="            for (k=0;k<block_parental_block_positions.length;k++){\n"
   6.155 +    directry_content+="                if (block_parental_block_positions[k]==j-1) {flag=1;break;}\n"
   6.156 +    directry_content+="            }\n"
   6.157 +    directry_content+="            if (flag==1){\n"
   6.158 +    directry_content+="                parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[j].bgColor='#AAFFAA';\n"
   6.159 +    directry_content+="            }\n"
   6.160 +    directry_content+="        }\n"
   6.161 +    directry_content+="        counter++;\n"
   6.162 +    directry_content+="    }\n"
   6.163 +    directry_content+="    for (i=1;i<parent.body.length+1;i++){\n"
   6.164 +    directry_content+="        var flag=0;\n"
   6.165 +    directry_content+="        for (j=1;j<block_sequences.length+1;j++){\n"
   6.166 +    directry_content+="            if (i-1==block_sequences[j-1]){flag=1;break;}\n"
   6.167 +    directry_content+="        }\n"
   6.168 +    directry_content+="        for (j=0;j<block_parental_block_remaining_sequences.length;j++){\n"
   6.169 +    directry_content+="            if (i-1==block_parental_block_remaining_sequences[j]){flag=1;break;}\n"
   6.170 +    directry_content+="        }\n"
   6.171 +    directry_content+="        if (flag==0) {\n"
   6.172 +    directry_content+="            parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[0].innerText=parent.sequences_names[i-1];\n"
   6.173 +    directry_content+="            for (j=1;j<parent.body[0].length+1;j++){\n"
   6.174 +    directry_content+="                parent.blocks.document.getElementById(\"alignment\").rows[counter].cells[j].innerText=parent.body[i-1][j-1];\n"
   6.175 +    directry_content+="            }\n"
   6.176 +    directry_content+="        counter++;\n"
   6.177 +    directry_content+="        }\n"
   6.178 +    directry_content+="    }\n"
   6.179 +    directry_content+="}\n"
   6.180 +
   6.181 +    #traverse the tree_of_blocks depth-first
   6.182 +    for i,i_ in enumerate(blocks):
   6.183 +        if (i_[0]==tree_of_blocks.root): continue
   6.184 +        directry_content+="var block"+str(i)+"_sequences=new Array("
   6.185 +        buffer=""
   6.186 +        for j in i_[0].content[0].sequence_indexes_a:
   6.187 +            buffer+="\""+str(j)+"\","
   6.188 +        if len(buffer)!=0: buffer=buffer[:-1]
   6.189 +        directry_content+=buffer
   6.190 +        directry_content+=");\n"
   6.191 +
   6.192 +        directry_content+="var block"+str(i)+"_positions=new Array("
   6.193 +        buffer=""
   6.194 +        for j in i_[0].content[0].position_indexes_a:
   6.195 +            buffer+="\""+str(j)+"\","
   6.196 +        if len(buffer)!=0: buffer=buffer[:-1]
   6.197 +        directry_content+=buffer
   6.198 +        directry_content+=");\n"
   6.199 +
   6.200 +        directry_content+="var block"+str(i)+"_conserved_positions=new Array("
   6.201 +        buffer=""
   6.202 +        if i_[0].content[0].state=="aligned":
   6.203 +            for j in i_[0].content[0].conserved_positions:
   6.204 +                buffer+="\""+str(j[0])+"\","
   6.205 +            if len(buffer)!=0: buffer=buffer[:-1]
   6.206 +            directry_content+=buffer
   6.207 +        directry_content+=");\n"
   6.208 +
   6.209 +        directry_content+="var block"+str(i)+"_groups_in_conserved_positions=new Array("
   6.210 +        buffer=""
   6.211 +        if i_[0].content[0].state=="aligned":
   6.212 +            for j in i_[0].content[0].conserved_positions:
   6.213 +                buffer+="\""+str(j[1])+"\","
   6.214 +            if len(buffer)!=0: buffer=buffer[:-1]
   6.215 +            directry_content+=buffer
   6.216 +        directry_content+=");\n"
   6.217 +
   6.218 +        directry_content+="var block"+str(i)+"_state=\""+str(i_[0].content[0].state)+"\";\n"
   6.219 +
   6.220 +        if i_[0].parental_vertex!=tree_of_blocks.root:
   6.221 +
   6.222 +            directry_content+="var block"+str(i)+"_parental_block_remaining_sequences=new Array("
   6.223 +            buffer=""
   6.224 +            for j in i_[0].parental_vertex.content[0].sequence_indexes_a:
   6.225 +                if (j in i_[0].content[0].sequence_indexes_a) == False : buffer+="\""+str(j)+"\","
   6.226 +            if len(buffer)!=0: buffer=buffer[:-1]
   6.227 +            directry_content+=buffer
   6.228 +            directry_content+=");\n"
   6.229 +
   6.230 +            directry_content+="var block"+str(i)+"_parental_block_positions=new Array("
   6.231 +            buffer=""
   6.232 +            for j in i_[0].parental_vertex.content[0].position_indexes_a:
   6.233 +                buffer+="\""+str(j)+"\","
   6.234 +            if len(buffer)!=0: buffer=buffer[:-1]
   6.235 +            directry_content+=buffer
   6.236 +            directry_content+=");\n"
   6.237 +        else:
   6.238 +            directry_content+="var block"+str(i)+"_parental_block_remaining_sequences=new Array();\n"
   6.239 +            directry_content+="var block"+str(i)+"_parental_block_positions=new Array();\n"
   6.240 +
   6.241 +    directry_content+="</script>\n"
   6.242 +    for i, i_ in enumerate(blocks):
   6.243 +        directry_content+="<A href=\"javascript:void(draw_a_block(block"+str(i)+"_sequences, block"+str(i)+"_positions, block"+str(i)+"_parental_block_remaining_sequences, block"+str(i)+"_parental_block_positions, block"+str(i)+"_state, block"+str(i)+"_conserved_positions, block"+str(i)+"_groups_in_conserved_positions, \'block"+str(i)+"\'))\">"
   6.244 +        for j in range(i_[1]-1):
   6.245 +            directry_content+="&nbsp&nbsp|"
   6.246 +        if i_[1]!=1:directry_content+="-"
   6.247 +        directry_content+="block"+str(i)
   6.248 +        if i_[0].content[0].state=="aligned": directry_content+=" +"
   6.249 +        elif i_[0].content[0].state=="no_alignment": directry_content+=" -"
   6.250 +        directry_content+="</A><Br>\n"
   6.251 +    directry_content+="</Body>\n</Html>"
   6.252 +    directry_file=open(directry_file_name,"w")
   6.253 +    directry_file.write(directry_content)
   6.254 +    directry_file.close()
   6.255 +
   6.256 +    #blocks frame creation
   6.257 +    #create blocks_content
   6.258 +    blocks_content=""
   6.259 +    blocks_content+="<Html>\n<Head>\n<Title>Blocks found in alignment "+str(input_alignment_file_name)+"</Title>\n</Head>\n<Body bgcolor=\"#FFFFFF\">\n"
   6.260 +    blocks_content+="<P> Blue color in tables designates current block, green color designates current block's parental block.</P><Br><Br>\n"
   6.261 +    blocks_content+="<P></P>\n"
   6.262 +    blocks_content+="<P></P>\n"
   6.263 +    blocks_content+="<Table border=\"1\" id=\"alignment\">\n"
   6.264 +    blocks_content+="    <Tr>\n"
   6.265 +    blocks_content+="        <Th>Sequences names</Th>\n"
   6.266 +    for i in alignment.position_indexes_a:
   6.267 +        blocks_content+="        <Th>"+str(int(i)+1)+"</Th>\n"
   6.268 +    blocks_content+="    </Tr>\n"
   6.269 +    for i, i_ in enumerate(alignment.body):
   6.270 +        blocks_content+="    <Tr>\n"
   6.271 +        blocks_content+="        <Td>"+str(alignment.sequences[i].name)+"</Td>\n"
   6.272 +        for j,j_ in enumerate(i_):
   6.273 +            if alignment.get_aa(i,j)!=None:
   6.274 +                blocks_content+="        <Td>"+str(alignment.get_aa(i,j)).upper()+"</Td>\n"
   6.275 +            else:
   6.276 +                blocks_content+="        <Td>-</Td>\n"
   6.277 +        blocks_content+="    </Tr>\n"
   6.278 +    blocks_content+="</Body>\n</Html>"
   6.279 +    blocks_file=open(blocks_file_name, "w")
   6.280 +    blocks_file.write(blocks_content)
   6.281 +    blocks_file.close()
   6.282 +
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/sbbs_2009/modules/configure.py	Sat Jul 09 16:30:28 2011 +0400
     7.3 @@ -0,0 +1,85 @@
     7.4 +#!/usr/bin/python
     7.5 +"""
     7.6 +Author: AAl
     7.7 +Status: Format accepted Feb.6, 2009
     7.8 +
     7.9 +All parameters are contained here.
    7.10 +
    7.11 +To use parameters one need  import configure and use names as follows:
    7.12 +configure.minimal_block_size
    7.13 +
    7.14 +
    7.15 +Vocabulary
    7.16 +    Suffixes
    7.17 +        _a   # Alignment
    7.18 +        _b   # Block
    7.19 +        _s   # Sequence
    7.20 +        _p   # Parent
    7.21 +        _aa           # amino acid residue
    7.22 +        _num          # number of an element; 1, 2, ...  Typically num=index+1
    7.23 +        _ss           # secondary structure
    7.24 +
    7.25 +    Words
    7.26 +        position        # position in alignment, syn. column
    7.27 +        alignment
    7.28 +        index           # 0, 1, ...
    7.29 +        gap_position    # a position containing at least one aa_symbol and at least one gap symbol "-"
    7.30 +        empty_position  # contains gap symbols "-" only
    7.31 +        body            #alignment markup
    7.32 +
    7.33 +
    7.34 +"""
    7.35 +
    7.36 +missing_characters=set([" ",'\t','\n',"0","1","2","3","4","5","6","7","8","9",'\r'])
    7.37 +
    7.38 +gap_symbols=set([".","-","~"])                            # all are immidiatly changed to "-"
    7.39 +
    7.40 +aa_characters=set(["a","c","d","e","f","g","h","i","k","l","m","n","p","q","r","s","t","v","w","y","x"])
    7.41 +
    7.42 +special_sequence_names = set(["aligned", "ss", "sa", "domains", "contact"])  #to be compared with words with lowered letters
    7.43 +
    7.44 +alignment_filename = "my_alignment.fasta"
    7.45 +
    7.46 +minimal_block_width = 3
    7.47 +
    7.48 +# {group_id, (letters, weight, description)}
    7.49 +# Weight is  log_2 of group frequency
    7.50 +
    7.51 +groups_of_aa_1={
    7.52 +                "b":("IVLMFA",None,-1.671163536,None),
    7.53 +                "t":("AGSTC", None,-1.625934282,None),
    7.54 +                "u":("HKR",   None,-2.805912948,None),
    7.55 +                "l":("IVLM",  None,-2.321928095,None),
    7.56 +                "c":("IV",    None,-3.23786383 ,None),
    7.57 +                "a":("FWY",   None,-3.53951953 ,None),
    7.58 +                "d":("DE",    None,-3.095419565,None),
    7.59 +                "e":("KR",    None,-3.13289427 ,None),
    7.60 +                "g":("DN",    None,-3.279283757,None),
    7.61 +                "f":("EQ",    None,-3.395928676,None),
    7.62 +                "i":("ST",    None,-2.805912948,None),
    7.63 +                "h":("AG",    None,-2.756330919,None),
    7.64 +                "A":("A",     None,-3.756330919,None),
    7.65 +                "G":("G",     None,-3.756330919,None),
    7.66 +                "S":("S",     None,-3.625934282,None),
    7.67 +                "T":("T",     None,-4.011587974,None),
    7.68 +                "C":("C",     None,-4.921390165,None),
    7.69 +                "P":("P",     None,-4.321928095,None),
    7.70 +                "N":("N",     None,-4.506352666,None),
    7.71 +                "H":("H",     None,-5.10780329 ,None),
    7.72 +                "K":("K",     None,-3.795859283,None),
    7.73 +                "R":("R",     None,-4.573466862,None),
    7.74 +                "D":("D",     None,-4.083141235,None),
    7.75 +                "E":("E",     None,-4.10780329 ,None),
    7.76 +                "Q":("Q",     None,-4.756330919,None),
    7.77 +                "I":("I",     None,-4.717856771,None),
    7.78 +                "L":("L",     None,-3.717856771,None),
    7.79 +                "V":("V",     None,-3.878321443,None),
    7.80 +                "F":("F",     None,-4.64385619 ,None),
    7.81 +                "W":("W",     None,-6.265344567,None),
    7.82 +                "Y":("Y",     None,-4.921390165,None),
    7.83 +                "M":("M",     None,-5.795859283,None)
    7.84 +                }
    7.85 +
    7.86 +conservation_threshold=0.95
    7.87 +
    7.88 +probability_threshold=1.0/800
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/sbbs_2009/modules/main.py	Sat Jul 09 16:30:28 2011 +0400
     8.3 @@ -0,0 +1,124 @@
     8.4 +#!usr/bin/python
     8.5 +
     8.6 +# Test changes
     8.7 +pass
     8.8 +
     8.9 +import configure
    8.10 +import alignment
    8.11 +import block
    8.12 +import blocks
    8.13 +import rooted_tree
    8.14 +from optparse import OptionParser
    8.15 +import blocks_io
    8.16 +
    8.17 +import time
    8.18 +import inspect
    8.19 +
    8.20 +def _find_next_current_vertex(current_vertex):
    8.21 +    """
    8.22 +    This is an internal routine
    8.23 +    """
    8.24 +    flag=0
    8.25 +    while flag!=1:
    8.26 +        if current_vertex.content[0].state=="in process":
    8.27 +            second_flag=0
    8.28 +            for i in current_vertex.child_vertices:
    8.29 +                if i.content[0].state=="pending":
    8.30 +                    current_vertex=i
    8.31 +                    return current_vertex
    8.32 +                elif i.content[0].state=="in process":
    8.33 +                    current_vertex=i
    8.34 +                    second_flag=1
    8.35 +            if second_flag==0:
    8.36 +                current_vertex.content[0].state="processed"
    8.37 +                current_vertex=current_vertex.parental_vertex
    8.38 +        elif current_vertex.content[0].state=="processed":
    8.39 +            current_vertex=current_vertex.parental_vertex
    8.40 +        elif current_vertex.content[0].state=="no_alignment":
    8.41 +            current_vertex=current_vertex.parental_vertex
    8.42 +        elif current_vertex.content[0].state=="initial":
    8.43 +            return current_vertex
    8.44 +        elif current_vertex.content[0].state=="pending":
    8.45 +            return current_vertex
    8.46 +
    8.47 +#class plug(object):
    8.48 +#    def __init__(self):
    8.49 +#        self.state="initial"
    8.50 +
    8.51 +
    8.52 +
    8.53 +
    8.54 +"""
    8.55 +Borya Nagaev - BNagaev@gmail.com
    8.56 +
    8.57 +"""
    8.58 +"""
    8.59 +The main routine
    8.60 +
    8.61 +Author: Burkov
    8.62 +State: in progress
    8.63 +
    8.64 +This is the main routine
    8.65 +To run the program, type:
    8.66 +
    8.67 +python main.py test_3.msf > log.txt
    8.68 +"""
    8.69 +
    8.70 +#parsing the arguments
    8.71 +parser=OptionParser()
    8.72 +options, args = parser.parse_args()
    8.73 +alignment = alignment.Alignment([])
    8.74 +
    8.75 +print args[0]
    8.76 +
    8.77 +alignment.get_from_file(args[0])
    8.78 +#initialization of blocks and initial_block
    8.79 +blocks=blocks.Blocks(alignment, None, None, None)
    8.80 +initial_block=block.Block(alignment, alignment.sequence_indexes_s, alignment.position_indexes_a)
    8.81 +initial_block.state="pending"
    8.82 +#initialization of rooted_tree of blocks
    8.83 +tree_of_blocks = rooted_tree.Rooted_tree()
    8.84 +tree_of_blocks.root.child_vertices.append(rooted_tree.Vertex([initial_block],tree_of_blocks.root,[],None))
    8.85 +tree_of_blocks.root.content[0]=block.plug()
    8.86 +#there's no need in stack of blocks: to end the process we just need all the children of the initial_block to have status "processed", "aligned" or "no alignment"
    8.87 +#traverse of the rooted_tree
    8.88 +
    8.89 +print ">"+str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" time ="+str(time.time())
    8.90 +#print "time at"+inspect.currentframe().f_back.f_lineno+"  = "+str(time.time())
    8.91 +#all this info is available via "sys" module "_something" internal function or "traceback" module
    8.92 +current_vertex=tree_of_blocks.root.child_vertices[0]
    8.93 +while current_vertex!=tree_of_blocks.root:
    8.94 +    print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" current_vertex.content[0].__dict__ = "+str(current_vertex.content[0].__dict__)
    8.95 +    print ">"+str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" time ="+str(time.time())
    8.96 +    aligned_block=current_vertex.content[0].get_aligned_block()
    8.97 +    if aligned_block != None:
    8.98 +        aligned_block=current_vertex.content[0].upsize(aligned_block)
    8.99 +        print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" aligned_block.__dict__ = "+str(aligned_block.__dict__)
   8.100 +        if aligned_block.block_of_gaps!=None: print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" aligned_block.block_of_gaps.__dict__ = "+str(aligned_block.block_of_gaps.__dict__)
   8.101 +        current_vertex.content[0].state="in process"
   8.102 +        blocks.aligned_blocks.append(aligned_block)
   8.103 +        current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block],current_vertex,[],None))
   8.104 +        if aligned_block.block_of_gaps!=None: current_vertex.child_vertices.append(rooted_tree.Vertex([aligned_block.block_of_gaps],current_vertex,[],None))
   8.105 +        print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+"before find_complementary_blocks"
   8.106 +        print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" time at mark 2 = "+str(time.time())
   8.107 +        complementary_blocks=current_vertex.content[0].find_complementary_blocks(aligned_block.position_indexes_a, aligned_block.sequence_indexes_a)
   8.108 +        for i in complementary_blocks:
   8.109 +            current_vertex.child_vertices.append(rooted_tree.Vertex([i],current_vertex,[],None))
   8.110 +            print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" i.__dict__ = "+str(i.__dict__)
   8.111 +        current_vertex=_find_next_current_vertex(current_vertex)
   8.112 +    else:
   8.113 +        current_vertex=_find_next_current_vertex(current_vertex)
   8.114 +
   8.115 +for i in blocks.aligned_blocks:
   8.116 +    print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" found_aligned_blocks.__dict__ = "+str(i.__dict__)+"\n\n"
   8.117 +
   8.118 +blocks_for_print=tree_of_blocks.depth_first_traversal()
   8.119 +for i in blocks_for_print:
   8.120 +    print str(inspect.currentframe().f_code.co_filename)+"::line "+str(inspect.currentframe().f_lineno)+" nesting_depth = "+str(i[1])+" current_block's dict = "+str(i[0].content[0].__dict__)
   8.121 +
   8.122 +import pickle
   8.123 +storage=open(str(args[0])+'.pcl','wb')
   8.124 +pickle.dump(tree_of_blocks, storage)
   8.125 +storage.close()
   8.126 +
   8.127 +blocks_io.tree_of_blocks_to_html(args[0], tree_of_blocks, alignment, args[0]+".main.html", args[0]+".directry.html", args[0]+".blocks.html")
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/sbbs_2009/modules/phylip.py	Sat Jul 09 16:30:28 2011 +0400
     9.3 @@ -0,0 +1,275 @@
     9.4 +#!/usr/bin/python
     9.5 +"""
     9.6 +Contains classes and functions for information exchange with the "PHYLIP" package
     9.7 +Classes:
     9.8 +    Phy_matrix: distance matrix
     9.9 +    WrongFileFormat: technical (exception for errors while reading file)
    9.10 +
    9.11 +Functions:
    9.12 +    readmatrix(filename): read file with distance matrix in PHYLIP
    9.13 +        format; return object of Phy_matrix class
    9.14 +
    9.15 +"""
    9.16 +class Phy_matrix(object):
    9.17 +    """
    9.18 +    Distance matrix
    9.19 +
    9.20 +    Author: S.A.S.
    9.21 +    Status:  in progress
    9.22 +
    9.23 +    Data:
    9.24 +        self.number                           # number of objects (int)
    9.25 +        self.names                            # names of objects (tuple of strings)
    9.26 +        self.matrix                           # distances between objects (dictionary with keys = unordered pairs of names
    9.27 +                                              #     and float values)
    9.28 +
    9.29 +    Methods
    9.30 +        __init__(self, names=())              # initializing with zero distances
    9.31 +        __str__(self)                         # output in PHYLIP format
    9.32 +        __mul__(self,m)                       # multiply all distances by a coefficicent m
    9.33 +        __div__(self,d)                       # divide all distances by a divisor d
    9.34 +        max(self)                             # return the maximal value of distance
    9.35 +        hasneg(self)                          # if there is a negative distance (True or False)?
    9.36 +        index(self, name)                     # index of object (1 to self.number)
    9.37 +        name(self, index)                     # name of the object with the given index (1 to self.number)
    9.38 +        distance(self, name1, name2)          # return the element of the matrix (the distance between two objects)
    9.39 +        distance_n(self, index1, index2)      # the same, objects are given by their indices
    9.40 +        setdist(self, name1, name2, d)        # return new Phy_matrix that differs from *self* by the distance between
    9.41 +                                              #    two given objects that is set to d
    9.42 +        setdist_n(self, index1, index2, d)    # the same, objects are given by their indices
    9.43 +        addobject(self, name)                 # return new Phy_matrix that differs from *self* by the added object having
    9.44 +                                              #    the given name, the last possible index and zero distances to all others
    9.45 +        permutate(self, name1, name2)         # return new Phy_matrix with two permutated objects
    9.46 +        permutate_n(self, index1, index2)     # the same, objects are given by their indices
    9.47 +
    9.48 +    Planned method
    9.49 +        wpgma(self)                           # rooted tree from distances (wpgma method)
    9.50 +    """
    9.51 +
    9.52 +    def __init__(self, names=()):
    9.53 +        self.number=len(names)
    9.54 +        self.names=names
    9.55 +        self.matrix=dict()
    9.56 +        for _i in range(1, self.number):
    9.57 +            for _j in range(0, _i):
    9.58 +                if names[_i] == names[_j]:
    9.59 +                    raise "Duplicate name "+names[_i]
    9.60 +                else:
    9.61 +                    self.matrix[frozenset((names[_i], names[_j]))] = 0.0
    9.62 +
    9.63 +    def __str__(self):
    9.64 +        if (self.number) > 99999:
    9.65 +            _result="Too large matrix to print!!!"
    9.66 +        else:
    9.67 +            _result = "%5i" % self.number
    9.68 +            for _i in range(self.number):
    9.69 +                if len(self.names[_i]) < 11:
    9.70 +                    _row = "\n" + (self.names[_i]).ljust(10)
    9.71 +                else:
    9.72 +                    _row = "\n" + "Obj_" + str(_i).ljust(6)
    9.73 +                _counter = 1
    9.74 +                for _j in range(self.number):
    9.75 +                     if _counter == 7:
    9.76 +                         _row = _row + "\n"
    9.77 +                         _counter = 0
    9.78 +                     if _i == _j:
    9.79 +                         _row = _row + "  0.000000"
    9.80 +                     else:
    9.81 +                         _row = _row + "%10.6f" % self.matrix[frozenset((self.names[_i], self.names[_j]))]
    9.82 +                     _counter += 1
    9.83 +                _result = _result + _row
    9.84 +        return _result
    9.85 +
    9.86 +    def __mul__(self, m):
    9.87 +        _result = Phy_matrix(self.names)
    9.88 +        for _pair in self.matrix.iterkeys():
    9.89 +            _result.matrix[_pair] = self.matrix[_pair]*m
    9.90 +        return _result
    9.91 +
    9.92 +    def __div__(self, d):
    9.93 +        _result = Phy_matrix(self.names)
    9.94 +        for _pair in self.matrix.iterkeys():
    9.95 +            _result.matrix[_pair] = self.matrix[_pair]/d
    9.96 +        return _result
    9.97 +
    9.98 +    def max(self):
    9.99 +        return max(self.matrix.values())
   9.100 +
   9.101 +    def hasneg(self):
   9.102 +        _result = False
   9.103 +        for dist in self.matrix.values():
   9.104 +            if dist < 0: _result = True
   9.105 +        return _result
   9.106 +
   9.107 +    def index(self, name):
   9.108 +        _result = 0
   9.109 +        for _i in range(self.number):
   9.110 +            if self.names[_i] == name:
   9.111 +                _result = _i+1
   9.112 +        return _result
   9.113 +
   9.114 +    def name(self, index):
   9.115 +        _result = ""
   9.116 +        if index > self.number:
   9.117 +            print "Content-type=text/plain\n"
   9.118 +            print "Index %i is out of range!\n" % index
   9.119 +        elif index > 0:
   9.120 +            _result = self.names[index-1]
   9.121 +        return _result
   9.122 +
   9.123 +    def distance(self, name1, name2):
   9.124 +        _result = 0.0
   9.125 +        _n1 = self.index(name1)
   9.126 +        if _n1 == 0:
   9.127 +            print "Content-type=text/plain\n"
   9.128 +            print "Name \""+name1+"\" does not exist!\n"
   9.129 +        _n2 = self.index(name2)
   9.130 +        if _n2 == 0:
   9.131 +            print "Content-type=text/plain\n"
   9.132 +            print "Name \""+name2+"\" does not exist!\n"
   9.133 +        if (_n1 > 0) and (_n2 > 0) and (_n1 != _n2):
   9.134 +            _result = self.matrix[frozenset((name1, name2))]
   9.135 +        return _result
   9.136 +
   9.137 +    def distance_n(self, i, j):
   9.138 +        _result = 0.0
   9.139 +        if (i > 0) and (j > 0) and (i != j):
   9.140 +            _result = self.matrix[frozenset((self.name(i), self.name(j)))]
   9.141 +        return _result
   9.142 +
   9.143 +    def setdist(self, name1, name2, d):
   9.144 +        if name1 == name2:
   9.145 +            raise "Impossible to set distance between two identical objects!\n"
   9.146 +        else:
   9.147 +            _result = Phy_matrix(self.names)
   9.148 +            _pair0 = frozenset((name1, name2))
   9.149 +            for _pair in self.matrix.iterkeys():
   9.150 +                if _pair0 == _pair:
   9.151 +                    _result.matrix[_pair] = d
   9.152 +                else:
   9.153 +                    _result.matrix[_pair] = self.matrix[_pair]
   9.154 +            return _result
   9.155 +
   9.156 +    def setdist_n(self, i, j, d):
   9.157 +        if i == j:
   9.158 +            raise "Impossible to set distance between two identical objects!\n"
   9.159 +        else:
   9.160 +            _result = Phy_matrix(self.names)
   9.161 +            _pair0 = frozenset((self.name(i), self.name(j)))
   9.162 +            for _pair in self.matrix.iterkeys():
   9.163 +                if _pair0 == _pair:
   9.164 +                    _result.matrix[_pair] = d
   9.165 +                else:
   9.166 +                    _result.matrix[_pair] = self.matrix[_pair]
   9.167 +            return _result
   9.168 +
   9.169 +    def addobject(self, name):
   9.170 +        if self.matrix.has_key(name):
   9.171 +            _result = self.copy()
   9.172 +        else:
   9.173 +            _newnames = self.names + (name,)
   9.174 +            _result = Phy_matrix(_newnames)
   9.175 +            _result.matrix = self.matrix.copy()
   9.176 +            for _name in self.names:
   9.177 +                _result.matrix[frozenset((_name,name))] = 0.0
   9.178 +        return _result
   9.179 +
   9.180 +    def permutate(self, name1, name2):
   9.181 +        if name1==name2:
   9.182 +            result = self.copy()
   9.183 +        else:
   9.184 +            _i = self.index(name1)
   9.185 +            _j = self.index(name2)
   9.186 +	    if (_i==0) or (_j==0):
   9.187 +                result = self.copy()
   9.188 +            else:
   9.189 +                _min = min(_i-1,_j-1)
   9.190 +                _max = max(_i-1,_j-1)
   9.191 +                _newnames = self.names[:_min] + (self.names[_max],) + self.names[_min+1:_max] + (self.names[_min],) + self.names[_max+1:]
   9.192 +                _result = Phy_matrix(_newnames)
   9.193 +                _result.matrix = self.matrix.copy()
   9.194 +        return _result
   9.195 +
   9.196 +    def permutate_n(self, i, j):
   9.197 +        if (i==j) or (i==0) or (j==0):
   9.198 +           result = self.copy()
   9.199 +        else:
   9.200 +            _min = min(i-1,j-1)
   9.201 +            _max = max(i-1,j-1)
   9.202 +            _newnames = self.names[:_min] + (self.names[_max],) + self.names[_min+1:_max] + (self.names[_min],) + self.names[_max+1:]
   9.203 +            _result = Phy_matrix(_newnames)
   9.204 +            _result.matrix = self.matrix.copy()
   9.205 +        return _result
   9.206 +
   9.207 +
   9.208 +# class phy_matrix
   9.209 +
   9.210 +class WrongFileFormat(Exception):
   9.211 +    pass
   9.212 +
   9.213 +def readmatrix(filename):
   9.214 +    '''
   9.215 +    Reading distance matrix from a file in PHYLIP format defined by its name
   9.216 +    Author: S.A.S.
   9.217 +
   9.218 +    '''
   9.219 +    #import sys # for debug purposes!
   9.220 +    infile = open(filename, "r")
   9.221 +    line = infile.readline()
   9.222 +    try:
   9.223 +        number = int(line)
   9.224 +    except ValueError:
   9.225 +        raise WrongFileFormat, "First line of \""+filename+"\" does not contain the number of objects"
   9.226 +    if number == 0:
   9.227 +        raise WrongFileFormat, "Zero number of objects in "+filename
   9.228 +    names = ()
   9.229 +    counter = 0
   9.230 +    matrix = dict()
   9.231 +    tempmatrix = ()
   9.232 +    row = ()
   9.233 +    for line in infile.readlines():
   9.234 +        if counter == 0:
   9.235 +            names = names + (line[0:10].strip(),)
   9.236 +            if (number < 6):
   9.237 +                numelements = number
   9.238 +            else:
   9.239 +                numelements = 6
   9.240 +            if len(line) < 10*(numelements+1) :
   9.241 +                raise WrongFileFormat, "Too short row in file " + filename
   9.242 +
   9.243 +            for i in range(numelements):
   9.244 +                try:
   9.245 +                    row = row + (float(line[10*(i+1):10*(i+2)]),)
   9.246 +                except ValueError:
   9.247 +                    raise WrongFileFormat, "Non-numeric data in "+filename
   9.248 +            counter += numelements
   9.249 +        else:
   9.250 +            if (number - counter < 7):
   9.251 +                numelements = number - counter
   9.252 +            else:
   9.253 +                numelements = 7
   9.254 +            if len(line) < 10*numelements :
   9.255 +                raise WrongFileFormat, "Too short row in file" + filename
   9.256 +
   9.257 +            for i in range(numelements):
   9.258 +                try:
   9.259 +                    row = row + (float(line[10*i:10*(i+1)]),)
   9.260 +                except ValueError:
   9.261 +                    raise WrongFileFormat, "Non-numeric data in "+filename
   9.262 +            counter += numelements
   9.263 +        if counter == number:
   9.264 +            counter = 0
   9.265 +            tempmatrix = tempmatrix + (row,)
   9.266 +            row = ()
   9.267 +    # for all lines of the file
   9.268 +    infile.close()
   9.269 +    if (counter != 0) or (len(names) != number):
   9.270 +        raise WrongFileFormat, "Wrong number of objects in "+filename
   9.271 +
   9.272 +    result = Phy_matrix(names)
   9.273 +    for i in range(1, result.number):
   9.274 +        for j in range(0, i):
   9.275 +            result.matrix[frozenset((names[i], names[j]))] = tempmatrix[i][j]
   9.276 +    return result
   9.277 +# readmatrix
   9.278 +
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/sbbs_2009/modules/rooted_tree.py	Sat Jul 09 16:30:28 2011 +0400
    10.3 @@ -0,0 +1,507 @@
    10.4 +"""
    10.5 +The module comtains classes
    10.6 +    Rooted_tree
    10.7 +    Vertex
    10.8 +
    10.9 +Dictionary:
   10.10 +    vertex - any vertex of the tree, including nodes and leaves
   10.11 +    node - internal vertex
   10.12 +    leaf - terminal vertex
   10.13 +    edge
   10.14 +    root
   10.15 +    parenthetic structure - str without "\n" or "\r", which ends with ";",
   10.16 +        contains parentheses, corresponding to nodes e.g. "(a,b,c);" corresponds
   10.17 +        to a tree, with leaves a, b and c arise from the root; optionnaly may
   10.18 +        contain lengths (minus and dot symbols allowed) on edges: "(a:1.5,b:2,c:-3.5)"
   10.19 +    subtree, corresponding to a node (or lying under a node)
   10.20 +    root subtree, corresponding to the node - union of edges+nodes, leading
   10.21 +        from the root to that node and subtree under that node
   10.22 +
   10.23 +
   10.24 +"""
   10.25 +
   10.26 +
   10.27 +class Rooted_tree(object):
   10.28 +
   10.29 +    """
   10.30 +    Authors: Burkov
   10.31 +    Status: in progress (most methods seem to work)
   10.32 +
   10.33 +    Data:
   10.34 +        self.root                                           # contains Vertex object which is the root
   10.35 +        self.leaves                                         # contains list of tree leaves as Vertex objects
   10.36 +
   10.37 +    Methods
   10.38 +        +self.parenthetic_structure_to_tree                  #
   10.39 +        +self.next_node(self, candidate_nodes)               # Return a list [next_node,[new_candidate_nodes]]
   10.40 +        +self.node_to_set_of_leaves(self, node)              # Return a set of leaves under the node
   10.41 +                                                               A leaf is a Vertex object
   10.42 +        +self.complement(self, set_of_leaves)                # Return complement set of leaves
   10.43 +        +self.chop_the_tree(self, set_of_leaves)             # Returns a susbtree of the original tree as a
   10.44 +                                                                 Rooted_tree object, that conatins only the
   10.45 +                                                                 leaves in set_of_leaves
   10.46 +        +self.set_of_leaves_to_sum_of_lengthes_of_root_subtree(self, set_of_leaves)  # Returns sum_of_lengthes_of_root_subtree,
   10.47 +                                                                 which contains only lists in set_of_leaves
   10.48 +             NEW (AAl)!!!
   10.49 +         self.add_node(self,content, parental_vertex,parental_edge)
   10.50 +                                                             # Add edges and a node;
   10.51 +                                                                local tree rearrangement
   10.52 +                                                                Return index of new node
   10.53 +                                                                The method is needed for a tree of blocks
   10.54 +                                                              # Parental_edge contains one of strings:
   10.55 +                                                              # "left" means block is left complement in parent
   10.56 +                                                              # "right" means block is right complement in parent
   10.57 +                                                              # "bottom" means block is bottom complement in parent
   10.58 +
   10.59 +    """
   10.60 +    def __init__(self):
   10.61 +        self.root=Vertex(["root"],None,[],None)
   10.62 +        self.leaves=[]
   10.63 +
   10.64 +    def add_node(self, vertex):
   10.65 +
   10.66 +        vertex.parental_vertex.child_vertices.append(vertex)
   10.67 +        if vertex.parental_vertex in self.leaves:
   10.68 +            pass
   10.69 +#        ... add vertex to leaves
   10.70 +
   10.71 +    def parenthetic_structure_to_tree(self,parenthetic_structure,testflag=0):
   10.72 +        r"""
   10.73 +        This function gets parenthetic structure of the tree (ending with ";" ; lenghts of tree edges may be
   10.74 +        stated or not; tree may be binary or not)
   10.75 +        by definition root's parent = None;
   10.76 +        The tree is traversed width-first upon construction; currently processed nodes are stored in queue;
   10.77 +        with their parental vertices and indexes of the respective wax and wane (indeed, wax and wane in the
   10.78 +        parenthetic structure correspond to a node).
   10.79 +
   10.80 +        //Note that sequence names are nums made from alignment.sequence_indexes
   10.81 +        //Therefore, vertices reffer to that indexes
   10.82 +
   10.83 +        >>> import rooted_tree
   10.84 +        >>> tree=rooted_tree.Rooted_tree()
   10.85 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
   10.86 +        >>> print "tree.root.child_vertices[0].content = "+str(tree.root.child_vertices[0].content)
   10.87 +        tree.root.child_vertices[0].content = ['a']
   10.88 +        >>> print "tree.root.child_vertices[0].parental_edge = "+str(tree.root.child_vertices[0].parental_edge)
   10.89 +        tree.root.child_vertices[0].parental_edge = 10
   10.90 +        >>> print "tree.root.child_vertices[1].content = "+str(tree.root.child_vertices[1].content)
   10.91 +        tree.root.child_vertices[1].content = [None]
   10.92 +        >>> print "tree.root.child_vertices[1].parental_edge = "+str(tree.root.child_vertices[1].parental_edge)
   10.93 +        tree.root.child_vertices[1].parental_edge = 2.0
   10.94 +        >>> print "tree.root.child_vertices[1].child_vertices[0].content = "+str(tree.root.child_vertices[1].child_vertices[0].content)
   10.95 +        tree.root.child_vertices[1].child_vertices[0].content = ['b']
   10.96 +        >>> print "tree.root.child_vertices[1].child_vertices[0].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[0].parental_edge)
   10.97 +        tree.root.child_vertices[1].child_vertices[0].parental_edge = 8.0
   10.98 +        >>> print "tree.root.child_vertices[1].child_vertices[1].content = "+str(tree.root.child_vertices[1].child_vertices[1].content)
   10.99 +        tree.root.child_vertices[1].child_vertices[1].content = [None]
  10.100 +        >>> print "tree.root.child_vertices[1].child_vertices[1].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[1].parental_edge)
  10.101 +        tree.root.child_vertices[1].child_vertices[1].parental_edge = 5.0
  10.102 +        >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[0].content = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[0].content)
  10.103 +        tree.root.child_vertices[1].child_vertices[1].child_vertices[0].content = ['c']
  10.104 +        >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[0].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[0].parental_edge)
  10.105 +        tree.root.child_vertices[1].child_vertices[1].child_vertices[0].parental_edge = 3.0
  10.106 +        >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[1].content = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[1].content)
  10.107 +        tree.root.child_vertices[1].child_vertices[1].child_vertices[1].content = ['d']
  10.108 +        >>> print "tree.root.child_vertices[1].child_vertices[1].child_vertices[1].parental_edge = "+str(tree.root.child_vertices[1].child_vertices[1].child_vertices[1].parental_edge)
  10.109 +        tree.root.child_vertices[1].child_vertices[1].child_vertices[1].parental_edge = 3.0
  10.110 +
  10.111 +
  10.112 +        """
  10.113 +
  10.114 +        import re
  10.115 +        if testflag!=0: import sys
  10.116 +        queue=[(self.root,None,0,len(parenthetic_structure)-2)] #len(parenthetic_structure)-2 == index of last ')'
  10.117 +        while queue!=[]:
  10.118 +            #take the first element in the queue
  10.119 +            current_queue_element=queue[0]
  10.120 +            wax=0
  10.121 +            wane=0
  10.122 +            left_margin=current_queue_element[2]+1
  10.123 +            for i in xrange(current_queue_element[2],current_queue_element[3]+1,1):
  10.124 +                if parenthetic_structure[i]=="(":
  10.125 +                    wax=wax+1
  10.126 +                elif parenthetic_structure[i]=="," or parenthetic_structure[i]==")":#child vertex found
  10.127 +
  10.128 +                    if wax==wane+1:
  10.129 +                        #search for the branch length; if not length-oriented, length=None
  10.130 +                        length_search_object=re.search('-?\d*\.?\d+$',parenthetic_structure[left_margin:i])
  10.131 +                        if length_search_object:#if length-oriented
  10.132 +                           length=length_search_object.group()
  10.133 +                        #add a new vertex to current_queue_element's children list
  10.134 +                        current_queue_element[0].child_vertices.append(Vertex([],current_queue_element[0],[],length))
  10.135 +                        if testflag!=0: sys.stderr.write("left_margin = "+str(left_margin)+"| i = "+str(i)+"| length = " +str(length)+"\n")
  10.136 +                        if testflag!=0: sys.stderr.write(str(queue)+"\n")
  10.137 +                        if testflag!=0: sys.stderr.write(str(current_queue_element[0].content)+"\n")
  10.138 +                        #check: node or leaf (are there parentheses inside)
  10.139 +                        if None==re.search('[\(\)]',parenthetic_structure[left_margin:i]):
  10.140 +                            if testflag!=0: sys.stderr.write("New leaf found!\n")
  10.141 +                            #if leaf, find and fill in its name
  10.142 +                            if length_search_object:#if length-oriented
  10.143 +                                name=re.search('(.*):-?\d*\.?\d+$',parenthetic_structure[left_margin:i]).group(1)
  10.144 +                            else:#if non-weight-oriented
  10.145 +                                name=parenthetic_structure[left_margin:i]
  10.146 +                            if testflag!=0: sys.stderr.write("name = "+str(name)+"\n")
  10.147 +                            current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1].content.append(name)
  10.148 +                            self.leaves.append(current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1])
  10.149 +                        else:
  10.150 +                            if testflag!=0: sys.stderr.write("New node found:\n"+re.search('[\(\)]',parenthetic_structure[left_margin:i]).group())
  10.151 +                            #if node, append None insted of name and add it to queue
  10.152 +                            current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1].content.append(None)
  10.153 +                            queue.append((current_queue_element[0].child_vertices[len(current_queue_element[0].child_vertices)-1],current_queue_element[0],left_margin,i))
  10.154 +                        left_margin=i+1
  10.155 +                if parenthetic_structure[i]==")":
  10.156 +                    wane=wane+1
  10.157 +            queue.pop(0)
  10.158 +
  10.159 +
  10.160 +    def calculate_root_subtree_sum_of_lengthes(self, testflag=0):
  10.161 +        r"""
  10.162 +        The Block module will traverse the tree from the root downwards. But the calculation of any subtree's
  10.163 +        sum of lengthes traverses the tree from leaves upwards. So it makes sense to implement a method, that
  10.164 +        first calcualtes distnace from the root to each node, traversing the tree downwards, gets stuck on leaves
  10.165 +        and goes upwards, calculating subtree sum of lengthes for each node and adding it to the distance to that
  10.166 +        node from the root.
  10.167 +        Result for each node is stored in content[1].
  10.168 +        For leaves distances to the root are stored in content[1]
  10.169 +        Subtree distances are stored in content[2] of each node
  10.170 +
  10.171 +        >>> import rooted_tree
  10.172 +        >>> tree=rooted_tree.Rooted_tree()
  10.173 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.174 +        >>> tree.calculate_root_subtree_sum_of_lengthes()
  10.175 +        """
  10.176 +        #creating a queue, contating the root
  10.177 +        if testflag!=0: import sys
  10.178 +        downward_queue=[]
  10.179 +        upward_queue=[]
  10.180 +        self.root.content.append(0)
  10.181 +        self.root.content.append(0)
  10.182 +        for i in self.root.child_vertices:
  10.183 +            downward_queue.append(i)
  10.184 +        #width-first traversing of the tree downwards
  10.185 +        if testflag!=0: sys.stderr.write("downward_queue = "+str(downward_queue)+"\n")
  10.186 +        while downward_queue!=[]:
  10.187 +            if testflag!=0: sys.stderr.write("downward_queue = "+str(downward_queue)+"\n")
  10.188 +            downward_queue[0].content.append(float(downward_queue[0].parental_edge) + downward_queue[0].parental_vertex.content[1])
  10.189 +            if testflag!=0: sys.stderr.write("downward_queue[0].content = "+str(downward_queue[0].content)+"\n")
  10.190 +            downward_queue[0].content.append(0)#initialization of content[2]
  10.191 +            downward_queue[0].traversed=0#initialization of flag for upward traverse
  10.192 +            for i in downward_queue[0].child_vertices:
  10.193 +                downward_queue.append(i)
  10.194 +            if downward_queue[0].child_vertices==[]:
  10.195 +                upward_queue.append(downward_queue[0])#upward_queue intialized with leaves
  10.196 +            downward_queue.pop(0)
  10.197 +        #traversing the tree upwards
  10.198 +        while upward_queue!=[]:
  10.199 +            if testflag!=0: sys.stderr.write("upward_queue = "+str(upward_queue)+"\n")
  10.200 +            if upward_queue[0].parental_vertex!= None:
  10.201 +                upward_queue[0].parental_vertex.content[2]=upward_queue[0].parental_vertex.content[2]+upward_queue[0].content[2]+float(upward_queue[0].parental_edge)
  10.202 +                upward_queue[0].parental_vertex.content[1]=upward_queue[0].parental_vertex.content[1]+upward_queue[0].content[2]+float(upward_queue[0].parental_edge)
  10.203 +                upward_queue[0].traversed=1
  10.204 +                traverse_flag=1
  10.205 +                for i in upward_queue[0].parental_vertex.child_vertices:
  10.206 +                    if i.traversed==0: traverse_flag=0
  10.207 +                if traverse_flag==1:
  10.208 +                    upward_queue.append(upward_queue[0].parental_vertex)
  10.209 +                if testflag!=0:
  10.210 +                    sys.stderr.write("upward_queue[0].content = "+str(upward_queue[0].content) +"\n")
  10.211 +                    sys.stderr.write("upward_queue[0].parental_vertex.content = "+str(upward_queue[0].parental_vertex.content)+"\n")
  10.212 +            upward_queue.pop(0)
  10.213 +        #garbage collection
  10.214 +        for i in self.root.child_vertices:
  10.215 +            downward_queue.append(i)
  10.216 +        while downward_queue!=[]:
  10.217 +            del downward_queue[0].traversed
  10.218 +            for i in downward_queue[0].child_vertices:
  10.219 +                downward_queue.append(i)
  10.220 +            downward_queue.pop(0)
  10.221 +
  10.222 +
  10.223 +    def next_node(self,candidate_nodes):
  10.224 +        r"""
  10.225 +        Finds next node with the greatest root subrtee sum of lengthes among candidate_nodes,
  10.226 +        removes it from candidate_nodes and adds its child nodes (NOT vertices) to
  10.227 +        new_candidate_nodes
  10.228 +
  10.229 +        Input: candidate_nodes
  10.230 +        Output: tuple (max, new_candidate_nodes)
  10.231 +            max - node with the gretest root subtree sum of lengthes among candidate_nodes
  10.232 +            new_candidate_nodes - array "candidate_nodes" without the "max" node, but with "max"
  10.233 +                children nodes instead (NOT nodes and leaves)
  10.234 +
  10.235 +        >>> import rooted_tree
  10.236 +        >>> tree = rooted_tree.Rooted_tree()
  10.237 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.238 +        >>> tree.calculate_root_subtree_sum_of_lengthes()
  10.239 +        >>> candidate_nodes=[tree.root]
  10.240 +        >>> max, new_candidate_nodes = tree.next_node(candidate_nodes)
  10.241 +        """
  10.242 +
  10.243 +        #find a node among candidates with the greatest root subtree sum of lengthes
  10.244 +        new_candidate_nodes=candidate_nodes[:]
  10.245 +        if new_candidate_nodes!=[]:
  10.246 +            max_index=0
  10.247 +            for i_,i in enumerate(new_candidate_nodes):
  10.248 +                if i.content[1]>new_candidate_nodes[max_index].content[1]:
  10.249 +                    max_index=i_
  10.250 +            #update new_candidate_nodes
  10.251 +            max=new_candidate_nodes.pop(max_index)
  10.252 +            for i in max.child_vertices:
  10.253 +                if i.child_vertices!=[]: new_candidate_nodes.append(i)
  10.254 +            return (max,new_candidate_nodes)
  10.255 +
  10.256 +    def next_vertex(self, candidate_vertices):
  10.257 +        r"""
  10.258 +        Finds next vertex with the greatest root subtree sum of lengthes among candidate_vertices,
  10.259 +        removes it from candidate_vertices and adds its child vertices to new_candidate_nodes
  10.260 +
  10.261 +        Input: candidate_nodes
  10.262 +        Output: tuple (max, new_candidate_vertices)
  10.263 +            max - vertex with the gretest root subtree sum of lengthes among candidate_nodes
  10.264 +            new_candidate_vertices - array "candidate_vertices" without the "max" vertex,
  10.265 +            but with "max" children leaves instead
  10.266 +
  10.267 +       >>> import rooted_tree
  10.268 +       >>> tree = rooted_tree.Rooted_tree()
  10.269 +       >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.270 +       >>> tree.calculate_root_subtree_sum_of_lengthes()
  10.271 +       >>> candidate_vertices=[tree.root]
  10.272 +       >>> max, new_candidate_vertices=tree.next_vertex(candidate_vertices)
  10.273 +
  10.274 +        """
  10.275 +        #find a vertex among candidates with the greatest root subtree sum of lengthes
  10.276 +        new_candidate_vertices=candidate_vertices[:]
  10.277 +        if new_candidate_vertices!=[]:
  10.278 +            max_index=0
  10.279 +            for i_,i in enumerate(new_candidate_vertices):
  10.280 +                if i.content[1]>new_candidate_vertices[max_index].content[1]:
  10.281 +                    max_index=i_
  10.282 +            #update candidate_vertices
  10.283 +            max=new_candidate_vertices.pop(max_index)
  10.284 +            for i in max.child_vertices:
  10.285 +                new_candidate_vertices.append(i)
  10.286 +            return (max, new_candidate_vertices)
  10.287 +
  10.288 +    def node_to_set_of_leaves(self, node):
  10.289 +        r"""
  10.290 +        Input: Vertex object
  10.291 +        Output: set_of_leaves list
  10.292 +
  10.293 +        >>> import rooted_tree
  10.294 +        >>> tree=rooted_tree.Rooted_tree()
  10.295 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.296 +        >>> set_of_leaves=tree.node_to_set_of_leaves(tree.root.child_vertices[1])
  10.297 +        >>> for i in set_of_leaves:
  10.298 +        ...     print i.content
  10.299 +        ['b']
  10.300 +        ['c']
  10.301 +        ['d']
  10.302 +        """
  10.303 +
  10.304 +        set_of_leaves=[]
  10.305 +        queue=[node]
  10.306 +        #traversing the tree downwards form the node
  10.307 +        while queue!=[]:
  10.308 +            current_vertex = queue.pop(0)
  10.309 +            for i in current_vertex.child_vertices:
  10.310 +                if i.child_vertices==[]:
  10.311 +                    set_of_leaves.append(i)
  10.312 +                else:
  10.313 +                    queue.append(i)
  10.314 +        return set_of_leaves
  10.315 +
  10.316 +
  10.317 +    def complement_set_of_leaves(self, set_of_leaves):
  10.318 +        r"""
  10.319 +        Input: set_of_leaves list of leaves as Vertex objects
  10.320 +        Output: complementary_leaves list of leaves as Vertex objects
  10.321 +
  10.322 +        >>> import rooted_tree
  10.323 +        >>> tree=rooted_tree.Rooted_tree()
  10.324 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.325 +        >>> for i in tree.leaves:
  10.326 +        ...     print i.content
  10.327 +        ['a']
  10.328 +        ['b']
  10.329 +        ['c']
  10.330 +        ['d']
  10.331 +        >>> set_of_leaves=[tree.leaves[0],tree.leaves[2]]
  10.332 +        >>> for i in tree.complement_set_of_leaves(set_of_leaves):
  10.333 +        ...     print i.content
  10.334 +        ['b']
  10.335 +        ['d']
  10.336 +        """
  10.337 +        complementary_leaves=[]
  10.338 +        #subtract the set_of_leaves from all_leaves to acieve complemenatry_leaves
  10.339 +        for i in self.leaves:
  10.340 +            if set_of_leaves.count(i)==0:
  10.341 +                complementary_leaves.append(i)
  10.342 +        return complementary_leaves
  10.343 +
  10.344 +
  10.345 +    def chop_the_tree(self,set_of_leaves):
  10.346 +        r"""
  10.347 +        !!!Returns a new tree object (makes copy somewhere in the beginning)
  10.348 +        Input: set_of_leaves as Vertex objects
  10.349 +
  10.350 +        >>> import rooted_tree
  10.351 +        >>> tree=rooted_tree.Rooted_tree()
  10.352 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.353 +        >>> set_of_leaves=[tree.leaves[2],tree.leaves[3]]
  10.354 +        >>> tree1=tree.chop_the_tree(set_of_leaves)
  10.355 +        >>> print tree1.root.child_vertices[0].content
  10.356 +        ['c', 3.0, 0]
  10.357 +        >>> print tree1.root.child_vertices[1].content
  10.358 +        ['d', 3.0, 0]
  10.359 +        """
  10.360 +        import sys
  10.361 +        import copy
  10.362 +        #temprorary variable for rset of leaves
  10.363 +        self.set_of_leaves=set_of_leaves
  10.364 +        tree=copy.deepcopy(self)
  10.365 +        del self.set_of_leaves
  10.366 +        #removing all outsider subtrees
  10.367 +        queue=tree.complement_set_of_leaves(tree.set_of_leaves)
  10.368 +        tree.leaves=tree.set_of_leaves
  10.369 +        del tree.set_of_leaves
  10.370 +        while queue!=[]:
  10.371 +            current=queue.pop(0)
  10.372 +            if current.child_vertices==[] and current!=tree.root:
  10.373 +                #sys.stderr.write("current = "+str(current)+"\n")
  10.374 +                #sys.stderr.write("current.parental_vertex.child_vertices = "+str(current.parental_vertex.child_vertices)+"\n")
  10.375 +                #sys.stderr.write("index of current = "+str(current.parental_vertex.child_vertices.index(current))+"\n")
  10.376 +                current.parental_vertex.child_vertices.pop(current.parental_vertex.child_vertices.index(current))
  10.377 +                if current.parental_vertex.child_vertices==[] or (len(current.parental_vertex.child_vertices)==1 and current.parental_vertex==tree.root):
  10.378 +                    queue.append(current.parental_vertex)
  10.379 +            elif len(current.child_vertices)==1 and current==tree.root:
  10.380 +                tree.root=current.child_vertices[0]
  10.381 +                current.child_vertices[0].content[0]="root"
  10.382 +                current.child_vertices[0].parental_vertex=current.parental_vertex
  10.383 +                queue.append(current.child_vertices[0])
  10.384 +
  10.385 +        queue=[tree.root]
  10.386 +        while queue!=[]:
  10.387 +            current=queue.pop(0)
  10.388 +            if len(current.child_vertices)==1:
  10.389 +                current.child_vertices[0].parental_vertex=current.parental_vertex
  10.390 +                #sys.stderr.write("current = "+str(current)+", current.content = "+str(current.content)+"\n")
  10.391 +                #sys.stderr.write("current.parental_vertex.child_vertices = "+str(current.parental_vertex.child_vertices)+"\n")
  10.392 +                #sys.stderr.write("index of current = "+str(current.parental_vertex.child_vertices.index(current))+"\n")
  10.393 +                current.parental_vertex.child_vertices.pop(current.parental_vertex.child_vertices.index(current))
  10.394 +                current.parental_vertex.child_vertices.append(current.child_vertices[0])
  10.395 +                current.child_vertices[0].parental_edge=float(current.child_vertices[0].parental_edge)+float(current.parental_edge)
  10.396 +            for i in current.child_vertices:
  10.397 +                queue.append(i)
  10.398 +            #REDO calculate_root_subtree_sum_of_lengthes
  10.399 +            if len(current.content)>1:
  10.400 +                for i in range(1,len(current.content)):
  10.401 +                    del current.content[1]
  10.402 +        tree.calculate_root_subtree_sum_of_lengthes()
  10.403 +        return tree
  10.404 +
  10.405 +    def set_of_leaves_to_sum_of_lengthes_of_root_subtree(self, set_of_leaves):
  10.406 +        r"""
  10.407 +        Input: set_of_leaves, which represent sequences with same groups in them in current position
  10.408 +        Output: sum_of_lengthes_of_root_subtree
  10.409 +
  10.410 +        >>> import rooted_tree
  10.411 +        >>> tree=rooted_tree.Rooted_tree()
  10.412 +        >>> tree.parenthetic_structure_to_tree('(a:10,(b:8.0,(c:3.0,d:3.0):5.0):2.0);')
  10.413 +        >>> set_of_leaves=[tree.leaves[2],tree.leaves[3]]
  10.414 +        >>> print tree.set_of_leaves_to_sum_of_lengthes_of_root_subtree(set_of_leaves)
  10.415 +        13.0
  10.416 +        """
  10.417 +
  10.418 +        import copy
  10.419 +        self.set_of_leaves=set_of_leaves
  10.420 +        tree=copy.deepcopy(self)
  10.421 +        del self.set_of_leaves
  10.422 +        queue=tree.complement_set_of_leaves(tree.set_of_leaves)
  10.423 +        while queue!=[]:
  10.424 +            current=queue.pop(0)
  10.425 +            if current.child_vertices==[] and current!=tree.root:
  10.426 +                current.parental_vertex.child_vertices.pop(current.parental_vertex.child_vertices.index(current))
  10.427 +                if current.parental_vertex.child_vertices==[]:
  10.428 +                    queue.append(current.parental_vertex)
  10.429 +        #sum of lengthes count
  10.430 +        sum=0
  10.431 +        queue.append(tree.root)
  10.432 +        while queue!=[]:
  10.433 +            current=queue.pop(0)
  10.434 +            for i in current.child_vertices:
  10.435 +                queue.append(i)
  10.436 +                sum=sum+float(i.parental_edge)
  10.437 +        return sum
  10.438 +
  10.439 +    def depth_first_traversal(self):
  10.440 +        r"""
  10.441 +        Returns list of tuples of vertices and their nesting depthes. Root has nesting depth 0.
  10.442 +        Creates child_vertices_copy for each vertex of the tree
  10.443 +
  10.444 +        """
  10.445 +        #width-first traversal in order to create child_vertices_copy arrays
  10.446 +        for i in self.width_first_traversal():
  10.447 +            i.child_vertices_copy=i.child_vertices[:]
  10.448 +        #depth-first traversal itself
  10.449 +        output=[]
  10.450 +        stack=[self.root]
  10.451 +        while stack!=[]:
  10.452 +            if stack[-1].child_vertices_copy!=[]:
  10.453 +                stack.append(stack[-1].child_vertices_copy.pop())
  10.454 +                #calculate the nesting depth of stack[-1]
  10.455 +                counter=0
  10.456 +                current_vertex=stack[-1]
  10.457 +                while current_vertex!=self.root:
  10.458 +                    counter+=1
  10.459 +                    current_vertex=current_vertex.parental_vertex
  10.460 +                output.append((stack[-1],counter))
  10.461 +            else:
  10.462 +                stack.pop()
  10.463 +        return output
  10.464 +
  10.465 +    def width_first_traversal(self):
  10.466 +        """
  10.467 +        Returns list of all vertices in width-first order
  10.468 +        """
  10.469 +        output=[]
  10.470 +        queue=[self.root]
  10.471 +        while queue!=[]:
  10.472 +            current_vertex=queue.pop(0)
  10.473 +            output.append(current_vertex)
  10.474 +            for i in current_vertex.child_vertices:
  10.475 +                queue.append(i)
  10.476 +        return output
  10.477 +
  10.478 +class Vertex(object):
  10.479 +    r"""
  10.480 +    Represents a single vertex of the tree
  10.481 +    Data:
  10.482 +      content - the Vertex's content list (actually, anything we want to store in each particular case)
  10.483 +
  10.484 +      parental_vertex - The parental Vertex object
  10.485 +      child_vertices - list of Vertex's children (as Vertex objects)
  10.486 +
  10.487 +      parental_edge - any information concerning parental edge, we want to store
  10.488 +    """
  10.489 +
  10.490 +    def __init__(self, content=None, parental_vertex=None, child_vertices=None, parental_edge=None):
  10.491 +        self.content=content
  10.492 +        self.parental_vertex=parental_vertex
  10.493 +        self.child_vertices=child_vertices
  10.494 +        self.parental_edge=parental_edge
  10.495 +        if self.content==None:
  10.496 +            self.content=[]
  10.497 +        if self.child_vertices==None:
  10.498 +            self.child_vertices=[]
  10.499 +
  10.500 +
  10.501 +if __name__=="__main__":
  10.502 +    print "Test";
  10.503 +    import doctest;
  10.504 +    doctest.testmod(extraglobs={'this_is_a_test':True})
  10.505 +
  10.506 +
  10.507 +
  10.508 +
  10.509 +
  10.510 +
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/sbbs_2009/modules/sequence.py	Sat Jul 09 16:30:28 2011 +0400
    11.3 @@ -0,0 +1,124 @@
    11.4 +#!/usr/bin/python
    11.5 +"""
    11.6 +Contain classes:
    11.7 +    Sequence
    11.8 +    PDB_to_sequence
    11.9 +        and other concerning 3D
   11.10 +"""
   11.11 +import configure
   11.12 +
   11.13 +class Sequence(object):
   11.14 +    """
   11.15 +    Author: AAl
   11.16 +
   11.17 +    Sequence is a string of aa characters and additinal data
   11.18 +    DATA:
   11.19 +        self.sequences          # list of sequence objects
   11.20 +        self.string             # string of aa characters; no gap symbols allowed
   11.21 +        self.name               # default name for data export
   11.22 +        self.description        # string
   11.23 +
   11.24 +    Optional
   11.25 +        self.synonims           # dictionary   source:name; examples:   gi:12345678  AC:P23456
   11.26 +        self.pdbs               # list of pdb_to_sequence objects
   11.27 +        consensus_ss            #
   11.28 +        important_aa            #
   11.29 +    """
   11.30 +#####!!!!!!  def __init__(self, sequences, name=None, description=None, string =None): ##### CHANGED TO NEXT LINE!
   11.31 +    def __init__(self, sequences, name=None, description=None, string=None):
   11.32 +        self.sequences=sequences
   11.33 +        self.name=name
   11.34 +        self.description=description
   11.35 +        self.string=string
   11.36 +
   11.37 +    def add_gapped_string(self, sequence_name, sequence_description, sequence_string):
   11.38 +        r"""
   11.39 +        Degap sequence string
   11.40 +        Create sequence object,
   11.41 +        add it to sequences
   11.42 +        Return sequence index (in sequences) and string of aa_indexes
   11.43 +        >>> sequence_name="seq"
   11.44 +        >>> sequence_description="test sequence"
   11.45 +        >>> sequence_string="-a--cd--"
   11.46 +        >>> sequences=[]
   11.47 +        >>> aaa=Sequence(sequences)
   11.48 +        >>> print  aaa.add_gapped_string(sequence_name,sequence_description,sequence_string)
   11.49 +        [0, [None, 0, None, None, 1, 2, None, None]]
   11.50 +        >>> print aaa.sequences[0].string
   11.51 +        acd
   11.52 +
   11.53 +        """
   11.54 +        self.name=sequence_name
   11.55 +        if self.name in configure.special_sequence_names:
   11.56 +            return None
   11.57 +        else:
   11.58 +            self_description=sequence_description
   11.59 +            self.string=""
   11.60 +            string_indexes=[]
   11.61 +            aa_index=0
   11.62 +            for x in sequence_string:
   11.63 +                if x=="-":
   11.64 +                    string_indexes.append(None)
   11.65 +                else:
   11.66 +                    self.string=self.string + x
   11.67 +                    string_indexes.append(aa_index)
   11.68 +                    aa_index=aa_index+1
   11.69 +            sequence_index=len(self.sequences)
   11.70 +            self.sequences.append(self)
   11.71 +        return [sequence_index, string_indexes]
   11.72 +
   11.73 +
   11.74 +
   11.75 +class Pdb_to_sequence(object):
   11.76 +    """"
   11.77 +    pdb_to_sequence data:
   11.78 +        pdb_code
   11.79 +        sequence_3D
   11.80 +        ss            # ordered list of ss_elements
   11.81 +                           ss_element=(from, to, type)
   11.82 +                               from, to are indexes in sequence
   11.83 +                               type is either "helix" or "strand"
   11.84 +        helixes
   11.85 +        strands       # ordered list of strands
   11.86 +                          strand=(from,to)
   11.87 +        sheets        # list of beta_sheet objects
   11.88 +    """
   11.89 +    def __init__(self, sequence):
   11.90 +        self.sequence=sequence
   11.91 +
   11.92 +class Beta_sheet(object):
   11.93 +    """
   11.94 +    beta_sheet data:
   11.95 +        self.sequence       #
   11.96 +        self.meridians      # soretd list of sheet_meridian objects
   11.97 +        self.parallels      # sorted list of sheet_parallel objects
   11.98 +        self.marginals      # list of marginal aa residues in a sheet
   11.99 +    """
  11.100 +    def __init__(self,sequence):
  11.101 +        self.sequence=sequence
  11.102 +class Sheet_meridian(object):
  11.103 +    """
  11.104 +    sheet_meridian data:
  11.105 +        strands     # list of strands; typically, consists of one strand
  11.106 +
  11.107 +    """
  11.108 +class Sheet_parallel(object):
  11.109 +    """
  11.110 +    sheet_parallel data:
  11.111 +        self.num          # number of the parellel in beta_sheet ?
  11.112 +        self.cross_rows   # list of cross_rows; typically, consists of one cross_row
  11.113 +                             cross_row=[aas, side]
  11.114 +                                 aas is a list of aa indexes insequence
  11.115 +                                 side is either "in" or "out"
  11.116 +
  11.117 +    """
  11.118 +
  11.119 +
  11.120 +if __name__ == "__main__":
  11.121 +    print "Hello!"
  11.122 +    import doctest
  11.123 +    doctest.testmod()
  11.124 +
  11.125 +
  11.126 +
  11.127 +
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/sbbs_2009/test_homology.py	Sat Jul 09 16:30:28 2011 +0400
    12.3 @@ -0,0 +1,5 @@
    12.4 +import sys
    12.5 +from blocks_finder import *
    12.6 +
    12.7 +blocks = main(sys.argv[1])
    12.8 +blocks2homology_file(blocks, open(sys.argv[2], 'w'))