allpy

changeset 1043:116f5bfc39b8
blocks_finder back to 1.4.0 and contains compatibility crutches
author: Boris Burkov <BurkovBA@gmail.com>
date: Tue, 20 Mar 2012 22:08:55 +0400
parents: ed1f4f87034f
children: 370678689947
files: allpy/base.py sequence_based_blocks_search/blocks_finder.py
diffstat: 2 files changed, 67 insertions(+), 11 deletions(-) [+]
[-]

allpy/base.py 1

sequence_based_blocks_search/blocks_finder.py 77 allpy/base.py 1 sequence_based_blocks_search/blocks_finder.py 77
allpy/base.py 1
sequence_based_blocks_search/blocks_finder.py 77
     1.1 --- a/allpy/base.py	Fri Oct 28 20:40:55 2011 +0300
     1.2 +++ b/allpy/base.py	Tue Mar 20 22:08:55 2012 +0400
     1.3 @@ -507,6 +507,7 @@
     1.4          """Return hash by identity."""
     1.5          return id(self)
     1.6  
     1.7 +
     1.8  class Block(Alignment):
     1.9      """Block of alignment.
    1.10  

     2.1 --- a/sequence_based_blocks_search/blocks_finder.py	Fri Oct 28 20:40:55 2011 +0300
     2.2 +++ b/sequence_based_blocks_search/blocks_finder.py	Tue Mar 20 22:08:55 2012 +0400
     2.3 @@ -4,11 +4,46 @@
     2.4  sys.path.append("../")
     2.5  from allpy import protein
     2.6  from allpy.homology import *
     2.7 +from allpy import data
     2.8 +from allpy import base
     2.9  import copy
    2.10  import math
    2.11  from functional_groups import functional_groups
    2.12  from allpy.util import *
    2.13  
    2.14 +class SbbsProteinMonomer(protein.Monomer):
    2.15 +    def __eq__(self, other):
    2.16 +        return self is other
    2.17 +
    2.18 +    def __ne__(self, other):
    2.19 +        return not self is other
    2.20 +
    2.21 +SbbsProteinMonomer._initialize(data.codes.protein)
    2.22 +
    2.23 +class SbbsProteinAlignment(protein.Alignment):
    2.24 +    def rows_as_strings(self, gaps='-'):
    2.25 +        rows = []
    2.26 +        for sequence in self.sequences:
    2.27 +            string = ""
    2.28 +            for column in self.columns:
    2.29 +                if sequence in column:
    2.30 +                    if hasattr(column[sequence], "input_code1"):
    2.31 +                        string += column[sequence].input_code1
    2.32 +                    else:
    2.33 +                        string += column[sequence].code1
    2.34 +                else:
    2.35 +                    string += gap
    2.36 +            string = util.UserString(string)
    2.37 +            string.sequence = sequence
    2.38 +            rows.append(string)
    2.39 +        return rows
    2.40 +
    2.41 +class SbbsProteinColumn(base.Column):
    2.42 +    def __eq__(self, other):
    2.43 +        return self is other
    2.44 +
    2.45 +vars(base)['Column'] = SbbsProteinColumn
    2.46 +
    2.47  class Preblock(object):
    2.48      """Attributes:
    2.49      first_column_index=self.alignment.columns[column1]
    2.50 @@ -51,7 +86,10 @@
    2.51  
    2.52  
    2.53  def main(file):
    2.54 -    alignment = protein.Alignment().append_file(file)
    2.55 +    try:
    2.56 +        alignment = SbbsProteinAlignment().append_file(file)
    2.57 +    except Exception as E:
    2.58 +        raise Exception("Failed to read alignment\nCheck alignment format")
    2.59      groups_content = find_groups_content(alignment, functional_groups)
    2.60      find_links(groups_content, alignment)
    2.61      blocks = mark_blocks(alignment)
    2.62 @@ -88,7 +126,7 @@
    2.63                      addition_to_candidate_preblocks.append(Preblock(index_of_column, index_of_column, copy.copy(groups_content[column][group_name]), functional_groups[group_name]["70%"], functional_groups[group_name]["100%"], 0.0, alignment))
    2.64          except KeyError as ke:
    2.65              print "Found non-canonical aminoacid: %s"%(ke)
    2.66 -        #create new preblocks as combinations of the old ones 
    2.67 +        #create new preblocks as combinations of the old ones
    2.68          try:
    2.69              for preblock in candidate_preblocks:
    2.70                  preblock_extensions = []
    2.71 @@ -117,7 +155,7 @@
    2.72          overlapping_preblocks=[]
    2.73          for preblock in candidate_preblocks:
    2.74              if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups):
    2.75 -                create_links(preblock, alignment) #links are not cliques, just connected components 
    2.76 +                create_links(preblock, alignment) #links are not cliques, just connected components
    2.77                  overlapping_preblocks+=accept_and_return_overlapping_preblocks_before(preblock, alignment, candidate_preblocks)
    2.78              if preblock not in overlapping_preblocks: new_candidate_preblocks.append(preblock)
    2.79          candidate_preblocks = new_candidate_preblocks
    2.80 @@ -218,7 +256,7 @@
    2.81      inconserved positions, create the transition matrix, calculate
    2.82      its eigenvectors and eigenvalues, round the weights, find coefficient
    2.83      of [1,0,0,...] decomposition in the basis of eigenvectors.
    2.84 -    
    2.85 +
    2.86  
    2.87      """
    2.88  
    2.89 @@ -244,13 +282,13 @@
    2.90  #                    column[iterated_sequence].links=[sequence]
    2.91  #                else:
    2.92  #                    if sequence not in column[iterated_sequence].links: column[iterated_sequence].links.append(sequence)
    2.93 -       
    2.94 +
    2.95          if "links" not in column[any_sequence].__dict__:
    2.96              column[any_sequence].links=[]
    2.97          remaining_sequences = copy.copy(preblock.sequences)
    2.98          remaining_sequences.remove(any_sequence)
    2.99          for sequence in preblock.sequences:#links are bidirectional
   2.100 -            if sequence not in column[any_sequence].links: 
   2.101 +            if sequence not in column[any_sequence].links:
   2.102                  column[any_sequence].links.append(sequence)
   2.103                  if "links" not in column[sequence].__dict__:
   2.104                      column[sequence].links=[any_sequence]
   2.105 @@ -304,8 +342,8 @@
   2.106          #print index_of_column, [gblock.columns for gblock in blocks_in_this_column]
   2.107          if alignment.columns.index(column) == len(alignment.columns)-1: output+=blocks_in_this_column
   2.108      return output
   2.109 -               
   2.110 -                
   2.111 +
   2.112 +
   2.113  def find_connected_component(sequences, column):
   2.114      """output is a list of sequences"""
   2.115      #find one sequence that has blocks
   2.116 @@ -326,7 +364,7 @@
   2.117                  if another_sequence not in queue and another_sequence not in output and another_sequence in sequences:
   2.118                      queue.append(another_sequence)
   2.119      return output
   2.120 -                
   2.121 +
   2.122  
   2.123  def delete_links(alignment):
   2.124      for sequence in alignment.sequences:
   2.125 @@ -334,7 +372,7 @@
   2.126              if "links" in monomer.__dict__:
   2.127                  del monomer.links
   2.128  
   2.129 -#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147]   
   2.130 +#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147]
   2.131  
   2.132  def create_monomer_homology(alignment):
   2.133      """returns MonomerHomology object, given alignment
   2.134 @@ -408,4 +446,21 @@
   2.135  
   2.136  
   2.137  if __name__== '__main__':
   2.138 -    main(open(sys.argv[1]))
   2.139 +    input_file = open(sys.argv[1])
   2.140 +    blocks = main(input_file)
   2.141 +    if blocks == []:
   2.142 +        sys.exit()
   2.143 +    aim = blocks[0].alignment.add_markup('index') #create markup
   2.144 +    file_content=""
   2.145 +    for index, block in enumerate(blocks):
   2.146 +        for sequence in block.sequences:
   2.147 +            file_content+=str(str(sys.argv[1]))+"\t"+str(index)+"\t"+str(sequence.name)+"\t"
   2.148 +            buffer=""
   2.149 +            for column in block.columns:
   2.150 +                buffer+=str(aim[column])+" "
   2.151 +            if (len(buffer)!=0): buffer=buffer[:-1]
   2.152 +            file_content+=buffer+"\n"
   2.153 +
   2.154 +    output_file = open(sys.argv[2], 'w')
   2.155 +    output_file.write(file_content)
   2.156 +