Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/1ce3b0e42ba5
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:06:18 2012
Кодировка:
allpy: 1ce3b0e42ba5

allpy

changeset 702:1ce3b0e42ba5

alignment_blocks method added to homology and tested. Blocks found in monomer homology file can be now returned to alignment blocks
author Andrei <aba@belozersky.msu.ru>
date Thu, 07 Jul 2011 12:53:42 +0400
parents bf97a2bc9d08
children eb2a902b1e0a
files allpy/homology.py
diffstat 1 files changed, 89 insertions(+), 18 deletions(-) [+]
line diff
     1.1 --- a/allpy/homology.py	Wed Jul 06 18:05:51 2011 +0400
     1.2 +++ b/allpy/homology.py	Thu Jul 07 12:53:42 2011 +0400
     1.3 @@ -1,3 +1,5 @@
     1.4 +from allpy import protein
     1.5 +
     1.6  class  MonomerHomology(object):
     1.7      """ Essentially, alignment is a set of monomer homology classes
     1.8      Object of type MonomerHomology desribes an alignment  in terms of monomer homology 
     1.9 @@ -10,12 +12,23 @@
    1.10  
    1.11      DATA:
    1.12          monomer_ids = {monomer_id:class_id}
    1.13 +
    1.14          classes     = {class_id:monomer_id)
    1.15              monomer_id = (sequence_name, monomer_number) 
    1.16              class_id = string 
    1.17 +
    1.18          columns = {class_id:column_number} optional
    1.19 -        blocks  = [ (block_number, class_id, column_number), ...]  
    1.20  
    1.21 +        blocks_data = [block_data, ...]
    1.22 +        block_data = (sequence_names,class_ids)
    1.23 +            sequence_names = set( [sequence_1_name,...] )
    1.24 +            class_ids = = set( [class_1_id, ...] )
    1.25 +
    1.26 +     BLOCK is assigned to an alignment (currently) 
    1.27 +        Here block is not a block of an alignment, but a set of data to create such block
    1.28 +        On the other hand, block consists of class_ids (not columns) to have possibility to get monomer_id
    1.29 +        Thus, block reaarengement must be accomplished with classes reassignments!
    1.30 +        Not very good, but currently is made as above described 
    1.31  
    1.32      METHODS:
    1.33         + .read(file_name) 
    1.34 @@ -23,12 +36,14 @@
    1.35         + .write_class(file,list_of_monomer_ids,monomer_homology_class_id)
    1.36         + .write_block(file, block)
    1.37         + .compare_with(class)
    1.38 +         .highest_blocks()
    1.39 +         .alignment_blocks(alignment)
    1.40      """
    1.41      def __init__(self, next_class_id = 1):
    1.42           self.classes = {}
    1.43           self.monomer_ids = {}
    1.44           self.columns = {}
    1.45 -         self.blocks = []
    1.46 +         self.blocks_data = []
    1.47           self.next_class_id = next_class_id
    1.48  
    1.49  
    1.50 @@ -224,26 +239,28 @@
    1.51              set_sequences.add(x[0])
    1.52          return set_sequences
    1.53    
    1.54 -    def _add_to_block(self, i, block_number, class_id, column_number):
    1.55 -        """ Add an entry to a class_ids  
    1.56 -        """
    1.57 -        self.blocks[i] = (block_number, class_id, column_number)
    1.58 -
    1.59  
    1.60      def highest_blocks(self):
    1.61          """ Makes blocks of maximal possible heights from monomer homology classes
    1.62          block = set of homology classes with the same set of sequences
    1.63          block MUST BE marked up by column numbers
    1.64  
    1.65 +        blocks_data = [block_data, ...]
    1.66 +        block_data = (sequence_names,class_ids)
    1.67 +            sequence_names = set( [sequence_1_name,...] )
    1.68 +            class_ids = = set( [class_1_id, ...] )
    1.69 +
    1.70 +
    1.71          RETURN: self.blocks
    1.72  
    1.73          """
    1.74 -        self.blocks = list(self.classes)
    1.75 -        self.blocks.sort()
    1.76 +        class_ids = list(self.classes)
    1.77 +        class_ids.sort()
    1.78 +        self.blocks_data = []
    1.79          block_number = 0
    1.80  
    1.81 -        for i in range(len(self.blocks)):
    1.82 -            class_id_1 = self.blocks[i]
    1.83 +        for i in range(len(class_ids)):
    1.84 +            class_id_1 = class_ids[i]
    1.85  
    1.86              if type(class_id_1) == type(tuple()):
    1.87                  continue
    1.88 @@ -252,28 +269,82 @@
    1.89              try: 
    1.90                  column_number = self.columns[class_id_1]
    1.91              except:
    1.92 -                raise Exception("Fail to get column_number! Homology classes must be marked to creats hiest blocks") 
    1.93 +                raise Exception("Fail to get column_number! Homology classes must be marked to creats highest blocks") 
    1.94  
    1.95 -            self._add_to_block(i, block_number, class_id_1,  column_number)
    1.96 +            # NEW BLOCK
    1.97  
    1.98              set_sequences_1 = self._sequences_of_class(self.classes[class_id_1])
    1.99 +      
   1.100 +            self.blocks_data.append( (set_sequences_1, set([class_id_1]) ) ) 
   1.101 +            class_ids[i] = (class_id_1,i)
   1.102  
   1.103 -            for j in range(i+1,len(self.blocks)):
   1.104 -                class_id_2 = self.blocks[j]
   1.105 +            # ADD classes to BLOCK
   1.106 +            for j in range(i+1,len(class_ids)):
   1.107 +                class_id_2 = class_ids[j]
   1.108  
   1.109                  if type(class_id_2) == type(tuple()):
   1.110                      continue
   1.111  
   1.112                  set_sequences_2 = self._sequences_of_class(self.classes[class_id_2])
   1.113  
   1.114 -
   1.115                  if set_sequences_2 == set_sequences_1:
   1.116                      try:
   1.117                          column_number = self.columns[class_id_2]
   1.118                      except:                                                                                                  
   1.119                          raise Exception("Fail to get column_number! Homology classes must be marked to creats hiest blocks") 
   1.120 -                    self._add_to_block(j, block_number, class_id_2,  column_number)
   1.121 +                    class_ids[j] = (class_id_2, j)
   1.122 +                    self.blocks_data[len(self.blocks_data)-1][1].add( class_id_2)
   1.123  
   1.124 -        return self.blocks 
   1.125 +        return self.blocks_data 
   1.126  
   1.127 +    def alignment_blocks(self, alignment):
   1.128 +        """ From homology.blocks creates a list of alignment blocks (accessible for visualization) 
   1.129  
   1.130 +        blocks_data = [block_data, ...]
   1.131 +        block_data = (sequence_names,class_ids)
   1.132 +            sequence_names = set( [sequence_1_name,...] )
   1.133 +            class_ids = = set( [class_1_id, ...] )
   1.134 +        RETURN blocks, list of blocks in alignment 
   1.135 +
   1.136 +        """
   1.137 +        blocks = []
   1.138 +        dict_sequences = {}
   1.139 +        for sequence in alignment.sequences:
   1.140 +            dict_sequences[sequence.name]=sequence 
   1.141 +
   1.142 +        dict_columns = {}
   1.143 +        for column in alignment.columns:
   1.144 +            try:
   1.145 +                dict_columns[alignment.markups['number'][column]] = column
   1.146 +            except:
   1.147 +                raise Exception("alignment columns are not marked up by numbers! Can't creat block from blocks data")
   1.148 +                exit()
   1.149 +
   1.150 +        for block_data in self.blocks_data:
   1.151 +            set_sequence_names = block_data[0]
   1.152 +            set_class_ids = block_data[1]
   1.153 +
   1.154 +            block_sequences = []
   1.155 +
   1.156 +            for sequence_name in set_sequence_names:
   1.157 +
   1.158 +                try:
   1.159 +                    s = dict_sequences[sequence_name]
   1.160 +                    block_sequences.append(s)
   1.161 +                except:
   1.162 +                    raise Exception("Sequence name %s from block data is not in alignment sequences!" % sequence_name)
   1.163 +                    exit()
   1.164 +
   1.165 +            block_columns = []
   1.166 +            for class_id in set_class_ids:
   1.167 +                column_number = self.columns[class_id]
   1.168 +                try:
   1.169 +                    block_columns.append(dict_columns[column_number])
   1.170 +                except:
   1.171 +                    raise Exception("Column number %s from block data is not in alignment columns numbers!" % column_number)
   1.172 +                    exit()
   1.173 +
   1.174 +            block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns)
   1.175 +            blocks.append(block)
   1.176 +
   1.177 +        return blocks
   1.178 \ No newline at end of file