allpy
changeset 702:1ce3b0e42ba5
alignment_blocks method added to homology and tested. Blocks found in monomer homology file can be now returned to alignment blocks
author | Andrei <aba@belozersky.msu.ru> |
---|---|
date | Thu, 07 Jul 2011 12:53:42 +0400 |
parents | bf97a2bc9d08 |
children | eb2a902b1e0a |
files | allpy/homology.py |
diffstat | 1 files changed, 89 insertions(+), 18 deletions(-) [+] |
line diff
1.1 --- a/allpy/homology.py Wed Jul 06 18:05:51 2011 +0400 1.2 +++ b/allpy/homology.py Thu Jul 07 12:53:42 2011 +0400 1.3 @@ -1,3 +1,5 @@ 1.4 +from allpy import protein 1.5 + 1.6 class MonomerHomology(object): 1.7 """ Essentially, alignment is a set of monomer homology classes 1.8 Object of type MonomerHomology desribes an alignment in terms of monomer homology 1.9 @@ -10,12 +12,23 @@ 1.10 1.11 DATA: 1.12 monomer_ids = {monomer_id:class_id} 1.13 + 1.14 classes = {class_id:monomer_id) 1.15 monomer_id = (sequence_name, monomer_number) 1.16 class_id = string 1.17 + 1.18 columns = {class_id:column_number} optional 1.19 - blocks = [ (block_number, class_id, column_number), ...] 1.20 1.21 + blocks_data = [block_data, ...] 1.22 + block_data = (sequence_names,class_ids) 1.23 + sequence_names = set( [sequence_1_name,...] ) 1.24 + class_ids = = set( [class_1_id, ...] ) 1.25 + 1.26 + BLOCK is assigned to an alignment (currently) 1.27 + Here block is not a block of an alignment, but a set of data to create such block 1.28 + On the other hand, block consists of class_ids (not columns) to have possibility to get monomer_id 1.29 + Thus, block reaarengement must be accomplished with classes reassignments! 1.30 + Not very good, but currently is made as above described 1.31 1.32 METHODS: 1.33 + .read(file_name) 1.34 @@ -23,12 +36,14 @@ 1.35 + .write_class(file,list_of_monomer_ids,monomer_homology_class_id) 1.36 + .write_block(file, block) 1.37 + .compare_with(class) 1.38 + .highest_blocks() 1.39 + .alignment_blocks(alignment) 1.40 """ 1.41 def __init__(self, next_class_id = 1): 1.42 self.classes = {} 1.43 self.monomer_ids = {} 1.44 self.columns = {} 1.45 - self.blocks = [] 1.46 + self.blocks_data = [] 1.47 self.next_class_id = next_class_id 1.48 1.49 1.50 @@ -224,26 +239,28 @@ 1.51 set_sequences.add(x[0]) 1.52 return set_sequences 1.53 1.54 - def _add_to_block(self, i, block_number, class_id, column_number): 1.55 - """ Add an entry to a class_ids 1.56 - """ 1.57 - self.blocks[i] = (block_number, class_id, column_number) 1.58 - 1.59 1.60 def highest_blocks(self): 1.61 """ Makes blocks of maximal possible heights from monomer homology classes 1.62 block = set of homology classes with the same set of sequences 1.63 block MUST BE marked up by column numbers 1.64 1.65 + blocks_data = [block_data, ...] 1.66 + block_data = (sequence_names,class_ids) 1.67 + sequence_names = set( [sequence_1_name,...] ) 1.68 + class_ids = = set( [class_1_id, ...] ) 1.69 + 1.70 + 1.71 RETURN: self.blocks 1.72 1.73 """ 1.74 - self.blocks = list(self.classes) 1.75 - self.blocks.sort() 1.76 + class_ids = list(self.classes) 1.77 + class_ids.sort() 1.78 + self.blocks_data = [] 1.79 block_number = 0 1.80 1.81 - for i in range(len(self.blocks)): 1.82 - class_id_1 = self.blocks[i] 1.83 + for i in range(len(class_ids)): 1.84 + class_id_1 = class_ids[i] 1.85 1.86 if type(class_id_1) == type(tuple()): 1.87 continue 1.88 @@ -252,28 +269,82 @@ 1.89 try: 1.90 column_number = self.columns[class_id_1] 1.91 except: 1.92 - raise Exception("Fail to get column_number! Homology classes must be marked to creats hiest blocks") 1.93 + raise Exception("Fail to get column_number! Homology classes must be marked to creats highest blocks") 1.94 1.95 - self._add_to_block(i, block_number, class_id_1, column_number) 1.96 + # NEW BLOCK 1.97 1.98 set_sequences_1 = self._sequences_of_class(self.classes[class_id_1]) 1.99 + 1.100 + self.blocks_data.append( (set_sequences_1, set([class_id_1]) ) ) 1.101 + class_ids[i] = (class_id_1,i) 1.102 1.103 - for j in range(i+1,len(self.blocks)): 1.104 - class_id_2 = self.blocks[j] 1.105 + # ADD classes to BLOCK 1.106 + for j in range(i+1,len(class_ids)): 1.107 + class_id_2 = class_ids[j] 1.108 1.109 if type(class_id_2) == type(tuple()): 1.110 continue 1.111 1.112 set_sequences_2 = self._sequences_of_class(self.classes[class_id_2]) 1.113 1.114 - 1.115 if set_sequences_2 == set_sequences_1: 1.116 try: 1.117 column_number = self.columns[class_id_2] 1.118 except: 1.119 raise Exception("Fail to get column_number! Homology classes must be marked to creats hiest blocks") 1.120 - self._add_to_block(j, block_number, class_id_2, column_number) 1.121 + class_ids[j] = (class_id_2, j) 1.122 + self.blocks_data[len(self.blocks_data)-1][1].add( class_id_2) 1.123 1.124 - return self.blocks 1.125 + return self.blocks_data 1.126 1.127 + def alignment_blocks(self, alignment): 1.128 + """ From homology.blocks creates a list of alignment blocks (accessible for visualization) 1.129 1.130 + blocks_data = [block_data, ...] 1.131 + block_data = (sequence_names,class_ids) 1.132 + sequence_names = set( [sequence_1_name,...] ) 1.133 + class_ids = = set( [class_1_id, ...] ) 1.134 + RETURN blocks, list of blocks in alignment 1.135 + 1.136 + """ 1.137 + blocks = [] 1.138 + dict_sequences = {} 1.139 + for sequence in alignment.sequences: 1.140 + dict_sequences[sequence.name]=sequence 1.141 + 1.142 + dict_columns = {} 1.143 + for column in alignment.columns: 1.144 + try: 1.145 + dict_columns[alignment.markups['number'][column]] = column 1.146 + except: 1.147 + raise Exception("alignment columns are not marked up by numbers! Can't creat block from blocks data") 1.148 + exit() 1.149 + 1.150 + for block_data in self.blocks_data: 1.151 + set_sequence_names = block_data[0] 1.152 + set_class_ids = block_data[1] 1.153 + 1.154 + block_sequences = [] 1.155 + 1.156 + for sequence_name in set_sequence_names: 1.157 + 1.158 + try: 1.159 + s = dict_sequences[sequence_name] 1.160 + block_sequences.append(s) 1.161 + except: 1.162 + raise Exception("Sequence name %s from block data is not in alignment sequences!" % sequence_name) 1.163 + exit() 1.164 + 1.165 + block_columns = [] 1.166 + for class_id in set_class_ids: 1.167 + column_number = self.columns[class_id] 1.168 + try: 1.169 + block_columns.append(dict_columns[column_number]) 1.170 + except: 1.171 + raise Exception("Column number %s from block data is not in alignment columns numbers!" % column_number) 1.172 + exit() 1.173 + 1.174 + block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns) 1.175 + blocks.append(block) 1.176 + 1.177 + return blocks 1.178 \ No newline at end of file