allpy
changeset 1043:116f5bfc39b8
blocks_finder back to 1.4.0 and contains compatibility crutches
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Tue, 20 Mar 2012 22:08:55 +0400 |
parents | ed1f4f87034f |
children | 370678689947 |
files | allpy/base.py sequence_based_blocks_search/blocks_finder.py |
diffstat | 2 files changed, 67 insertions(+), 11 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Fri Oct 28 20:40:55 2011 +0300 1.2 +++ b/allpy/base.py Tue Mar 20 22:08:55 2012 +0400 1.3 @@ -507,6 +507,7 @@ 1.4 """Return hash by identity.""" 1.5 return id(self) 1.6 1.7 + 1.8 class Block(Alignment): 1.9 """Block of alignment. 1.10
2.1 --- a/sequence_based_blocks_search/blocks_finder.py Fri Oct 28 20:40:55 2011 +0300 2.2 +++ b/sequence_based_blocks_search/blocks_finder.py Tue Mar 20 22:08:55 2012 +0400 2.3 @@ -4,11 +4,46 @@ 2.4 sys.path.append("../") 2.5 from allpy import protein 2.6 from allpy.homology import * 2.7 +from allpy import data 2.8 +from allpy import base 2.9 import copy 2.10 import math 2.11 from functional_groups import functional_groups 2.12 from allpy.util import * 2.13 2.14 +class SbbsProteinMonomer(protein.Monomer): 2.15 + def __eq__(self, other): 2.16 + return self is other 2.17 + 2.18 + def __ne__(self, other): 2.19 + return not self is other 2.20 + 2.21 +SbbsProteinMonomer._initialize(data.codes.protein) 2.22 + 2.23 +class SbbsProteinAlignment(protein.Alignment): 2.24 + def rows_as_strings(self, gaps='-'): 2.25 + rows = [] 2.26 + for sequence in self.sequences: 2.27 + string = "" 2.28 + for column in self.columns: 2.29 + if sequence in column: 2.30 + if hasattr(column[sequence], "input_code1"): 2.31 + string += column[sequence].input_code1 2.32 + else: 2.33 + string += column[sequence].code1 2.34 + else: 2.35 + string += gap 2.36 + string = util.UserString(string) 2.37 + string.sequence = sequence 2.38 + rows.append(string) 2.39 + return rows 2.40 + 2.41 +class SbbsProteinColumn(base.Column): 2.42 + def __eq__(self, other): 2.43 + return self is other 2.44 + 2.45 +vars(base)['Column'] = SbbsProteinColumn 2.46 + 2.47 class Preblock(object): 2.48 """Attributes: 2.49 first_column_index=self.alignment.columns[column1] 2.50 @@ -51,7 +86,10 @@ 2.51 2.52 2.53 def main(file): 2.54 - alignment = protein.Alignment().append_file(file) 2.55 + try: 2.56 + alignment = SbbsProteinAlignment().append_file(file) 2.57 + except Exception as E: 2.58 + raise Exception("Failed to read alignment\nCheck alignment format") 2.59 groups_content = find_groups_content(alignment, functional_groups) 2.60 find_links(groups_content, alignment) 2.61 blocks = mark_blocks(alignment) 2.62 @@ -88,7 +126,7 @@ 2.63 addition_to_candidate_preblocks.append(Preblock(index_of_column, index_of_column, copy.copy(groups_content[column][group_name]), functional_groups[group_name]["70%"], functional_groups[group_name]["100%"], 0.0, alignment)) 2.64 except KeyError as ke: 2.65 print "Found non-canonical aminoacid: %s"%(ke) 2.66 - #create new preblocks as combinations of the old ones 2.67 + #create new preblocks as combinations of the old ones 2.68 try: 2.69 for preblock in candidate_preblocks: 2.70 preblock_extensions = [] 2.71 @@ -117,7 +155,7 @@ 2.72 overlapping_preblocks=[] 2.73 for preblock in candidate_preblocks: 2.74 if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups): 2.75 - create_links(preblock, alignment) #links are not cliques, just connected components 2.76 + create_links(preblock, alignment) #links are not cliques, just connected components 2.77 overlapping_preblocks+=accept_and_return_overlapping_preblocks_before(preblock, alignment, candidate_preblocks) 2.78 if preblock not in overlapping_preblocks: new_candidate_preblocks.append(preblock) 2.79 candidate_preblocks = new_candidate_preblocks 2.80 @@ -218,7 +256,7 @@ 2.81 inconserved positions, create the transition matrix, calculate 2.82 its eigenvectors and eigenvalues, round the weights, find coefficient 2.83 of [1,0,0,...] decomposition in the basis of eigenvectors. 2.84 - 2.85 + 2.86 2.87 """ 2.88 2.89 @@ -244,13 +282,13 @@ 2.90 # column[iterated_sequence].links=[sequence] 2.91 # else: 2.92 # if sequence not in column[iterated_sequence].links: column[iterated_sequence].links.append(sequence) 2.93 - 2.94 + 2.95 if "links" not in column[any_sequence].__dict__: 2.96 column[any_sequence].links=[] 2.97 remaining_sequences = copy.copy(preblock.sequences) 2.98 remaining_sequences.remove(any_sequence) 2.99 for sequence in preblock.sequences:#links are bidirectional 2.100 - if sequence not in column[any_sequence].links: 2.101 + if sequence not in column[any_sequence].links: 2.102 column[any_sequence].links.append(sequence) 2.103 if "links" not in column[sequence].__dict__: 2.104 column[sequence].links=[any_sequence] 2.105 @@ -304,8 +342,8 @@ 2.106 #print index_of_column, [gblock.columns for gblock in blocks_in_this_column] 2.107 if alignment.columns.index(column) == len(alignment.columns)-1: output+=blocks_in_this_column 2.108 return output 2.109 - 2.110 - 2.111 + 2.112 + 2.113 def find_connected_component(sequences, column): 2.114 """output is a list of sequences""" 2.115 #find one sequence that has blocks 2.116 @@ -326,7 +364,7 @@ 2.117 if another_sequence not in queue and another_sequence not in output and another_sequence in sequences: 2.118 queue.append(another_sequence) 2.119 return output 2.120 - 2.121 + 2.122 2.123 def delete_links(alignment): 2.124 for sequence in alignment.sequences: 2.125 @@ -334,7 +372,7 @@ 2.126 if "links" in monomer.__dict__: 2.127 del monomer.links 2.128 2.129 -#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147] 2.130 +#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147] 2.131 2.132 def create_monomer_homology(alignment): 2.133 """returns MonomerHomology object, given alignment 2.134 @@ -408,4 +446,21 @@ 2.135 2.136 2.137 if __name__== '__main__': 2.138 - main(open(sys.argv[1])) 2.139 + input_file = open(sys.argv[1]) 2.140 + blocks = main(input_file) 2.141 + if blocks == []: 2.142 + sys.exit() 2.143 + aim = blocks[0].alignment.add_markup('index') #create markup 2.144 + file_content="" 2.145 + for index, block in enumerate(blocks): 2.146 + for sequence in block.sequences: 2.147 + file_content+=str(str(sys.argv[1]))+"\t"+str(index)+"\t"+str(sequence.name)+"\t" 2.148 + buffer="" 2.149 + for column in block.columns: 2.150 + buffer+=str(aim[column])+" " 2.151 + if (len(buffer)!=0): buffer=buffer[:-1] 2.152 + file_content+=buffer+"\n" 2.153 + 2.154 + output_file = open(sys.argv[2], 'w') 2.155 + output_file.write(file_content) 2.156 +