allpy
changeset 506:936463fb68b2
fixed blocks3d()
* fix bug in continuous_blocks
it joined short batches to next batch
* blocks3d(): put monomers only of boundary columns
to the graph.
* reduce size of wide cliques
* optimization
this needs better solution
* corrected conversion clique -> blocks (result)
* fixed self-intersection of result blocks
(filled_columns)
* fixed infinite loop
* added temponary exclusion of shortest sequence
* sorting of cliques by height and by width
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Wed, 23 Feb 2011 18:13:56 +0300 |
parents | edc167aa9989 |
children | 85ccc9e8e6f2 |
files | allpy/structure.py |
diffstat | 1 files changed, 89 insertions(+), 30 deletions(-) [+] |
line diff
1.1 --- a/allpy/structure.py Wed Feb 23 12:08:47 2011 +0300 1.2 +++ b/allpy/structure.py Wed Feb 23 18:13:56 2011 +0300 1.3 @@ -252,9 +252,12 @@ 1.4 result = [] 1.5 Block = self.__class__ 1.6 # for sorting 1.7 - key_column = lambda c: self.columns.index(c) 1.8 - key_sequence = lambda s: self.sequences.index(s) 1.9 - vertices = set(sum([seq for seq in self.sequences], [])) 1.10 + column2pos = {} 1.11 + for i, column in enumerate(self.columns): 1.12 + column2pos[column] = i 1.13 + sequence2i = {} 1.14 + for i, sequence in enumerate(self.sequences): 1.15 + sequence2i[sequence] = i 1.16 edges = {} 1.17 monomer2column = {} 1.18 monomer2sequence = {} 1.19 @@ -262,6 +265,7 @@ 1.20 for sequence, monomer in column.items(): 1.21 monomer2column[monomer] = column 1.22 monomer2sequence[monomer] = sequence 1.23 + parts = [] 1.24 for i, seq1 in enumerate(self.sequences): 1.25 for j, seq2 in enumerate(self.sequences): 1.26 if i < j: 1.27 @@ -271,36 +275,89 @@ 1.28 timeout=timeout, minsize=min_width) 1.29 for core in cores: 1.30 core_block = copy(block) 1.31 - core_block.columns = sorted(core, key=key_column) 1.32 - for part in core_block.continuous_blocks(min_width): 1.33 - monomers = part.monomers() 1.34 - for m1 in monomers: 1.35 - for m2 in monomers: 1.36 - edges 1.37 - edge = Graph.edge(m1, m2) 1.38 - edges[edge] = 1.0 1.39 + core_block.columns = sorted(core, key=column2pos.get) 1.40 + parts += core_block.continuous_blocks(min_width) 1.41 + boundaries = set() # of Columns 1.42 + for part in parts: 1.43 + boundaries.add(part.columns[0]) 1.44 + boundaries.add(part.columns[-1]) 1.45 + for part in parts: 1.46 + part.columns = sorted(set(part.columns) & boundaries, key=column2pos.get) 1.47 + monomers = part.monomers() 1.48 + for m1 in monomers: 1.49 + for m2 in monomers: 1.50 + edge = Graph.edge(m1, m2) 1.51 + edges[edge] = 1.0 1.52 + vertices = set(sum([list(edge) for edge in edges], [])) 1.53 graph = Graph(vertices, edges) 1.54 - cliques = graph.cliques(minsize=min_width*2, timeout=timeout_2) 1.55 + cliques = graph.cliques(minsize=4, timeout=timeout_2) 1.56 + clique_blocks = [] 1.57 + # expand cliques to rectangles 1.58 + for clique in cliques: 1.59 + sequences = set(monomer2sequence[m] for m in clique) 1.60 + columns = set(monomer2column[m] for m in clique) 1.61 + columns = sorted(columns, key=column2pos.get) 1.62 + first_column_i = self.columns.index(columns[0]) 1.63 + last_column_i = self.columns.index(columns[-1]) 1.64 + block = copy(self) 1.65 + block.columns = self.columns[first_column_i:last_column_i] 1.66 + block.sequences = sequences 1.67 + clique_blocks.append(block) 1.68 + def cmp_blocks(block1, block2): 1.69 + height1 = len(block1.sequences) 1.70 + height2 = len(block2.sequences) 1.71 + if cmp(height1, height2): 1.72 + return cmp(height1, height2) 1.73 + width1 = len(block1.columns) 1.74 + width2 = len(block2.columns) 1.75 + return cmp(width1, width2) 1.76 + # sort blocks 1.77 + clique_blocks.sort(cmp=cmp_blocks, reverse=True) 1.78 used_monomers = set() 1.79 - for clique in cliques: 1.80 - clique -= used_monomers 1.81 - while clique: 1.82 - sequences = set(monomer2sequence[m] for m in clique) 1.83 - sequences = sorted(sequences, key=key_sequence) 1.84 - columns = set(monomer2column[m] for m in clique) 1.85 - columns = sorted(columns, key=key_column) 1.86 - height = len(sequences) 1.87 - if height <= 1: 1.88 + for clique_block in clique_blocks: 1.89 + monomers = set(clique_block.monomers()) 1.90 + monomers -= used_monomers 1.91 + result_len = len(result) 1.92 + while monomers: 1.93 + sequences = set(monomer2sequence[m] for m in monomers) 1.94 + columns = set(monomer2column[m] for m in monomers) 1.95 + sequences = sorted(sequences, key=sequence2i.get) 1.96 + columns = sorted(columns, key=column2pos.get) 1.97 + 1.98 + block = copy(self) 1.99 + block.columns = columns 1.100 + block.sequences = sequences 1.101 + while len(sequences) > 1: 1.102 + filled_columns = [] 1.103 + for c in columns: 1.104 + for s in sequences: 1.105 + if s not in c or c[s] not in monomers: 1.106 + break 1.107 + else: 1.108 + filled_columns.append(c) 1.109 + block = copy(self) 1.110 + block.columns = filled_columns 1.111 + block.sequences = sequences 1.112 + parts = block.continuous_blocks(min_width) 1.113 + if parts: 1.114 + for part in parts: 1.115 + used_monomers |= set(part.monomers()) 1.116 + monomers -= set(part.monomers()) 1.117 + result.append(part) 1.118 + break 1.119 + else: 1.120 + #remove sequence 1.121 + number_of_columns = {} 1.122 + for sequence in sequences: 1.123 + number_of_columns[sequence] =\ 1.124 + len(set(sequence) & monomers) 1.125 + _rev = dict([(v,k) for (k,v) in number_of_columns.items()]) 1.126 + victim = _rev[min(_rev.keys())] 1.127 + sequences.remove(sequence) 1.128 + if len(result) == result_len: 1.129 break 1.130 - filled_columns = [c for c in columns 1.131 - if len(clique & set(c.values())) == height] 1.132 - block = copy(self) 1.133 - block.columns = filled_columns 1.134 - block.sequences = sequences 1.135 - for part in block.continuous_blocks(min_width): 1.136 - clique -= set(part.monomers()) 1.137 - used_monomers |= set(part.monomers()) 1.138 - result.append(part) 1.139 + else: 1.140 + result_len = len(result) 1.141 return result 1.142 1.143 def continuous_blocks(self, min_width=1): 1.144 @@ -316,6 +373,8 @@ 1.145 block.columns = columns_batch 1.146 columns_batch = [] 1.147 result.append(block) 1.148 + else: 1.149 + columns_batch = [] 1.150 if len(columns_batch) >= min_width: 1.151 block = copy(self) 1.152 block.columns = columns_batch