Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/936463fb68b2
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:58:39 2012
Кодировка:
allpy: 936463fb68b2

allpy

changeset 506:936463fb68b2

fixed blocks3d() * fix bug in continuous_blocks it joined short batches to next batch * blocks3d(): put monomers only of boundary columns to the graph. * reduce size of wide cliques * optimization this needs better solution * corrected conversion clique -> blocks (result) * fixed self-intersection of result blocks (filled_columns) * fixed infinite loop * added temponary exclusion of shortest sequence * sorting of cliques by height and by width
author boris (kodomo) <bnagaev@gmail.com>
date Wed, 23 Feb 2011 18:13:56 +0300
parents edc167aa9989
children 85ccc9e8e6f2
files allpy/structure.py
diffstat 1 files changed, 89 insertions(+), 30 deletions(-) [+]
line diff
     1.1 --- a/allpy/structure.py	Wed Feb 23 12:08:47 2011 +0300
     1.2 +++ b/allpy/structure.py	Wed Feb 23 18:13:56 2011 +0300
     1.3 @@ -252,9 +252,12 @@
     1.4          result = []
     1.5          Block = self.__class__
     1.6          # for sorting
     1.7 -        key_column = lambda c: self.columns.index(c)
     1.8 -        key_sequence = lambda s: self.sequences.index(s)
     1.9 -        vertices = set(sum([seq for seq in self.sequences], []))
    1.10 +        column2pos = {}
    1.11 +        for i, column in enumerate(self.columns):
    1.12 +            column2pos[column] = i
    1.13 +        sequence2i = {}
    1.14 +        for i, sequence in enumerate(self.sequences):
    1.15 +            sequence2i[sequence] = i
    1.16          edges = {}
    1.17          monomer2column = {}
    1.18          monomer2sequence = {}
    1.19 @@ -262,6 +265,7 @@
    1.20              for sequence, monomer in column.items():
    1.21                  monomer2column[monomer] = column
    1.22                  monomer2sequence[monomer] = sequence
    1.23 +        parts = []
    1.24          for i, seq1 in enumerate(self.sequences):
    1.25              for j, seq2 in enumerate(self.sequences):
    1.26                  if i < j:
    1.27 @@ -271,36 +275,89 @@
    1.28                          timeout=timeout, minsize=min_width)
    1.29                      for core in cores:
    1.30                          core_block = copy(block)
    1.31 -                        core_block.columns = sorted(core, key=key_column)
    1.32 -                        for part in core_block.continuous_blocks(min_width):
    1.33 -                            monomers = part.monomers()
    1.34 -                            for m1 in monomers:
    1.35 -                                for m2 in monomers:
    1.36 -                                    edges
    1.37 -                                    edge = Graph.edge(m1, m2)
    1.38 -                                    edges[edge] = 1.0
    1.39 +                        core_block.columns = sorted(core, key=column2pos.get)
    1.40 +                        parts += core_block.continuous_blocks(min_width)
    1.41 +        boundaries = set() # of Columns
    1.42 +        for part in parts:
    1.43 +            boundaries.add(part.columns[0])
    1.44 +            boundaries.add(part.columns[-1])
    1.45 +        for part in parts:
    1.46 +            part.columns = sorted(set(part.columns) & boundaries, key=column2pos.get)
    1.47 +            monomers = part.monomers()
    1.48 +            for m1 in monomers:
    1.49 +                for m2 in monomers:
    1.50 +                    edge = Graph.edge(m1, m2)
    1.51 +                    edges[edge] = 1.0
    1.52 +        vertices = set(sum([list(edge) for edge in edges], []))
    1.53          graph = Graph(vertices, edges)
    1.54 -        cliques = graph.cliques(minsize=min_width*2, timeout=timeout_2)
    1.55 +        cliques = graph.cliques(minsize=4, timeout=timeout_2)
    1.56 +        clique_blocks = []
    1.57 +        # expand cliques to rectangles
    1.58 +        for clique in cliques:
    1.59 +            sequences = set(monomer2sequence[m] for m in clique)
    1.60 +            columns = set(monomer2column[m] for m in clique)
    1.61 +            columns = sorted(columns, key=column2pos.get)
    1.62 +            first_column_i = self.columns.index(columns[0])
    1.63 +            last_column_i = self.columns.index(columns[-1])
    1.64 +            block = copy(self)
    1.65 +            block.columns = self.columns[first_column_i:last_column_i]
    1.66 +            block.sequences = sequences
    1.67 +            clique_blocks.append(block)
    1.68 +        def cmp_blocks(block1, block2):
    1.69 +            height1 = len(block1.sequences)
    1.70 +            height2 = len(block2.sequences)
    1.71 +            if cmp(height1, height2):
    1.72 +                return cmp(height1, height2)
    1.73 +            width1 = len(block1.columns)
    1.74 +            width2 = len(block2.columns)
    1.75 +            return cmp(width1, width2)
    1.76 +        # sort blocks
    1.77 +        clique_blocks.sort(cmp=cmp_blocks, reverse=True)
    1.78          used_monomers = set()
    1.79 -        for clique in cliques:
    1.80 -            clique -= used_monomers
    1.81 -            while clique:
    1.82 -                sequences = set(monomer2sequence[m] for m in clique)
    1.83 -                sequences = sorted(sequences, key=key_sequence)
    1.84 -                columns = set(monomer2column[m] for m in clique)
    1.85 -                columns = sorted(columns, key=key_column)
    1.86 -                height = len(sequences)
    1.87 -                if height <= 1:
    1.88 +        for clique_block in clique_blocks:
    1.89 +            monomers = set(clique_block.monomers())
    1.90 +            monomers -= used_monomers
    1.91 +            result_len = len(result)
    1.92 +            while monomers:
    1.93 +                sequences = set(monomer2sequence[m] for m in monomers)
    1.94 +                columns = set(monomer2column[m] for m in monomers)
    1.95 +                sequences = sorted(sequences, key=sequence2i.get)
    1.96 +                columns = sorted(columns, key=column2pos.get)
    1.97 +
    1.98 +                block = copy(self)
    1.99 +                block.columns = columns
   1.100 +                block.sequences = sequences
   1.101 +                while len(sequences) > 1:
   1.102 +                    filled_columns = []
   1.103 +                    for c in columns:
   1.104 +                        for s in sequences:
   1.105 +                            if s not in c or c[s] not in monomers:
   1.106 +                                break
   1.107 +                        else:
   1.108 +                            filled_columns.append(c)
   1.109 +                    block = copy(self)
   1.110 +                    block.columns = filled_columns
   1.111 +                    block.sequences = sequences
   1.112 +                    parts = block.continuous_blocks(min_width)
   1.113 +                    if parts:
   1.114 +                        for part in parts:
   1.115 +                            used_monomers |= set(part.monomers())
   1.116 +                            monomers -= set(part.monomers())
   1.117 +                            result.append(part)
   1.118 +                        break
   1.119 +                    else:
   1.120 +                        #remove sequence
   1.121 +                        number_of_columns = {}
   1.122 +                        for sequence in sequences:
   1.123 +                            number_of_columns[sequence] =\
   1.124 +                                len(set(sequence) & monomers)
   1.125 +                        _rev = dict([(v,k) for (k,v) in number_of_columns.items()])
   1.126 +                        victim = _rev[min(_rev.keys())]
   1.127 +                        sequences.remove(sequence)
   1.128 +                if len(result) == result_len:
   1.129                      break
   1.130 -                filled_columns = [c for c in columns
   1.131 -                    if len(clique & set(c.values())) == height]
   1.132 -                block = copy(self)
   1.133 -                block.columns = filled_columns
   1.134 -                block.sequences = sequences
   1.135 -                for part in block.continuous_blocks(min_width):
   1.136 -                    clique -= set(part.monomers())
   1.137 -                    used_monomers |= set(part.monomers())
   1.138 -                    result.append(part)
   1.139 +                else:
   1.140 +                    result_len = len(result)
   1.141          return result
   1.142  
   1.143      def continuous_blocks(self, min_width=1):
   1.144 @@ -316,6 +373,8 @@
   1.145                  block.columns = columns_batch
   1.146                  columns_batch = []
   1.147                  result.append(block)
   1.148 +            else:
   1.149 +                columns_batch = []
   1.150          if len(columns_batch) >= min_width:
   1.151              block = copy(self)
   1.152              block.columns = columns_batch