Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/0ef1a722b738
Дата изменения: Unknown
Дата индексирования: Mon Oct 1 23:57:54 2012
Кодировка: UTF-8
allpy: 0ef1a722b738

allpy

changeset 116:0ef1a722b738

lib: graph.py -- class Graph with clique builbing functions
author boris <bnagaev@gmail.com>
date Sat, 23 Oct 2010 20:29:40 +0400
parents 7f8922db4e55
children fd7371f14c6d
files lib/block.py lib/config.py lib/graph.py
diffstat 3 files changed, 270 insertions(+), 1 deletions(-) [+]
line diff
     1.1 --- a/lib/block.py	Sat Oct 23 13:19:10 2010 +0400
     1.2 +++ b/lib/block.py	Sat Oct 23 20:29:40 2010 +0400
     1.3 @@ -5,6 +5,7 @@
     1.4  import project
     1.5  import sequence
     1.6  import monomer
     1.7 +import config
     1.8  
     1.9  class Block(object):
    1.10      """
    1.11 @@ -14,7 +15,7 @@
    1.12          and/or gaps, that constitute the block
    1.13      *   self.positions -- sorted list of positions of the project.alignment that
    1.14          are included in the block
    1.15 -
    1.16 +    
    1.17      How to create a new block:
    1.18      >>> import project
    1.19      >>> import block
    1.20 @@ -55,3 +56,15 @@
    1.21                      out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
    1.22              else:
    1.23                  out_file.write("%s \n" % string)
    1.24 +    
    1.25 +    def geometrical_core(self, delta=config.delta):
    1.26 +        """
    1.27 +        returns sorted list of positions, representing geometrical core
    1.28 +        delta -- threshold of distance spreading
    1.29 +        """
    1.30 +        for i in self.positions:
    1.31 +            for j in self.positions:
    1.32 +                pass
    1.33 +                
    1.34 +                
    1.35 +                
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/lib/config.py	Sat Oct 23 20:29:40 2010 +0400
     2.3 @@ -0,0 +1,4 @@
     2.4 +
     2.5 +delta = 2.0 # for geometrical core building
     2.6 +
     2.7 +
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/lib/graph.py	Sat Oct 23 20:29:40 2010 +0400
     3.3 @@ -0,0 +1,252 @@
     3.4 +# -*- coding: utf-8 -*- 
     3.5 +
     3.6 +from datetime import datetime
     3.7 +from copy import copy
     3.8 +
     3.9 +class TimeoutError(Exception):
    3.10 +    pass
    3.11 +
    3.12 +
    3.13 +
    3.14 +class Graph(object):
    3.15 +    """
    3.16 +    Data:
    3.17 +    nodes -- set of elements
    3.18 +    lines -- {line: cost}. 
    3.19 +        line is frozenset([e1, e2])
    3.20 +        cost is float in (0, 1] or 1 (if all lines are equal)
    3.21 +        
    3.22 +    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1})
    3.23 +    >>> g.fast_cliques()
    3.24 +    Fast algorithm started
    3.25 +    [frozenset([1, 2]), frozenset([3])]
    3.26 +    >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98,
    3.27 +    ... frozenset([2,3]): 0.1})
    3.28 +    >>> g.fast_cliques()
    3.29 +    Fast algorithm started
    3.30 +    [frozenset([1, 2, 3]), frozenset([4])]
    3.31 +    >>> g.bron_kerbosh()
    3.32 +    Bron and Kerbosh algorithm started
    3.33 +    [frozenset([1, 2, 3]), frozenset([4])]
    3.34 +    """
    3.35 +    
    3.36 +    def __init__(self, nodes=None, lines=None):
    3.37 +        if not nodes:
    3.38 +            nodes = set()
    3.39 +        if not lines:
    3.40 +            lines = dict()
    3.41 +        self.nodes = nodes
    3.42 +        self.lines = lines
    3.43 +    
    3.44 +    def bounded(self, k1, k2):
    3.45 +        return k1 == k2 or frozenset([k1, k2]) in self.lines
    3.46 +    
    3.47 +    def count_one(self, node):
    3.48 +        return len([node1 for node1 in self.nodes if \
    3.49 +        frozenset([node, node1]) in self.lines])
    3.50 +    
    3.51 +    def cost_one(self, node):
    3.52 +        return sum([self.lines[frozenset([node, node1])] for \
    3.53 +        node1 in self.nodes if frozenset([node, node1]) in self.lines])
    3.54 +    
    3.55 +    def count_all(self):
    3.56 +        c = dict([(node, 0) for node in self.nodes])
    3.57 +        for line in self.lines:
    3.58 +            for node in line:
    3.59 +                c[node] += 1
    3.60 +        return c
    3.61 +    
    3.62 +    
    3.63 +    def drop_node(self, node):
    3.64 +        for node1 in self.nodes:
    3.65 +            self.lines.pop(frozenset([node, node1]), None)
    3.66 +        self.nodes.discard(node)
    3.67 +    
    3.68 +    def add_node(self, node, parent_graph):
    3.69 +        self.nodes.add(node)
    3.70 +        for node1 in self.nodes:
    3.71 +            line = frozenset([node, node1])
    3.72 +            if line in parent_graph.lines:
    3.73 +                self.lines[line] = parent_graph.lines              
    3.74 +    
    3.75 +    def drop_nodes(self, nodes):
    3.76 +        for node in nodes:
    3.77 +            self.drop_node(node)
    3.78 +        return bool(nodes)
    3.79 +    
    3.80 +    def drop_if_count(self, minsize):
    3.81 +        while True:
    3.82 +            if not self.drop_nodes([node for (node, count) in \
    3.83 +            self.nodes.count_all().items() if count < minsize]):
    3.84 +                break
    3.85 +     
    3.86 +    def bron_kerbosh(self, timeout=-1, minsize=1):
    3.87 +        """
    3.88 +        returns list of cliques
    3.89 +        clique is frozenset
    3.90 +        if timeout=-1, it means infinity
    3.91 +        if timeout has happened, raises TimeoutError
    3.92 +        
    3.93 +        lava flow
    3.94 +        """
    3.95 +        print 'Bron and Kerbosh algorithm started'
    3.96 +        cliques = []
    3.97 +        
    3.98 +        depth = 0
    3.99 +        list_candidates = [copy(self.nodes)]
   3.100 +        list_used = [set()]
   3.101 +        compsub = []
   3.102 +        
   3.103 +        start_time = datetime.now()
   3.104 +    
   3.105 +        while True: # ПОКА...
   3.106 +            if depth == -1:
   3.107 +                break  # ВСЕ! Все рекурсии (итерации) пройдены
   3.108 +            candidates = copy(list_candidates[depth])
   3.109 +            used = copy(list_used[depth])       
   3.110 +            if not candidates: # ПОКА candidates НЕ пусто
   3.111 +                depth -= 1
   3.112 +                if compsub:
   3.113 +                    compsub.pop()                
   3.114 +                continue
   3.115 +            
   3.116 +            # И used НЕ содержит вершины, СОЕДИНЕННОЙ СО ВСЕМИ вершинами из candidates
   3.117 +            # (все из used НЕ соединены хотя бы с 1 из candidates)
   3.118 +            used_candidates = False
   3.119 +        
   3.120 +            for used1 in used:
   3.121 +                for candidates1 in candidates:
   3.122 +                    if not self.bounded(used1, candidates1):
   3.123 +                        break
   3.124 +                else:
   3.125 +                    used_candidates = True             
   3.126 +
   3.127 +            if used_candidates:
   3.128 +                depth -= 1
   3.129 +
   3.130 +                if compsub:
   3.131 +                    compsub.pop()
   3.132 +                continue
   3.133 +            
   3.134 +            # Выбираем вершину v из candidates и добавляем ее в compsub
   3.135 +            v = candidates.pop()
   3.136 +            candidates.add(v)
   3.137 +            compsub.append(v)
   3.138 +            # Формируем new_candidates и new_used, удаляя из candidates и used вершины, НЕ соединенные с v
   3.139 +            # (то есть, оставляя только соединенные с v)
   3.140 +            new_candidates = set()
   3.141 +            for candidates1 in candidates:
   3.142 +                if self.bounded(candidates1, v) and candidates1 != v:
   3.143 +                    new_candidates.add(candidates1)
   3.144 +
   3.145 +            new_used = set()
   3.146 +            for used1 in used:
   3.147 +                if self.bounded(used1, v) and used1 != v:
   3.148 +                    new_used.add(used1)
   3.149 +            
   3.150 +            # Удаляем v из candidates и помещаем в used
   3.151 +            list_candidates[depth].remove(v)    
   3.152 +            list_used[depth].add(v)
   3.153 +            # ЕСЛИ new_candidates и new_used пусты
   3.154 +            if not new_candidates and not new_used:
   3.155 +                # compsub ? клика
   3.156 +                if len(compsub) >= minsize:
   3.157 +                    cliques.append(frozenset(compsub))
   3.158 +            else:
   3.159 +                # ИНАЧЕ рекурсивно вызываем bron_kerbosh(new_candidates, new_used)
   3.160 +                depth += 1
   3.161 +
   3.162 +                # TIMEOUT check start
   3.163 +                if timeout != -1:
   3.164 +                    if datetime.now() - start_time > timeout:
   3.165 +                        raise TimeoutError
   3.166 +                # TIMEOUT check end
   3.167 +                
   3.168 +                if depth >= len(list_candidates):
   3.169 +                    list_candidates.append(set())
   3.170 +                    list_used.append(set())
   3.171 +
   3.172 +                list_candidates[depth] = copy(new_candidates)
   3.173 +                list_used[depth] = copy(new_used)
   3.174 +                
   3.175 +                continue
   3.176 +            
   3.177 +            # Удаляем v из compsub
   3.178 +            if compsub:
   3.179 +                compsub.pop()
   3.180 +                
   3.181 +        return cliques
   3.182 +
   3.183 +    
   3.184 +    def fast_cliques(self, minsize=1):
   3.185 +        """
   3.186 +        returns list of cliques
   3.187 +        clique is frozenset
   3.188 +        """
   3.189 +        print 'Fast algorithm started'
   3.190 +        cliques = []
   3.191 +        
   3.192 +        while True:
   3.193 +            graph = Graph(copy(self.nodes), copy(self.lines))
   3.194 +            for clique in cliques:
   3.195 +                graph.drop_nodes(clique)
   3.196 +            if not graph.nodes or len(cliques) == 2:
   3.197 +                break
   3.198 +            
   3.199 +            while True: 
   3.200 +                # drop nodes, while its is possible
   3.201 +                c = graph.count_all()
   3.202 +                min_count = min(c.values())
   3.203 +                bad_nodes = [node for (node, count) in c.items() if count == min_count]
   3.204 +                if len(bad_nodes) == len(graph.nodes):
   3.205 +                    break
   3.206 +                
   3.207 +                costs = dict([(node, graph.cost_one(node)) for node in bad_nodes])
   3.208 +                min_cost = min(costs.values())
   3.209 +                for node, cost in costs.items():
   3.210 +                    if cost == min_cost:
   3.211 +                        graph.drop_node(node)
   3.212 +            
   3.213 +            while True:
   3.214 +                # add nodes, while its is possible
   3.215 +                candidats = {}
   3.216 +                for node in self.nodes:
   3.217 +                    if len([i for i in graph.nodes if frozenset([node, i]) in \
   3.218 +                    self.lines]) == len(self.nodes): # perl has me
   3.219 +                        graph1 = Graph(copy(graph.nodes), copy(graph.lines))
   3.220 +                        graph1.add_node(node, self)
   3.221 +                        candidats[node] = graph1.cost_one(node)
   3.222 +                if not candidats:
   3.223 +                    break
   3.224 +                
   3.225 +                max_cost = max(candidats.values())
   3.226 +                node = [node for (node, cost) in candidats.items() if cost == max_cost][0]
   3.227 +                graph.add_node(node, self)
   3.228 +                
   3.229 +            cliques.append(frozenset(graph.nodes))
   3.230 +            
   3.231 +        return cliques
   3.232 +        
   3.233 +    
   3.234 +    def cliques(self, timeout=-1, minsize=1):
   3.235 +        """
   3.236 +        returns length-sorted list of cliques
   3.237 +        clique is frozenset
   3.238 +        
   3.239 +        can change self!
   3.240 +        
   3.241 +        try to execute bron_kerbosh
   3.242 +        if it raises TimeoutError, executes fast_cliques
   3.243 +        """
   3.244 +        
   3.245 +        drop_if_count(minsize)
   3.246 +        
   3.247 +        try:
   3.248 +            cliques = bron_kerbosh(timeout, minsize)
   3.249 +        except TimeoutError:
   3.250 +            cliques = fast_cliques(minsize)
   3.251 +        return sorted(cliques, key=lambda clique: len(clique), reverse=True)
   3.252 +
   3.253 +if __name__ == "__main__":
   3.254 +    import doctest
   3.255 +    doctest.testmod()