allpy
changeset 116:0ef1a722b738
lib: graph.py -- class Graph with clique builbing functions
author | boris <bnagaev@gmail.com> |
---|---|
date | Sat, 23 Oct 2010 20:29:40 +0400 |
parents | 7f8922db4e55 |
children | fd7371f14c6d |
files | lib/block.py lib/config.py lib/graph.py |
diffstat | 3 files changed, 270 insertions(+), 1 deletions(-) [+] |
line diff
1.1 --- a/lib/block.py Sat Oct 23 13:19:10 2010 +0400 1.2 +++ b/lib/block.py Sat Oct 23 20:29:40 2010 +0400 1.3 @@ -5,6 +5,7 @@ 1.4 import project 1.5 import sequence 1.6 import monomer 1.7 +import config 1.8 1.9 class Block(object): 1.10 """ 1.11 @@ -14,7 +15,7 @@ 1.12 and/or gaps, that constitute the block 1.13 * self.positions -- sorted list of positions of the project.alignment that 1.14 are included in the block 1.15 - 1.16 + 1.17 How to create a new block: 1.18 >>> import project 1.19 >>> import block 1.20 @@ -55,3 +56,15 @@ 1.21 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line]) 1.22 else: 1.23 out_file.write("%s \n" % string) 1.24 + 1.25 + def geometrical_core(self, delta=config.delta): 1.26 + """ 1.27 + returns sorted list of positions, representing geometrical core 1.28 + delta -- threshold of distance spreading 1.29 + """ 1.30 + for i in self.positions: 1.31 + for j in self.positions: 1.32 + pass 1.33 + 1.34 + 1.35 +
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/lib/config.py Sat Oct 23 20:29:40 2010 +0400 2.3 @@ -0,0 +1,4 @@ 2.4 + 2.5 +delta = 2.0 # for geometrical core building 2.6 + 2.7 +
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/lib/graph.py Sat Oct 23 20:29:40 2010 +0400 3.3 @@ -0,0 +1,252 @@ 3.4 +# -*- coding: utf-8 -*- 3.5 + 3.6 +from datetime import datetime 3.7 +from copy import copy 3.8 + 3.9 +class TimeoutError(Exception): 3.10 + pass 3.11 + 3.12 + 3.13 + 3.14 +class Graph(object): 3.15 + """ 3.16 + Data: 3.17 + nodes -- set of elements 3.18 + lines -- {line: cost}. 3.19 + line is frozenset([e1, e2]) 3.20 + cost is float in (0, 1] or 1 (if all lines are equal) 3.21 + 3.22 + >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1}) 3.23 + >>> g.fast_cliques() 3.24 + Fast algorithm started 3.25 + [frozenset([1, 2]), frozenset([3])] 3.26 + >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98, 3.27 + ... frozenset([2,3]): 0.1}) 3.28 + >>> g.fast_cliques() 3.29 + Fast algorithm started 3.30 + [frozenset([1, 2, 3]), frozenset([4])] 3.31 + >>> g.bron_kerbosh() 3.32 + Bron and Kerbosh algorithm started 3.33 + [frozenset([1, 2, 3]), frozenset([4])] 3.34 + """ 3.35 + 3.36 + def __init__(self, nodes=None, lines=None): 3.37 + if not nodes: 3.38 + nodes = set() 3.39 + if not lines: 3.40 + lines = dict() 3.41 + self.nodes = nodes 3.42 + self.lines = lines 3.43 + 3.44 + def bounded(self, k1, k2): 3.45 + return k1 == k2 or frozenset([k1, k2]) in self.lines 3.46 + 3.47 + def count_one(self, node): 3.48 + return len([node1 for node1 in self.nodes if \ 3.49 + frozenset([node, node1]) in self.lines]) 3.50 + 3.51 + def cost_one(self, node): 3.52 + return sum([self.lines[frozenset([node, node1])] for \ 3.53 + node1 in self.nodes if frozenset([node, node1]) in self.lines]) 3.54 + 3.55 + def count_all(self): 3.56 + c = dict([(node, 0) for node in self.nodes]) 3.57 + for line in self.lines: 3.58 + for node in line: 3.59 + c[node] += 1 3.60 + return c 3.61 + 3.62 + 3.63 + def drop_node(self, node): 3.64 + for node1 in self.nodes: 3.65 + self.lines.pop(frozenset([node, node1]), None) 3.66 + self.nodes.discard(node) 3.67 + 3.68 + def add_node(self, node, parent_graph): 3.69 + self.nodes.add(node) 3.70 + for node1 in self.nodes: 3.71 + line = frozenset([node, node1]) 3.72 + if line in parent_graph.lines: 3.73 + self.lines[line] = parent_graph.lines 3.74 + 3.75 + def drop_nodes(self, nodes): 3.76 + for node in nodes: 3.77 + self.drop_node(node) 3.78 + return bool(nodes) 3.79 + 3.80 + def drop_if_count(self, minsize): 3.81 + while True: 3.82 + if not self.drop_nodes([node for (node, count) in \ 3.83 + self.nodes.count_all().items() if count < minsize]): 3.84 + break 3.85 + 3.86 + def bron_kerbosh(self, timeout=-1, minsize=1): 3.87 + """ 3.88 + returns list of cliques 3.89 + clique is frozenset 3.90 + if timeout=-1, it means infinity 3.91 + if timeout has happened, raises TimeoutError 3.92 + 3.93 + lava flow 3.94 + """ 3.95 + print 'Bron and Kerbosh algorithm started' 3.96 + cliques = [] 3.97 + 3.98 + depth = 0 3.99 + list_candidates = [copy(self.nodes)] 3.100 + list_used = [set()] 3.101 + compsub = [] 3.102 + 3.103 + start_time = datetime.now() 3.104 + 3.105 + while True: # ПОКА... 3.106 + if depth == -1: 3.107 + break # ВСЕ! Все рекурсии (итерации) пройдены 3.108 + candidates = copy(list_candidates[depth]) 3.109 + used = copy(list_used[depth]) 3.110 + if not candidates: # ПОКА candidates НЕ пусто 3.111 + depth -= 1 3.112 + if compsub: 3.113 + compsub.pop() 3.114 + continue 3.115 + 3.116 + # И used НЕ содержит вершины, СОЕДИНЕННОЙ СО ВСЕМИ вершинами из candidates 3.117 + # (все из used НЕ соединены хотя бы с 1 из candidates) 3.118 + used_candidates = False 3.119 + 3.120 + for used1 in used: 3.121 + for candidates1 in candidates: 3.122 + if not self.bounded(used1, candidates1): 3.123 + break 3.124 + else: 3.125 + used_candidates = True 3.126 + 3.127 + if used_candidates: 3.128 + depth -= 1 3.129 + 3.130 + if compsub: 3.131 + compsub.pop() 3.132 + continue 3.133 + 3.134 + # Выбираем вершину v из candidates и добавляем ее в compsub 3.135 + v = candidates.pop() 3.136 + candidates.add(v) 3.137 + compsub.append(v) 3.138 + # Формируем new_candidates и new_used, удаляя из candidates и used вершины, НЕ соединенные с v 3.139 + # (то есть, оставляя только соединенные с v) 3.140 + new_candidates = set() 3.141 + for candidates1 in candidates: 3.142 + if self.bounded(candidates1, v) and candidates1 != v: 3.143 + new_candidates.add(candidates1) 3.144 + 3.145 + new_used = set() 3.146 + for used1 in used: 3.147 + if self.bounded(used1, v) and used1 != v: 3.148 + new_used.add(used1) 3.149 + 3.150 + # Удаляем v из candidates и помещаем в used 3.151 + list_candidates[depth].remove(v) 3.152 + list_used[depth].add(v) 3.153 + # ЕСЛИ new_candidates и new_used пусты 3.154 + if not new_candidates and not new_used: 3.155 + # compsub ? клика 3.156 + if len(compsub) >= minsize: 3.157 + cliques.append(frozenset(compsub)) 3.158 + else: 3.159 + # ИНАЧЕ рекурсивно вызываем bron_kerbosh(new_candidates, new_used) 3.160 + depth += 1 3.161 + 3.162 + # TIMEOUT check start 3.163 + if timeout != -1: 3.164 + if datetime.now() - start_time > timeout: 3.165 + raise TimeoutError 3.166 + # TIMEOUT check end 3.167 + 3.168 + if depth >= len(list_candidates): 3.169 + list_candidates.append(set()) 3.170 + list_used.append(set()) 3.171 + 3.172 + list_candidates[depth] = copy(new_candidates) 3.173 + list_used[depth] = copy(new_used) 3.174 + 3.175 + continue 3.176 + 3.177 + # Удаляем v из compsub 3.178 + if compsub: 3.179 + compsub.pop() 3.180 + 3.181 + return cliques 3.182 + 3.183 + 3.184 + def fast_cliques(self, minsize=1): 3.185 + """ 3.186 + returns list of cliques 3.187 + clique is frozenset 3.188 + """ 3.189 + print 'Fast algorithm started' 3.190 + cliques = [] 3.191 + 3.192 + while True: 3.193 + graph = Graph(copy(self.nodes), copy(self.lines)) 3.194 + for clique in cliques: 3.195 + graph.drop_nodes(clique) 3.196 + if not graph.nodes or len(cliques) == 2: 3.197 + break 3.198 + 3.199 + while True: 3.200 + # drop nodes, while its is possible 3.201 + c = graph.count_all() 3.202 + min_count = min(c.values()) 3.203 + bad_nodes = [node for (node, count) in c.items() if count == min_count] 3.204 + if len(bad_nodes) == len(graph.nodes): 3.205 + break 3.206 + 3.207 + costs = dict([(node, graph.cost_one(node)) for node in bad_nodes]) 3.208 + min_cost = min(costs.values()) 3.209 + for node, cost in costs.items(): 3.210 + if cost == min_cost: 3.211 + graph.drop_node(node) 3.212 + 3.213 + while True: 3.214 + # add nodes, while its is possible 3.215 + candidats = {} 3.216 + for node in self.nodes: 3.217 + if len([i for i in graph.nodes if frozenset([node, i]) in \ 3.218 + self.lines]) == len(self.nodes): # perl has me 3.219 + graph1 = Graph(copy(graph.nodes), copy(graph.lines)) 3.220 + graph1.add_node(node, self) 3.221 + candidats[node] = graph1.cost_one(node) 3.222 + if not candidats: 3.223 + break 3.224 + 3.225 + max_cost = max(candidats.values()) 3.226 + node = [node for (node, cost) in candidats.items() if cost == max_cost][0] 3.227 + graph.add_node(node, self) 3.228 + 3.229 + cliques.append(frozenset(graph.nodes)) 3.230 + 3.231 + return cliques 3.232 + 3.233 + 3.234 + def cliques(self, timeout=-1, minsize=1): 3.235 + """ 3.236 + returns length-sorted list of cliques 3.237 + clique is frozenset 3.238 + 3.239 + can change self! 3.240 + 3.241 + try to execute bron_kerbosh 3.242 + if it raises TimeoutError, executes fast_cliques 3.243 + """ 3.244 + 3.245 + drop_if_count(minsize) 3.246 + 3.247 + try: 3.248 + cliques = bron_kerbosh(timeout, minsize) 3.249 + except TimeoutError: 3.250 + cliques = fast_cliques(minsize) 3.251 + return sorted(cliques, key=lambda clique: len(clique), reverse=True) 3.252 + 3.253 +if __name__ == "__main__": 3.254 + import doctest 3.255 + doctest.testmod()