Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/004c2f6c45ac/lib/graph.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:39:20 2014
Кодировка:
allpy: lib/graph.py annotate

allpy

annotate lib/graph.py @ 143:004c2f6c45ac

project.from_fasta improvements
author boris <bnagaev@gmail.com>
date Sun, 24 Oct 2010 19:05:31 +0400
parents d67140e313f4
children db9d116e979f
rev   line source
bnagaev@116 1 # -*- coding: utf-8 -*-
bnagaev@116 2
bnagaev@121 3 from datetime import datetime, timedelta
bnagaev@116 4 from copy import copy
bnagaev@116 5
bnagaev@116 6 class TimeoutError(Exception):
bnagaev@116 7 pass
bnagaev@116 8
bnagaev@116 9
bnagaev@116 10
bnagaev@116 11 class Graph(object):
bnagaev@116 12 """
bnagaev@116 13 Data:
bnagaev@116 14 nodes -- set of elements
bnagaev@116 15 lines -- {line: cost}.
bnagaev@116 16 line is frozenset([e1, e2])
bnagaev@116 17 cost is float in (0, 1] or 1 (if all lines are equal)
bnagaev@116 18
bnagaev@116 19 >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1})
bnagaev@116 20 >>> g.fast_cliques()
bnagaev@116 21 Fast algorithm started
bnagaev@116 22 [frozenset([1, 2]), frozenset([3])]
bnagaev@119 23 >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1, frozenset([1,1]): 1})
bnagaev@119 24 >>> g.fast_cliques()
bnagaev@119 25 Fast algorithm started
bnagaev@119 26 [frozenset([1, 2]), frozenset([3])]
bnagaev@116 27 >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98,
bnagaev@119 28 ... frozenset([2,3]): 0.1, frozenset([1,1]): 1})
bnagaev@116 29 >>> g.fast_cliques()
bnagaev@116 30 Fast algorithm started
bnagaev@116 31 [frozenset([1, 2, 3]), frozenset([4])]
bnagaev@116 32 >>> g.bron_kerbosh()
bnagaev@116 33 Bron and Kerbosh algorithm started
bnagaev@116 34 [frozenset([1, 2, 3]), frozenset([4])]
bnagaev@119 35 >>> g.cliques()
bnagaev@119 36 Bron and Kerbosh algorithm started
bnagaev@119 37 [frozenset([1, 2, 3])]
bnagaev@116 38 """
bnagaev@116 39
bnagaev@116 40 def __init__(self, nodes=None, lines=None):
bnagaev@116 41 if not nodes:
bnagaev@116 42 nodes = set()
bnagaev@116 43 if not lines:
bnagaev@116 44 lines = dict()
bnagaev@121 45 self.nodes = set(nodes) # copy
bnagaev@119 46 self.lines = {}
bnagaev@119 47 for line, cost in lines.items():
bnagaev@119 48 if len(line) == 2 and line.issubset(self.nodes):
bnagaev@119 49 self.lines[line] = cost
bnagaev@119 50
bnagaev@117 51 @staticmethod
bnagaev@117 52 def line(k1, k2):
bnagaev@117 53 return frozenset([k1, k2])
bnagaev@116 54
bnagaev@116 55 def bounded(self, k1, k2):
bnagaev@117 56 return k1 == k2 or Graph.line(k1, k2) in self.lines
bnagaev@116 57
bnagaev@116 58 def count_one(self, node):
bnagaev@118 59 """
bnagaev@118 60 Returns number of connections of this node
bnagaev@118 61 """
bnagaev@117 62 return len([node1 for node1 in self.nodes if self.bounded(node, node1)]) - 1
bnagaev@116 63
bnagaev@116 64 def cost_one(self, node):
bnagaev@118 65 """
bnagaev@118 66 Returns sum of costs of all connections of this node
bnagaev@118 67 """
bnagaev@121 68 return sum([self.lines.get(Graph.line(node, node1), 0)
bnagaev@121 69 for node1 in self.nodes if node != node1])
bnagaev@116 70
bnagaev@116 71 def count_all(self):
bnagaev@118 72 """
bnagaev@118 73 Returns {node: number of connections of this node}
bnagaev@118 74 """
bnagaev@116 75 c = dict([(node, 0) for node in self.nodes])
bnagaev@116 76 for line in self.lines:
bnagaev@116 77 for node in line:
bnagaev@116 78 c[node] += 1
bnagaev@116 79 return c
bnagaev@116 80
bnagaev@116 81
bnagaev@116 82 def drop_node(self, node):
bnagaev@118 83 """
bnagaev@118 84 Remove node and all involved lines
bnagaev@118 85 """
bnagaev@116 86 for node1 in self.nodes:
bnagaev@117 87 self.lines.pop(Graph.line(node, node1), None)
bnagaev@116 88 self.nodes.discard(node)
bnagaev@116 89
bnagaev@116 90 def add_node(self, node, parent_graph):
bnagaev@118 91 """
bnagaev@118 92 Add node and corresponding lines from parent_graph
bnagaev@118 93 Added lines should be contained in self graph
bnagaev@118 94 (takes care of hanging lines)
bnagaev@118 95 """
bnagaev@116 96 self.nodes.add(node)
bnagaev@116 97 for node1 in self.nodes:
bnagaev@117 98 line = Graph.line(node, node1)
bnagaev@116 99 if line in parent_graph.lines:
bnagaev@117 100 self.lines[line] = parent_graph.lines[line]
bnagaev@116 101
bnagaev@116 102 def drop_nodes(self, nodes):
bnagaev@118 103 """
bnagaev@118 104 Run drop_node for each of given nodes
bnagaev@118 105 Returns if nodes was not empty (ugly beauty)
bnagaev@118 106 """
bnagaev@116 107 for node in nodes:
bnagaev@116 108 self.drop_node(node)
bnagaev@116 109 return bool(nodes)
bnagaev@116 110
bnagaev@116 111 def drop_if_count(self, minsize):
bnagaev@118 112 """
bnagaev@118 113 Run drop_node for each node, that has less than minsize lines
bnagaev@118 114 """
bnagaev@116 115 while True:
bnagaev@121 116 if not self.drop_nodes([node for (node, count)
bnagaev@121 117 in self.count_all().items() if count < minsize]):
bnagaev@116 118 break
bnagaev@116 119
bnagaev@116 120 def bron_kerbosh(self, timeout=-1, minsize=1):
bnagaev@116 121 """
bnagaev@116 122 returns list of cliques
bnagaev@116 123 clique is frozenset
bnagaev@116 124 if timeout=-1, it means infinity
bnagaev@116 125 if timeout has happened, raises TimeoutError
bnagaev@116 126
bnagaev@116 127 lava flow
bnagaev@116 128 """
bnagaev@116 129 print 'Bron and Kerbosh algorithm started'
bnagaev@116 130 cliques = []
bnagaev@116 131
bnagaev@116 132 depth = 0
bnagaev@116 133 list_candidates = [copy(self.nodes)]
bnagaev@116 134 list_used = [set()]
bnagaev@116 135 compsub = []
bnagaev@116 136
bnagaev@116 137 start_time = datetime.now()
bnagaev@121 138 timeout_timedelta = timedelta(timeout)
bnagaev@121 139
bnagaev@116 140 while True: # ????????...
bnagaev@116 141 if depth == -1:
bnagaev@116 142 break # ??????! ?????? ???????????????? (????????????????) ????????????????
bnagaev@116 143 candidates = copy(list_candidates[depth])
bnagaev@116 144 used = copy(list_used[depth])
bnagaev@116 145 if not candidates: # ???????? candidates ???? ??????????
bnagaev@116 146 depth -= 1
bnagaev@116 147 if compsub:
bnagaev@116 148 compsub.pop()
bnagaev@116 149 continue
bnagaev@116 150
bnagaev@116 151 # ?? used ???? ???????????????? ??????????????, ?????????????????????? ???? ?????????? ?????????????????? ???? candidates
bnagaev@116 152 # (?????? ???? used ???? ?????????????????? ???????? ???? ?? 1 ???? candidates)
bnagaev@116 153 used_candidates = False
bnagaev@116 154
bnagaev@116 155 for used1 in used:
bnagaev@116 156 for candidates1 in candidates:
bnagaev@116 157 if not self.bounded(used1, candidates1):
bnagaev@116 158 break
bnagaev@116 159 else:
bnagaev@116 160 used_candidates = True
bnagaev@116 161
bnagaev@116 162 if used_candidates:
bnagaev@116 163 depth -= 1
bnagaev@116 164
bnagaev@116 165 if compsub:
bnagaev@116 166 compsub.pop()
bnagaev@116 167 continue
bnagaev@116 168
bnagaev@116 169 # ???????????????? ?????????????? v ???? candidates ?? ?????????????????? ???? ?? compsub
bnagaev@116 170 v = candidates.pop()
bnagaev@116 171 candidates.add(v)
bnagaev@116 172 compsub.append(v)
bnagaev@116 173 # ?????????????????? new_candidates ?? new_used, ???????????? ???? candidates ?? used ??????????????, ???? ?????????????????????? ?? v
bnagaev@116 174 # (???? ????????, ???????????????? ???????????? ?????????????????????? ?? v)
bnagaev@116 175 new_candidates = set()
bnagaev@116 176 for candidates1 in candidates:
bnagaev@116 177 if self.bounded(candidates1, v) and candidates1 != v:
bnagaev@116 178 new_candidates.add(candidates1)
bnagaev@116 179
bnagaev@116 180 new_used = set()
bnagaev@116 181 for used1 in used:
bnagaev@116 182 if self.bounded(used1, v) and used1 != v:
bnagaev@116 183 new_used.add(used1)
bnagaev@116 184
bnagaev@116 185 # ?????????????? v ???? candidates ?? ???????????????? ?? used
bnagaev@116 186 list_candidates[depth].remove(v)
bnagaev@116 187 list_used[depth].add(v)
bnagaev@116 188 # ???????? new_candidates ?? new_used ??????????
bnagaev@116 189 if not new_candidates and not new_used:
bnagaev@116 190 # compsub ??? ??????????
bnagaev@116 191 if len(compsub) >= minsize:
bnagaev@116 192 cliques.append(frozenset(compsub))
bnagaev@116 193 else:
bnagaev@116 194 # ?????????? ???????????????????? ???????????????? bron_kerbosh(new_candidates, new_used)
bnagaev@116 195 depth += 1
bnagaev@116 196
bnagaev@116 197 # TIMEOUT check start
bnagaev@116 198 if timeout != -1:
bnagaev@121 199 if datetime.now() - start_time > timeout_timedelta:
bnagaev@116 200 raise TimeoutError
bnagaev@116 201 # TIMEOUT check end
bnagaev@116 202
bnagaev@116 203 if depth >= len(list_candidates):
bnagaev@116 204 list_candidates.append(set())
bnagaev@116 205 list_used.append(set())
bnagaev@116 206
bnagaev@116 207 list_candidates[depth] = copy(new_candidates)
bnagaev@116 208 list_used[depth] = copy(new_used)
bnagaev@116 209
bnagaev@116 210 continue
bnagaev@116 211
bnagaev@116 212 # ?????????????? v ???? compsub
bnagaev@116 213 if compsub:
bnagaev@116 214 compsub.pop()
bnagaev@116 215
bnagaev@116 216 return cliques
bnagaev@116 217
bnagaev@116 218
bnagaev@116 219 def fast_cliques(self, minsize=1):
bnagaev@116 220 """
bnagaev@116 221 returns list of cliques
bnagaev@116 222 clique is frozenset
bnagaev@116 223 """
bnagaev@116 224 print 'Fast algorithm started'
bnagaev@116 225 cliques = []
bnagaev@116 226
bnagaev@116 227 while True:
bnagaev@117 228 graph = Graph(self.nodes, self.lines)
bnagaev@116 229 for clique in cliques:
bnagaev@116 230 graph.drop_nodes(clique)
bnagaev@119 231 if not graph.nodes:
bnagaev@116 232 break
bnagaev@116 233
bnagaev@116 234 while True:
bnagaev@116 235 # drop nodes, while its is possible
bnagaev@119 236 if len(graph.nodes) == 1:
bnagaev@119 237 break
bnagaev@116 238 c = graph.count_all()
bnagaev@116 239 min_count = min(c.values())
bnagaev@116 240 bad_nodes = [node for (node, count) in c.items() if count == min_count]
bnagaev@119 241 if len(bad_nodes) == len(graph.nodes) and min_count != 0:
bnagaev@116 242 break
bnagaev@116 243
bnagaev@116 244 costs = dict([(node, graph.cost_one(node)) for node in bad_nodes])
bnagaev@116 245 min_cost = min(costs.values())
bnagaev@116 246 for node, cost in costs.items():
bnagaev@116 247 if cost == min_cost:
bnagaev@116 248 graph.drop_node(node)
bnagaev@119 249 break
bnagaev@116 250
bnagaev@116 251 while True:
bnagaev@116 252 # add nodes, while its is possible
bnagaev@116 253 candidats = {}
bnagaev@116 254 for node in self.nodes:
bnagaev@117 255 c = len([i for i in graph.nodes if self.bounded(node, i)])
bnagaev@117 256 if c == len(self.nodes):
bnagaev@117 257 graph1 = Graph(graph.nodes, graph.lines)
bnagaev@116 258 graph1.add_node(node, self)
bnagaev@116 259 candidats[node] = graph1.cost_one(node)
bnagaev@116 260 if not candidats:
bnagaev@116 261 break
bnagaev@116 262
bnagaev@116 263 max_cost = max(candidats.values())
bnagaev@116 264 node = [node for (node, cost) in candidats.items() if cost == max_cost][0]
bnagaev@116 265 graph.add_node(node, self)
bnagaev@116 266
bnagaev@116 267 cliques.append(frozenset(graph.nodes))
bnagaev@116 268
bnagaev@116 269 return cliques
bnagaev@116 270
bnagaev@116 271
bnagaev@116 272 def cliques(self, timeout=-1, minsize=1):
bnagaev@116 273 """
bnagaev@116 274 returns length-sorted list of cliques
bnagaev@116 275 clique is frozenset
bnagaev@116 276
bnagaev@116 277 can change self!
bnagaev@116 278
bnagaev@116 279 try to execute bron_kerbosh
bnagaev@116 280 if it raises TimeoutError, executes fast_cliques
bnagaev@116 281 """
bnagaev@116 282
bnagaev@117 283 self.drop_if_count(minsize)
bnagaev@116 284
bnagaev@116 285 try:
bnagaev@117 286 cliques = self.bron_kerbosh(timeout, minsize)
bnagaev@117 287 cliques.sort(key=lambda clique: len(clique), reverse=True)
bnagaev@116 288 except TimeoutError:
bnagaev@117 289 cliques = self.fast_cliques(minsize)
bnagaev@117 290 return cliques
bnagaev@116 291
bnagaev@116 292 if __name__ == "__main__":
bnagaev@116 293 import doctest
bnagaev@116 294 doctest.testmod()