allpy
changeset 610:ee2c10aa74b8
pair_cores/score.py: change method of score evaluation
Boundary positions of blocks are not considered.
Columns represented with one sequence or pure gap columns are not considered.
Calculate weighted average of number of connected components in columns.
Weight of a column = l / sum(l), where the sum runs over all considered columns
(l = number of sequences representing the column)
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Tue, 05 Apr 2011 23:11:41 +0400 |
parents | 56d62d405021 |
children | 9b20dda5a2d4 |
files | pair_cores/score.py |
diffstat | 1 files changed, 21 insertions(+), 15 deletions(-) [+] |
line diff
1.1 --- a/pair_cores/score.py Sun Apr 03 17:05:32 2011 +0400 1.2 +++ b/pair_cores/score.py Tue Apr 05 23:11:41 2011 +0400 1.3 @@ -9,36 +9,42 @@ 1.4 def cn2(n): 1.5 return n*(n-1)/2 1.6 1.7 -def score(in_file, pair_cores_file): 1.8 +def score(in_file, pair_cores_file, debug=False): 1.9 alignment = Alignment().append_file(open(in_file)) 1.10 pair_cores = alignment.blocks_from_file(open(pair_cores_file)) 1.11 1.12 - score = 0.0 1.13 - max_score = 0.0 1.14 + C = [] 1.15 + L = [] 1.16 1.17 column2blocks = {} 1.18 for column in alignment.columns: 1.19 column2blocks[column] = [] 1.20 for b in pair_cores: 1.21 - for column in b.columns: 1.22 + for column in b.columns[1:-1]: 1.23 column2blocks[column].append(b) 1.24 for column in alignment.columns: 1.25 - sequence_graph = Graph() 1.26 - pairs_set = set() 1.27 - for b in column2blocks[column]: 1.28 - assert len(b.sequences) == 2 1.29 - sequence_graph.set_edge(b.sequences[0], b.sequences[1]) 1.30 - for g in sequence_graph.connected_components(): 1.31 - assert len(g) > 1 1.32 - score += cn2(len(g)) 1.33 - max_score += cn2(len(column)) 1.34 + if len(column) >= 2: 1.35 + sequence_graph = Graph() 1.36 + for b in column2blocks[column]: 1.37 + assert len(b.sequences) == 2 1.38 + sequence_graph.set_edge(b.sequences[0], b.sequences[1]) 1.39 + c = len(sequence_graph.connected_components()) 1.40 + singles = len(column) - len(sequence_graph) 1.41 + c += singles 1.42 + C.append(c) 1.43 + L.append(len(column)) 1.44 + if debug: 1.45 + print C[-1], L[-1] 1.46 1.47 - print("%f = %i / %i %s %s" % (float(score)/max_score, score, max_score, in_file, pair_cores_file)) 1.48 + score = sum([i*j for (i,j) in zip(C,L)]) / float(sum(L)) 1.49 + print("%(score)f %(n)i %(name)s" % {'score': score, 1.50 + 'n': len(alignment.sequences), 'name': in_file}) 1.51 1.52 try: 1.53 in_file = sys.argv[1] 1.54 pair_cores_file = sys.argv[2] 1.55 - score(in_file, pair_cores_file) 1.56 + debug = len(sys.argv) >= 4 and sys.argv[3] == 'debug' 1.57 + score(in_file, 
pair_cores_file, debug) 1.58 except: 1.59 pass 1.60