Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/ee2c10aa74b8/test/usecase2.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 05:20:25 2013
Кодировка:
allpy: ee2c10aa74b8 test/usecase2.py

allpy

view test/usecase2.py @ 610:ee2c10aa74b8

paired_cores/score.py: change method of score evaluation Boundary positions of blocks are not considered. Columns represented with one sequence or pure gap columns are not considered. Calculate weighted average of number of connected components in columns. Weight of column = l / sum(l) (l = number of sequnces representing column)
author boris (kodomo) <bnagaev@gmail.com>
date Tue, 05 Apr 2011 23:11:41 +0400
parents 0411fa851bba
children cc1959669928
line source
1 # Fragments are in pair_repeat.fasta
2 from allpy import dna
3 width = 15
4 treshold = 14
6 def my_column_mark(column, threshold):
7 """Helper to mark column (given as dict) by identity."""
8 count = {}
9 for sequence, monomer in column:
10 code = monomer.code1
11 count[code] = count.get(code, 0) + 1
12 for code in count:
13 if count[code] > threshold:
14 return "+"
15 return "-"
17 def my_pair_mark(column):
18 """Helper to mark column of 2 sequences (given as list) by identity."""
19 if column[0] is None or column[1] is None:
20 return "-"
21 if column[0].code1 == column[0].code1:
22 return "+"
23 return "-"
25 def find_runs(markup):
26 """Fund long positive runs.
28 This obscure and probably broken function has nothing to do with allpy,
29 so it's presence in the example is unnecessary.
30 """
31 position = 0
32 count = 0
33 plus_positions=[]
34 for i in range(len(markup)):
35 position += 1
36 if position < width :
37 if markup[i]=="+":
38 count += 1
39 continue
40 if position > width:
41 if markup[i-width] == "+":
42 count -=1
43 if count >= treshold:
44 plus_positions.append(position)
45 if len(plus_positions)==0:
46 raise Exception("No blocks in alignment")
48 blocks=[]
49 start = plus_positions[0]-width + 1
50 stop = plus_positions[0]
51 for p in plus_positions[1:]:
52 if p == stop +1:
53 stop = p
54 continue
55 blocks.append((start,stop))
56 start = p - width + 1
57 stop = p
58 return blocks
60 def main():
61 alignment = dna.Alignment().append_file(open("pair_repeat.fasta"))
62 if len(alignment.sequences) != 2:
63 raise Exception("Input must have exactly 2 sequences!")
64 alignment.realign("needle", gap_open = 0)
65 markup = []
66 for column in alignment.columns_as_lists():
67 markup.append(my_pair_mark(column))
68 markup = alignment.map_columns(my_pair_mark)
69 print find_runs(markup)
71 try:
72 main()
73 except Exception, e:
74 print "An error has occured:", e