Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/7184863832b9/test/usecase2.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 04:25:21 2013
Кодировка:
allpy: 7184863832b9 test/usecase2.py

allpy

view test/usecase2.py @ 391:7184863832b9

Added test/freqs.py: a tool to print frequencies of letters by position in alignment
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Wed, 02 Feb 2011 22:09:41 +0300
parents 0411fa851bba
children cc1959669928
line source
1 # Fragments are in pair_repeat.fasta
2 from allpy import dna
3 width = 15
4 treshold = 14
6 def my_column_mark(column, threshold):
7 """Helper to mark column (given as dict) by identity."""
8 count = {}
9 for sequence, monomer in column:
10 code = monomer.code1
11 count[code] = count.get(code, 0) + 1
12 for code in count:
13 if count[code] > threshold:
14 return "+"
15 return "-"
17 def my_pair_mark(column):
18 """Helper to mark column of 2 sequences (given as list) by identity."""
19 if column[0] is None or column[1] is None:
20 return "-"
21 if column[0].code1 == column[0].code1:
22 return "+"
23 return "-"
25 def find_runs(markup):
26 """Fund long positive runs.
28 This obscure and probably broken function has nothing to do with allpy,
29 so it's presence in the example is unnecessary.
30 """
31 position = 0
32 count = 0
33 plus_positions=[]
34 for i in range(len(markup)):
35 position += 1
36 if position < width :
37 if markup[i]=="+":
38 count += 1
39 continue
40 if position > width:
41 if markup[i-width] == "+":
42 count -=1
43 if count >= treshold:
44 plus_positions.append(position)
45 if len(plus_positions)==0:
46 raise Exception("No blocks in alignment")
48 blocks=[]
49 start = plus_positions[0]-width + 1
50 stop = plus_positions[0]
51 for p in plus_positions[1:]:
52 if p == stop +1:
53 stop = p
54 continue
55 blocks.append((start,stop))
56 start = p - width + 1
57 stop = p
58 return blocks
60 def main():
61 alignment = dna.Alignment().append_file(open("pair_repeat.fasta"))
62 if len(alignment.sequences) != 2:
63 raise Exception("Input must have exactly 2 sequences!")
64 alignment.realign("needle", gap_open = 0)
65 markup = []
66 for column in alignment.columns_as_lists():
67 markup.append(my_pair_mark(column))
68 markup = alignment.map_columns(my_pair_mark)
69 print find_runs(markup)
71 try:
72 main()
73 except Exception, e:
74 print "An error has occured:", e