view test/usecase2.py @ 1130:b4b6cfe02285
pair-cores/web: substitute example of input with one with more blocks in output
author |
Boris Nagaev <bnagaev@gmail.com> |
date |
Thu, 10 Jan 2013 19:28:03 +0400 |
parents |
cc1959669928 |
children |
08d892230e8c |
line source
3 from collections import deque
5 from allpy.processors import Needle, Left
6 from allpy.fileio import FastaFile
7 from allpy.util import open
# Script entry point: the input and output paths are taken from the first two
# command-line arguments.
# NOTE(review): `open` here is allpy.util.open (see the imports), not the
# builtin. outfile is opened without an explicit mode although it is written
# to further down -- confirm that allpy.util.open handles that case.
# NOTE(review): there is no check that both arguments were supplied; a missing
# argument would raise IndexError on sys.argv.
11 if __name__ == "__main__":
12 infile = open(sys.argv[1])
13 outfile = open(sys.argv[2])
def has_identity(column):
    """Tell whether a two-entry column holds two equal values.

    `column` must support ``len()`` and ``.values()`` (mapping-like).

    Returns True only when the column has exactly two entries and those
    two values compare equal; otherwise returns False.
    """
    # Materialise the values so that indexing also works when .values()
    # returns a non-indexable view (as dict.values() does on Python 3);
    # on Python 2 this is just a cheap copy of the list.
    as_list = list(column.values())
    return len(column) == 2 and as_list[0] == as_list[1]
def is_good_window(window):
    """Decide whether `window` is full and has enough identical columns.

    A window is good when it holds exactly `width` columns and at least
    `threshold` of them satisfy has_identity(). `width` and `threshold`
    are module-level settings.
    """
    # Count identity columns first, exactly as a sum over the window would.
    identical_columns = 0
    for col in window:
        identical_columns += int(has_identity(col))
    is_full = len(window) == width
    return is_full and identical_columns >= threshold
# find_runs(alignment): walk over alignment.columns while keeping a bounded
# window of columns, and start a new block whenever the window turns "good"
# (see is_good_window).
# NOTE(review): original lines 25-26, 28, 32-33 and 35-36 are missing from
# this listing, so the initial value of in_block, the statement that feeds the
# window, the condition guarding block.columns.append and the return value are
# not visible here -- confirm against the full file before changing anything.
23 def find_runs(alignment):
# deque created with maxlen=width: once full, appending drops the oldest item.
24 window = deque([], width)
27 for column in alignment.columns:
# Evaluate the current window and keep the previous state in one assignment,
# so the next line can detect the transition into a good window.
29 in_block, was_in_block = is_good_window(window), in_block
30 if in_block and not was_in_block:
# A new block is created from a list copy of the columns now in the window.
31 block = dna.Block.from_alignment(alignment, columns=list(window))
# Extends the current block with the current column (guarding branch not shown).
34 block.columns.append(column)
# blocks_markup(alignment, blocks): return a single string built by joining
# the `in_block` attribute of every column of `alignment`, in column order.
# NOTE(review): original lines 39-40 and 42 are missing from this listing;
# presumably they set a default column.in_block for every column, loop over
# `blocks` (binding `block`) and mark the columns of each block -- confirm
# against the full file.
37 def blocks_markup(alignment, blocks):
38 for column in alignment.columns:
41 for column in block.columns:
# "".join requires every column.in_block to be a string by this point.
43 return "".join(column.in_block for column in alignment.columns)
# Main flow: load the input into an alignment, realign it, extract blocks with
# find_runs, then write every block, the alignment and the block markup.
# NOTE(review): original lines 44-45, 51, 54, 58 and 60-64 are missing from
# this listing (including the try/except that binds `e` for the final print),
# so the exact error handling cannot be seen here.
46 alignment = dna.Alignment().append_file(infile)
# NOTE(review): assert statements are removed under `python -O`; an explicit
# check that raises would be a safer way to validate the input.
47 assert len(alignment.sequences) == 2, "Input must have TWO sequences!"
# Two realignment passes: first with Left(), then with Needle().
48 alignment.realign(Left())
49 alignment.realign(Needle())
50 blocks = find_runs(alignment)
# Blocks are numbered from 1 and written to block_01.fasta, block_02.fasta, ...
52 for n, block in enumerate(blocks, 1):
# NOTE(review): the file object opened here is never closed explicitly.
53 block.to_file(open("block_%02d.fasta" % n, "w"))
55 alignment.to_file(outfile)
# The markup string from blocks_markup is written to the same output file;
# the formatted text below records the window width and threshold used
# (presumably as the record description -- confirm against FastaFile).
56 FastaFile(outfile).write_string(
57 blocks_markup(alignment, blocks),
59 "In run with window %s and threshold %s" % (width, threshold)
# Python 2 print statement; `e` is presumably the exception caught by a
# handler on the lines missing from this listing.
65 print "An error has occured:", e