allpy
changeset 324:38888b46c342
Added usecases from wiki to test/ directory
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Fri, 17 Dec 2010 00:45:20 +0300 |
parents | 2a6c2cffa891 |
children | 6fdc4406e2fd |
files | test/usecase1.py test/usecase2.py test/usecase3.py |
diffstat | 3 files changed, 143 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/test/usecase1.py Fri Dec 17 00:45:20 2010 +0300 1.3 @@ -0,0 +1,35 @@ 1.4 +from allpy import protein 1.5 + 1.6 +# Create sequences from string representation of sequence body 1.7 +sequence_1 = allpy.protein.Sequence.from_string("mkstf", name="E2E4") 1.8 +sequence_2 = allpy.protein.Sequence.from_string("mstkfff") 1.9 + 1.10 +# Create alignment from sequences 1.11 +alignment = protein.Alignment() 1.12 +alignment.append_sequence(sequence_1) 1.13 +alignment.append_sequence(sequence_2) 1.14 +alignment.realign("muscle") 1.15 + 1.16 +# For each sequence, print number of gaps and non-gaps in alignment 1.17 +for row in alignment.rows(): 1.18 + gaps = 0 1.19 + monomers = 0 1.20 + for column in alignment.columns: 1.21 + if column in row: 1.22 + monomers += 1 1.23 + else: 1.24 + gaps += 1 1.25 + print "%s: %s gaps, %s non-gaps" % (row.sequence.name, gaps, monomers) 1.26 + 1.27 +# Print number of gaps in each column 1.28 +gaps = [] 1.29 +for column in alignment.columns: 1.30 + column_gaps = 0 1.31 + for sequence in alignment.sequences: 1.32 + if sequence not in column: 1.33 + column_gaps += 1 1.34 + gaps.append(column_gaps) 1.35 +print " ".join(map(str, gaps)) 1.36 + 1.37 +# Write alignment to file 1.38 +alingment.to_fasta(open("new_file.fasta", "w"))
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/test/usecase2.py Fri Dec 17 00:45:20 2010 +0300 2.3 @@ -0,0 +1,72 @@ 2.4 +# Fragments are in pair_repeat.fasta 2.5 +from allpy import dna 2.6 +width = 15 2.7 +treshold = 14 2.8 + 2.9 +def my_column_mark(column, threshold): 2.10 + """Helper to mark column (given as dict) by identity.""" 2.11 + count = {} 2.12 + for sequence, monomer in column: 2.13 + code = monomer.code1 2.14 + count[code] = count.get(code, 0) + 1 2.15 + for code in count: 2.16 + if count[code] > threshold: 2.17 + return "+" 2.18 + return "-" 2.19 + 2.20 +def my_pair_mark(column_as_list): 2.21 + """Helper to mark column of 2 sequences (given as list) by identity.""" 2.22 + if column[0] is None or column[1] is None: 2.23 + return "-" 2.24 + if column[0].code1 == column[0].code1: 2.25 + return "+" 2.26 + return "-" 2.27 + 2.28 +def find_runs(markup): 2.29 + """Fund long positive runs. 2.30 + 2.31 + This obscure and probably broken function has nothing to do with allpy, 2.32 + so it's presence in the example is unnecessary. 2.33 + """ 2.34 + position = 0 2.35 + count = 0 2.36 + plus_positions=[] 2.37 + for i in range(len(column_marks)): 2.38 + position += 1 2.39 + if position < width : 2.40 + if column_marks[i]=="+": 2.41 + count += 1 2.42 + column_window_marks += "-" 2.43 + continue 2.44 + if position > width: 2.45 + if column_marks[i-width] == "+": 2.46 + count -=1 2.47 + if count >= treshold: 2.48 + plus_positions.append(position) 2.49 + if len(positions)==0: 2.50 + raise Exception("No blocks in alignment") 2.51 + blocks=[] 2.52 + start = positions[0]-width + 1 2.53 + stop = positions[0] 2.54 + 2.55 + for p in positions[1:]: 2.56 + if p == stop +1: 2.57 + stop = p 2.58 + cuntinue 2.59 + blocks.append((start,stop)) 2.60 + start = p - width + 1 2.61 + stop = p 2.62 + 2.63 +def main(): 2.64 + alignment = dna.Alignment.from_fasta(open("pair_repeat.fasta")) 2.65 + if alignment.height != 2: 2.66 + raise Exception("Input must have exactly 2 sequences!") 2.67 + alignment.realign("needle", gap_open = 0) 2.68 + markup = alignment.map_columns(my_pair_mark) 2.69 + print find_runs(markup) 2.70 + 2.71 +try: 2.72 + main() 2.73 +except Exception, e: 2.74 + print "An error has occured:", e 2.75 +
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/test/usecase3.py Fri Dec 17 00:45:20 2010 +0300 3.3 @@ -0,0 +1,36 @@ 3.4 +from allpy import protein 3.5 +alignment = protein.Alignment.from_fasta(open("aln.fasta")) 3.6 +#conservative = [(10,20), (40,50)] 3.7 +conservative = [(0,6),(18,37)] 3.8 + 3.9 +def ranges_to_markup(ranges): 3.10 + """Convert list of ranges to line of markup. 3.11 + 3.12 + This has nothing to do with allpy. 3.13 + """ 3.14 + markup = ["-"] * len(alignment.columns) 3.15 + for begin, end in ranges: 3.16 + for i in range(begin, end+1): 3.17 + markup[i] = "+" 3.18 + return "".join(markup) 3.19 + 3.20 +def markup_to_blocks(markup): 3.21 + """Convert markup line to a bunch of blocks, one for each sequential run.""" 3.22 + current = None 3.23 + blocks = {} 3.24 + for mark, column in zip(markup, alignment.columns): 3.25 + if mark != current: 3.26 + block = protein.Block.from_alignment(alignment, columns=[]) 3.27 + blocks[mark] = blocks.get(mark, []) + [block] 3.28 + current = mark 3.29 + blocks[mark][-1].columns.append(column) 3.30 + return blocks 3.31 + 3.32 +def main(): 3.33 + markup = ranges_to_markup(conservative) 3.34 + blocks = markup_to_blocks(markup) 3.35 + for block in blocks["-"]: 3.36 + block.flush_left() 3.37 + alignment.to_fasta(open("output.fasta", "w")) 3.38 + 3.39 +main()